Different kinds of log files are parsed by a collection of objects
[gnucomo.git] / src / gcm_input / message.cpp
index 363aaa2..2c5d3f4 100644 (file)
@@ -8,7 +8,7 @@
 ***********************
 **      FILE NAME      : message.cpp
 **      SYSTEM NAME    : Gnucomo - Gnu Computer Monitoring
-**      VERSION NUMBER : $Revision: 1.11 $
+**      VERSION NUMBER : $Revision: 1.12 $
 **
 **  DESCRIPTION      :  Implementation of the message handling classes
 **
 
 /*****************************
    $Log: message.cpp,v $
-   Revision 1.11  2003-08-05 08:15:00  arjen
+   Revision 1.12  2003-08-11 16:56:16  arjen
+   Different kinds of log files are parsed by a collection of objects
+   of different classes, derived from the base class line_cooker
+   Depending on the message content or the message_type element in
+   XML, one of these objects is selected.
+
+   Logrunner is integrated with gcm_input. Although its functionality
+   is still limited, a connection between logrunner and gcm_input
+   is beginning to form.
+
+   Revision 1.11  2003/08/05 08:15:00  arjen
    Debug output to the log stream instead of cerr.
    Fixed namespace problems in XPath searches of the DOM.
    Moved string utility functions to a separate file.
 
 *****************************/
 
-static const char *RCSID = "$Id: message.cpp,v 1.11 2003-08-05 08:15:00 arjen Exp $";
+static const char *RCSID = "$Id: message.cpp,v 1.12 2003-08-11 16:56:16 arjen Exp $";
 
 #include <algorithm>
 #include <libxml/xpath.h>
 #include <libxml/debugXML.h>
 #include "message.h"
 
+//#define DEBUG
+
 extern bool verbose;   /*  Defined in the main application */
 extern bool testmode;
 extern bool incremental;
@@ -155,7 +167,6 @@ static const regex re_accesslog("(GET|POST) .+ HTTP");
 static const regex re_errorlog("^\\[" + unix_date_re + "\\] \\[(error|notice)\\] .+");
 static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");
 
-static const regex re_syslog_date("[[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}");
 static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
 static const regex re_mail_From("^From:[[:blank:]]+");
 static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
@@ -221,6 +232,27 @@ int client_message::readXMLinput(String first_line)
       xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
 #endif
       item = *res->nodesetval->nodeTab;
+
+      //  Select a line cooker based on the message type.
+
+#ifdef DEBUG
+      std::cout << "Looking for a line cooker for " << item->content << "\n";
+#endif
+      list<line_cooker *>::iterator lci = kitchen.begin();
+      pan = 0;
+      while (pan == 0 && lci != kitchen.end())
+      {
+         pan = *lci;
+         if (pan->message_type() != (const char *)(item->content))
+         {
+            pan = 0;
+         }
+         lci++;
+      }
+      if (pan == 0)
+      {
+         *log << "Can not find a line cooker for message type " << item->content << "\n";
+      }
    }
    else
    {
@@ -270,7 +302,7 @@ int client_message::readXMLinput(String first_line)
 **  VARS CHANGED   :
 **  FUNCTIONS USED :
 **  SEE ALSO       :
-**  LAST MODIFIED  : Apr 28, 2003
+**  LAST MODIFIED  : Aug 11, 2003
 **=========================================================================
 */
 
@@ -327,6 +359,8 @@ double client_message::classify(String host, UTC arriv, String serv)
 
    }
 
+   pan = 0;
+
    /*
     *  Now that we have the mail header out of the way, try to figure
     *  out what the content of the message is.
@@ -350,24 +384,6 @@ double client_message::classify(String host, UTC arriv, String serv)
          }
          readXMLinput(line);
       }
-      else if (line == re_syslog)
-      {
-         certainty = 1.0;
-         classification = SYSLOG;
-         if (verbose)
-         {
-            *log << "Syslog detected.\n";
-         }
-      }
-      else if (line == re_syslog_irix)
-      {
-         certainty = 1.0;
-         classification = SYSLOG_IRIX;
-         if (verbose)
-         {
-            *log << "IRIX Syslog detected.\n";
-         }
-      }
       else if (line == re_PGP)
       {
          certainty = 1.0;
@@ -382,26 +398,6 @@ double client_message::classify(String host, UTC arriv, String serv)
              *log << "DUMP output detected.\n";
           }
       }
-      else if (line == re_accesslog)
-      {
-          certainty = 1.0;
-          classification = ACCESSLOG;
-          service = "httpd";
-          if (verbose)
-          {
-             *log << "HTTP access log detected.\n";
-          }
-      }
-      else if (line == re_errorlog)
-      {
-          certainty = 1.0;
-          classification = ERRORLOG;
-          service = "httpd";
-          if (verbose)
-          {
-             *log << "HTTP error log detected.\n";
-          }
-      }
       else if (line == re_rpm)
       {
           certainty = 1.0;
@@ -412,6 +408,31 @@ double client_message::classify(String host, UTC arriv, String serv)
              *log << "RPM package list detected.\n";
           }
       }
+      else
+      {
+         //  Scan the list of line cookers to see whether any of them recognizes this line.
+
+         list<line_cooker *>::iterator lci = kitchen.begin();
+         pan = 0;
+         while (pan == 0 && lci != kitchen.end())
+         {
+            pan = *lci;
+            if (!pan->check_pattern(line))
+            {
+               pan = 0;
+            }
+            lci++;
+         }
+         if (pan != 0)
+         {
+            certainty = 1.0;
+            classification = COOKER_OBJECT;
+            if (verbose)
+            {
+               *log << "Detected message type " << pan->message_type() << "\n";
+            }
+         }
+      }
    }
    input.rewind();
 
@@ -497,20 +518,43 @@ void client_message::enterXML()
          {
             if (node->type == XML_ELEMENT_NODE)
             {
+               xmlNodePtr  item;
+               String      log_hostname;
+               UTC         log_date;
+               String      raw("");;
+               String      log_service;
+
                if (strcmp((char *)node->name, "raw") == 0)
                {
-                  *log << "Can not cook <raw> log elements yet.\n";
+                  item = node->children;
+                  if (pan == 0)
+                  {
+                     *log << "Can not cook this type of <raw> log element.\n";
+                  }
+                  else
+                  {
+                     raw = String((const char *)item->content);
+                     if (pan->cook_this(raw, arrival))
+                     {
+                        log_hostname = pan->hostname();
+                        if (log_hostname == "")
+                        {
+                           log_hostname = hostname;
+                        }
+                        log_service = pan->service();
+                        log_date    = pan->timestamp();
+                     }
+                     else
+                     {
+                        *log << "Log line " << raw << " does not match.\n";
+                        raw = "";
+                     }
+                  }
                }
                else if (strcmp((char *)node->name, "cooked") == 0)
                {
                   //  Find the parts of the log entry
 
-                  xmlNodePtr  item;
-                  String      log_hostname;
-                  UTC         log_date;
-                  String      raw("");;
-                  String      log_service;
-
                   if (verbose)
                   {
                      *log << "Analyzing cooked element.\n";
@@ -566,35 +610,38 @@ void client_message::enterXML()
                      *log << "<raw> missing from cooked log element.\n";
                   }
 
-                  if (raw != "" && log_hostname != "" && log_date.proper())
-                  {
-                     String insertion("insert into log (objectid, servicecode,"
-                           " object_timestamp, timestamp, rawdata, processed) values (");
+               }
 
-                     /*   Insert a new record into the log table   */
+               //   Insert a new log record into the database.
+               if (raw != "" && log_hostname != "" && log_date.proper())
+               {
+                  String insertion("insert into log (objectid, servicecode,"
+                        " object_timestamp, timestamp, rawdata, processed) values (");
 
-                     insertion += "'" + objectid + "',";
-                     insertion += "'" + log_service + "',";
-                     insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
-                     insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
-                     insertion += "'" + SQL_Escape(raw) + "',FALSE";
-                     insertion += ")";
+                  /*   Insert a new record into the log table   */
 
-                     if (testmode)
-                     {
-                        *log << insertion << "\n";
-                     }
-                     else
-                     {
-                        database.Query(insertion);
-                     }
+                  insertion += "'" + objectid + "',";
+                  insertion += "'" + log_service + "',";
+                  insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
+                  insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
+                  insertion += "'" + SQL_Escape(raw) + "',FALSE";
+                  insertion += ")";
 
-                     if (verbose)
-                     {
-                        *log << "\n\n";
-                     }
+                  if (testmode)
+                  {
+                     *log << insertion << "\n";
+                  }
+                  else
+                  {
+                     database.Query(insertion);
+                  }
+
+                  if (verbose)
+                  {
+                     *log << "\n\n";
                   }
                }
             }
             node = node->next;
          }
@@ -702,6 +749,7 @@ int client_message::enter()
    /*  Scan the input line by line, entering records into the database */
 
    String rest;   //  Rest of the line to be parsed
+   regex  re_any(".*");
 
    while (input >> line)
    {
@@ -717,21 +765,12 @@ int client_message::enter()
 
       switch (classification)
       {
-      case SYSLOG:
-            check = &re_syslog;
-            break;
-      case SYSLOG_IRIX:
-            check = &re_syslog_irix;
-            break;
-      case ACCESSLOG:
-            check = &re_accesslog;
-            break;
-      case ERRORLOG:
-            check = &re_errorlog;
-            break;
       case RPMLIST:
             check = &re_rpm;
             break;
+      case COOKER_OBJECT:
+            check = &re_any;
+            break;
       }
 
       if (line == *check)
@@ -746,116 +785,26 @@ int client_message::enter()
 
          switch (classification)
          {
-         case SYSLOG:
-            datestring = line(0,16);
-            log_date = datestring;
-            log_time = datestring;
-            if (log_date.Year() < 0 || log_date.Year() > 2500)
-            {
-               //  The year is not in the log file. Assume the year of arrival,
-               //  unless this puts the log entry at a later date than the arrival date.
-               //  This happens e.g. when a log entry from December arrives in Januari.
-
-               log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year());
-               if (log_date > date(arrival))
-               {
-                  log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year() - 1);
-               }
-            }
-
-            if (verbose)
-            {
-               *log << "   Log timestamp  = " << log_date << " " << log_time << "\n";
-            }
-            rest = line << 16;
-            i = rest.index(' ');
-            if (rest(0,i) == hostname(0,i))
-            {
-               rest <<= i + 1;
-               if (verbose)
-               {
-                  *log << "   Hostname matches.\n";
-                  *log << "   rest = " << rest << "\n";
-               }
-               for (i = 0; isalpha(rest[i]) && i < ~rest; i++);
-               if (verbose)
-               {
-                  *log << "   Service name = " << rest(0,i) << "\n";
-               }
-
-               /*   Insert a new record into the log table   */
-
-               insertion += "'" + objectid + "',";
-               insertion += "'" + rest(0,i) + "',";
-               insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
-               insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
-               insertion += "'" + SQL_Escape(line) + "',FALSE";
-               insertion += ")";
-
-               if (testmode)
-               {
-                  *log << insertion << "\n";
-               }
-               else
-               {
-                  database.Query(insertion);
-               }
-
-               if (verbose)
-               {
-                  *log << "\n\n";
-               }
-
-               nr_lines++;
-            }
-            else
-            {
-               *log << "   Hostname " << rest(0,i) << " does not match.\n";
-            }
-            break;
-
-         case SYSLOG_IRIX:
-            datestring = line(0,16);
-            log_date = datestring;
-            log_time = datestring;
-            if (log_date.Year() < 0 || log_date.Year() > 2500)
-            {
-               //  The year is not in the log file. Assume the year of arrival,
-               //  unless this puts the log entry at a later date than the arrival date.
-               //  This happens e.g. when a log entry from December arrives in Januari.
-
-               log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year());
-               if (log_date > date(arrival))
-               {
-                  log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year() - 1);
-               }
-            }
+         case COOKER_OBJECT:
+#ifdef DEBUG
+            std::cerr << "\ncooker check: " << pan->check_pattern(line) << "\n";
+#endif
+            pan->cook_this(line, arrival);
 
-            if (verbose)
-            {
-               *log << "   Log timestamp  = " << log_date << " " << log_time << "\n";
-            }
-            rest = line << 19;
-            i = rest.index(' ');
-            if (rest(0,i) == hostname(0,i))
+            if (pan->hostname() == hostname(0,~pan->hostname()))
             {
-               rest <<= i + 1;
-               if (verbose)
-               {
-                  *log << "   Hostname matches.\n";
-                  *log << "   rest = " << rest << "\n";
-               }
-               for (i = 0; isalpha(rest[i]) && i < ~rest; i++);
-               if (verbose)
-               {
-                  *log << "   Service name = " << rest(0,i) << "\n";
-               }
 
+#ifdef DEBUG
+               std::cerr << " Information from cooker:\n";
+               std::cerr << "     timestamp = " << pan->timestamp() << "\n";
+               std::cerr << "     hostname  = " << pan->hostname() << "\n";
+               std::cerr << "     service   = " << pan->service() << "\n";
+#endif
                /*   Insert a new record into the log table   */
 
                insertion += "'" + objectid + "',";
-               insertion += "'" + rest(0,i) + "',";
-               insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
+               insertion += "'" + pan->service() + "',";
+               insertion += "'" + pan->timestamp().format("%Y-%m-%d %T") + "',";
                insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
                insertion += "'" + SQL_Escape(line) + "',FALSE";
                insertion += ")";
@@ -878,78 +827,10 @@ int client_message::enter()
             }
             else
             {
-               *log << "   Hostname " << rest(0,i) << " does not match.\n";
-            }
-            break;
-
-         case ACCESSLOG:
-            datestring = line(regex("\\[.+\\]"));
-            datestring <<= 1;
-            datestring >>= 1;
-            datestring[datestring.index(':')] = ' ';
-            log_date = datestring;
-            log_time = datestring;
-            if (verbose)
-            {
-               *log << "   Log timestamp  = " << log_date << " " << log_time << "\n";
-            }
-            insertion += "'" + objectid + "',";
-            insertion += "'" + service + "',";
-            insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
-            insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
-            insertion += "'" + SQL_Escape(line) + "',FALSE";
-            insertion += ")";
-
-            if (testmode)
-            {
-               *log << insertion << "\n";
-            }
-            else
-            {
-               database.Query(insertion);
-            }
-
-            if (verbose)
-            {
-               *log << "\n\n";
+               *log << "   Hostname " << pan->hostname() << " does not match.\n";
             }
-
-            nr_lines++;
             break;
 
-         case ERRORLOG:
-            datestring = line(regex("\\[.+\\]"));
-            datestring <<= 1;
-            datestring >>= 1;
-            log_date = datestring;
-            log_time = datestring;
-            if (verbose)
-            {
-               *log << "   Log timestamp  = " << log_date << " " << log_time << "\n";
-            }
-            insertion += "'" + objectid + "',";
-            insertion += "'" + service + "',";
-            insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
-            insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
-            insertion += "'" + SQL_Escape(line) + "',FALSE";
-            insertion += ")";
-
-            if (testmode)
-            {
-               *log << insertion << "\n";
-            }
-            else
-            {
-               database.Query(insertion);
-            }
-
-            if (verbose)
-            {
-               *log << "\n\n";
-            }
-
-            nr_lines++;
-            break;
 
          case RPMLIST:
             //  Scan a list of packages and versions from "rpm -a".