***********************
** FILE NAME : message.cpp
** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
-** VERSION NUMBER : $Revision: 1.11 $
+** VERSION NUMBER : $Revision: 1.12 $
**
** DESCRIPTION : Implementation of the message handling classes
**
/*****************************
$Log: message.cpp,v $
- Revision 1.11 2003-08-05 08:15:00 arjen
+ Revision 1.12 2003-08-11 16:56:16 arjen
+ Different kinds of log files are parsed by a collection of objects
+ of different classes, derived from the base class line_cooker
+ Depending on the message content or the message_type element in
+ XML, one of these objects is selected.
+
+ Logrunner is integrated with gcm_input. Although its functionality
+ is still limited, a connection between logrunner and gcm_input
+ is beginning to form.
+
+ Revision 1.11 2003/08/05 08:15:00 arjen
Debug output to the log stream instead of cerr.
Fixed namespace problems in XPath searches of the DOM.
Moved string utility functions to a separate file.
*****************************/
-static const char *RCSID = "$Id: message.cpp,v 1.11 2003-08-05 08:15:00 arjen Exp $";
+static const char *RCSID = "$Id: message.cpp,v 1.12 2003-08-11 16:56:16 arjen Exp $";
#include <algorithm>
#include <libxml/xpath.h>
#include <libxml/debugXML.h>
#include "message.h"
+//#define DEBUG
+
extern bool verbose; /* Defined in the main application */
extern bool testmode;
extern bool incremental;
static const regex re_errorlog("^\\[" + unix_date_re + "\\] \\[(error|notice)\\] .+");
static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");
-static const regex re_syslog_date("[[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}");
static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
static const regex re_mail_From("^From:[[:blank:]]+");
static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
#endif
item = *res->nodesetval->nodeTab;
+
+ // Select a line cooker based on the message type.
+
+#ifdef DEBUG
+ std::cout << "Looking for a line cooker for " << item->content << "\n";
+#endif
+ list<line_cooker *>::iterator lci = kitchen.begin();
+ pan = 0;
+ while (pan == 0 && lci != kitchen.end())
+ {
+ pan = *lci;
+ if (pan->message_type() != (const char *)(item->content))
+ {
+ pan = 0;
+ }
+ lci++;
+ }
+ if (pan == 0)
+ {
+ *log << "Can not find a line cooker for message type " << item->content << "\n";
+ }
}
else
{
** VARS CHANGED :
** FUNCTIONS USED :
** SEE ALSO :
-** LAST MODIFIED : Apr 28, 2003
+** LAST MODIFIED : Aug 11, 2003
**=========================================================================
*/
}
+ pan = 0;
+
/*
* Now that we have the mail header out of the way, try to figure
* out what the content of the message is.
}
readXMLinput(line);
}
- else if (line == re_syslog)
- {
- certainty = 1.0;
- classification = SYSLOG;
- if (verbose)
- {
- *log << "Syslog detected.\n";
- }
- }
- else if (line == re_syslog_irix)
- {
- certainty = 1.0;
- classification = SYSLOG_IRIX;
- if (verbose)
- {
- *log << "IRIX Syslog detected.\n";
- }
- }
else if (line == re_PGP)
{
certainty = 1.0;
*log << "DUMP output detected.\n";
}
}
- else if (line == re_accesslog)
- {
- certainty = 1.0;
- classification = ACCESSLOG;
- service = "httpd";
- if (verbose)
- {
- *log << "HTTP access log detected.\n";
- }
- }
- else if (line == re_errorlog)
- {
- certainty = 1.0;
- classification = ERRORLOG;
- service = "httpd";
- if (verbose)
- {
- *log << "HTTP error log detected.\n";
- }
- }
else if (line == re_rpm)
{
certainty = 1.0;
*log << "RPM package list detected.\n";
}
}
+ else
+ {
+ // Scan the list of line cookers to see if any of them recognizes this line.
+
+ list<line_cooker *>::iterator lci = kitchen.begin();
+ pan = 0;
+ while (pan == 0 && lci != kitchen.end())
+ {
+ pan = *lci;
+ if (!pan->check_pattern(line))
+ {
+ pan = 0;
+ }
+ lci++;
+ }
+ if (pan != 0)
+ {
+ certainty = 1.0;
+ classification = COOKER_OBJECT;
+ if (verbose)
+ {
+ *log << "Detected message type " << pan->message_type() << "\n";
+ }
+ }
+ }
}
input.rewind();
{
if (node->type == XML_ELEMENT_NODE)
{
+ xmlNodePtr item;
+ String log_hostname;
+ UTC log_date;
+ String raw("");;
+ String log_service;
+
if (strcmp((char *)node->name, "raw") == 0)
{
- *log << "Can not cook <raw> log elements yet.\n";
+ item = node->children;
+ if (pan == 0)
+ {
+ *log << "Can not cook this type of <raw> log element.\n";
+ }
+ else
+ {
+ raw = String((const char *)item->content);
+ if (pan->cook_this(raw, arrival))
+ {
+ log_hostname = pan->hostname();
+ if (log_hostname == "")
+ {
+ log_hostname = hostname;
+ }
+ log_service = pan->service();
+ log_date = pan->timestamp();
+ }
+ else
+ {
+ *log << "Log line " << raw << " does not match.\n";
+ raw = "";
+ }
+ }
}
else if (strcmp((char *)node->name, "cooked") == 0)
{
// Find the parts of the log entry
- xmlNodePtr item;
- String log_hostname;
- UTC log_date;
- String raw("");;
- String log_service;
-
if (verbose)
{
*log << "Analyzing cooked element.\n";
*log << "<raw> missing from cooked log element.\n";
}
- if (raw != "" && log_hostname != "" && log_date.proper())
- {
- String insertion("insert into log (objectid, servicecode,"
- " object_timestamp, timestamp, rawdata, processed) values (");
+ }
- /* Insert a new record into the log table */
+ // Insert a new log record into the database.
+ if (raw != "" && log_hostname != "" && log_date.proper())
+ {
+ String insertion("insert into log (objectid, servicecode,"
+ " object_timestamp, timestamp, rawdata, processed) values (");
- insertion += "'" + objectid + "',";
- insertion += "'" + log_service + "',";
- insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
- insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
- insertion += "'" + SQL_Escape(raw) + "',FALSE";
- insertion += ")";
+ /* Insert a new record into the log table */
- if (testmode)
- {
- *log << insertion << "\n";
- }
- else
- {
- database.Query(insertion);
- }
+ insertion += "'" + objectid + "',";
+ insertion += "'" + log_service + "',";
+ insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
+ insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
+ insertion += "'" + SQL_Escape(raw) + "',FALSE";
+ insertion += ")";
- if (verbose)
- {
- *log << "\n\n";
- }
+ if (testmode)
+ {
+ *log << insertion << "\n";
+ }
+ else
+ {
+ database.Query(insertion);
+ }
+
+ if (verbose)
+ {
+ *log << "\n\n";
}
}
+
}
node = node->next;
}
/* Scan the input line by line, entering records into the database */
String rest; // Rest of the line to be parsed
+ regex re_any(".*");
while (input >> line)
{
switch (classification)
{
- case SYSLOG:
- check = &re_syslog;
- break;
- case SYSLOG_IRIX:
- check = &re_syslog_irix;
- break;
- case ACCESSLOG:
- check = &re_accesslog;
- break;
- case ERRORLOG:
- check = &re_errorlog;
- break;
case RPMLIST:
check = &re_rpm;
break;
+ case COOKER_OBJECT:
+ check = &re_any;
+ break;
}
if (line == *check)
switch (classification)
{
- case SYSLOG:
- datestring = line(0,16);
- log_date = datestring;
- log_time = datestring;
- if (log_date.Year() < 0 || log_date.Year() > 2500)
- {
- // The year is not in the log file. Assume the year of arrival,
- // unless this puts the log entry at a later date than the arrival date.
- // This happens e.g. when a log entry from December arrives in Januari.
-
- log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year());
- if (log_date > date(arrival))
- {
- log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year() - 1);
- }
- }
-
- if (verbose)
- {
- *log << " Log timestamp = " << log_date << " " << log_time << "\n";
- }
- rest = line << 16;
- i = rest.index(' ');
- if (rest(0,i) == hostname(0,i))
- {
- rest <<= i + 1;
- if (verbose)
- {
- *log << " Hostname matches.\n";
- *log << " rest = " << rest << "\n";
- }
- for (i = 0; isalpha(rest[i]) && i < ~rest; i++);
- if (verbose)
- {
- *log << " Service name = " << rest(0,i) << "\n";
- }
-
- /* Insert a new record into the log table */
-
- insertion += "'" + objectid + "',";
- insertion += "'" + rest(0,i) + "',";
- insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
- insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
- insertion += "'" + SQL_Escape(line) + "',FALSE";
- insertion += ")";
-
- if (testmode)
- {
- *log << insertion << "\n";
- }
- else
- {
- database.Query(insertion);
- }
-
- if (verbose)
- {
- *log << "\n\n";
- }
-
- nr_lines++;
- }
- else
- {
- *log << " Hostname " << rest(0,i) << " does not match.\n";
- }
- break;
-
- case SYSLOG_IRIX:
- datestring = line(0,16);
- log_date = datestring;
- log_time = datestring;
- if (log_date.Year() < 0 || log_date.Year() > 2500)
- {
- // The year is not in the log file. Assume the year of arrival,
- // unless this puts the log entry at a later date than the arrival date.
- // This happens e.g. when a log entry from December arrives in Januari.
-
- log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year());
- if (log_date > date(arrival))
- {
- log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year() - 1);
- }
- }
+ case COOKER_OBJECT:
+#ifdef DEBUG
+ std::cerr << "\ncooker check: " << pan->check_pattern(line) << "\n";
+#endif
+ pan->cook_this(line, arrival);
- if (verbose)
- {
- *log << " Log timestamp = " << log_date << " " << log_time << "\n";
- }
- rest = line << 19;
- i = rest.index(' ');
- if (rest(0,i) == hostname(0,i))
+ if (pan->hostname() == hostname(0,~pan->hostname()))
{
- rest <<= i + 1;
- if (verbose)
- {
- *log << " Hostname matches.\n";
- *log << " rest = " << rest << "\n";
- }
- for (i = 0; isalpha(rest[i]) && i < ~rest; i++);
- if (verbose)
- {
- *log << " Service name = " << rest(0,i) << "\n";
- }
+#ifdef DEBUG
+ std::cerr << " Information from cooker:\n";
+ std::cerr << " timestamp = " << pan->timestamp() << "\n";
+ std::cerr << " hostname = " << pan->hostname() << "\n";
+ std::cerr << " service = " << pan->service() << "\n";
+#endif
/* Insert a new record into the log table */
insertion += "'" + objectid + "',";
- insertion += "'" + rest(0,i) + "',";
- insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
+ insertion += "'" + pan->service() + "',";
+ insertion += "'" + pan->timestamp().format("%Y-%m-%d %T") + "',";
insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
insertion += "'" + SQL_Escape(line) + "',FALSE";
insertion += ")";
}
else
{
- *log << " Hostname " << rest(0,i) << " does not match.\n";
- }
- break;
-
- case ACCESSLOG:
- datestring = line(regex("\\[.+\\]"));
- datestring <<= 1;
- datestring >>= 1;
- datestring[datestring.index(':')] = ' ';
- log_date = datestring;
- log_time = datestring;
- if (verbose)
- {
- *log << " Log timestamp = " << log_date << " " << log_time << "\n";
- }
- insertion += "'" + objectid + "',";
- insertion += "'" + service + "',";
- insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
- insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
- insertion += "'" + SQL_Escape(line) + "',FALSE";
- insertion += ")";
-
- if (testmode)
- {
- *log << insertion << "\n";
- }
- else
- {
- database.Query(insertion);
- }
-
- if (verbose)
- {
- *log << "\n\n";
+ *log << " Hostname " << pan->hostname() << " does not match.\n";
}
-
- nr_lines++;
break;
- case ERRORLOG:
- datestring = line(regex("\\[.+\\]"));
- datestring <<= 1;
- datestring >>= 1;
- log_date = datestring;
- log_time = datestring;
- if (verbose)
- {
- *log << " Log timestamp = " << log_date << " " << log_time << "\n";
- }
- insertion += "'" + objectid + "',";
- insertion += "'" + service + "',";
- insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
- insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
- insertion += "'" + SQL_Escape(line) + "',FALSE";
- insertion += ")";
-
- if (testmode)
- {
- *log << insertion << "\n";
- }
- else
- {
- database.Query(insertion);
- }
-
- if (verbose)
- {
- *log << "\n\n";
- }
-
- nr_lines++;
- break;
case RPMLIST:
// Scan a list of packages and versions from "rpm -a".