***********************
** FILE NAME : message.cpp
** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
-** VERSION NUMBER : $Revision: 1.7 $
+** VERSION NUMBER : $Revision: 1.10 $
**
** DESCRIPTION : Implementation of the message handling classes
**
********************************
** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
** CREATION DATE : Sep 16, 2002
-** LAST UPDATE : Feb 19, 2003
+** LAST UPDATE : Apr 29, 2003
** MODIFICATIONS :
**************************************************************************/
/*****************************
$Log: message.cpp,v $
- Revision 1.7 2003-02-21 08:08:05 arjen
+ Revision 1.10 2003-04-29 09:16:44 arjen
+ Read XML input,
+ Only cooked log entries for now.
+
+ Revision 1.9 2003/03/29 09:04:10 arjen
+ Extract the hostname out of the 'From:' or 'Message-Id:' line
+ of an email header.
+
+ Revision 1.8 2003/03/16 09:42:40 arjen
+ Read IRIX system logs.
+
+ Revision 1.7 2003/02/21 08:08:05 arjen
Gcm_input also detects packages that are removed from the system.
Determining the version number of a package in a RPM
list is improved. Only the last one or two parts of the string that
*****************************/
-static const char *RCSID = "$Id: message.cpp,v 1.7 2003-02-21 08:08:05 arjen Exp $";
+static const char *RCSID = "$Id: message.cpp,v 1.10 2003-04-29 09:16:44 arjen Exp $";
#include <algorithm>
+#include <libxml/xpath.h>
+#include <libxml/debugXML.h>
#include "message.h"
extern bool verbose; /* Defined in the main application */
mail_header = false;
gpg_encrypted = false;
classification = UNKNOWN;
+ xmlDom = NULL;
certainty = 0.0;
}
static const String syslog_date_re("[[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}");
static const String mail_date_re("[[:alpha:]]{3}, [ 123]?[0-9] [[:alpha:]]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [+-][0-9]{4}");
static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
+static const String email_address_re("[[:alnum:]_.-]+@[[:alnum:]_.-]+");
static const regex re_syslog(syslog_date_re + " [[:alnum:]]+ [[:alpha:]]+.*:.+");
+static const regex re_syslog_irix(syslog_date_re + " [0-7][A-T]:[[:alnum:]]+ [[:alpha:]]+.*:.+");
static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
static const regex re_dump("^ *DUMP: Date of this level");
static const regex re_accesslog("(GET|POST) .+ HTTP");
static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");
static const regex re_syslog_date("[[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}");
-static const regex re_uxmail_from("^From - " + unix_date_re);
+static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
static const regex re_mail_From("^From:[[:blank:]]+");
static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
+static const regex re_mail_MsId("^Message-Id:[[:blank:]]+");
static const regex re_email_address("[[:alnum:]_.-]+@[[:alnum:]_.-]+");
static const regex re_email_user("[[:alnum:]_.-]+@");
+// Matches the tail of an XML declaration, e.g. <?xml version="1.0"?>
+// The backslashes are doubled on purpose: in a C++ string literal "\?" is
+// just the character '?', so a single backslash never reaches the regular
+// expression and the question marks would act as repetition operators
+// instead of matching literally.
+static const regex re_xml_header("\\?xml .*\\?>$");
+
+/*=========================================================================
+**  NAME           : xml_header_text
+**  SYNOPSIS       : static bool xml_header_text(xmlXPathContextPtr context,
+**                                               const char *path, String &text)
+**  PARAMETERS      : context - XPath context; context->node is the node
+**                              against which 'path' is evaluated.
+**                    path    - XPath expression selecting a text node.
+**                    text    - Receives the content of the first node found.
+**  RETURN VALUE    : true if a node with content was found, false otherwise.
+**                    'text' is left untouched when nothing was found.
+**
+**  DESCRIPTION     : Evaluate an XPath expression and copy the content of
+**                    the first node of the result. The XPath result object
+**                    is always released before returning.
+**=========================================================================
+*/
+
+static bool xml_header_text(xmlXPathContextPtr context, const char *path, String &text)
+{
+   bool              found  = false;
+   xmlXPathObjectPtr result = xmlXPathEval(reinterpret_cast<const xmlChar *>(path), context);
+
+   if (result != NULL)
+   {
+      // An expression that selects nothing can yield an empty node set
+      // rather than a NULL one, so testing the pointer alone is not enough.
+
+      if (!xmlXPathNodeSetIsEmpty(result->nodesetval))
+      {
+         xmlNodePtr item = result->nodesetval->nodeTab[0];
+
+         if (item != NULL && item->content != NULL)
+         {
+            text  = reinterpret_cast<const char *>(item->content);
+            found = true;
+         }
+      }
+      xmlXPathFreeObject(result);
+   }
+
+   return found;
+}
+
+/*=========================================================================
+**  NAME           : readXMLinput
+**  SYNOPSIS       : int readXMLinput(String first_line)
+**  PARAMETERS      : first_line - The line of input that was already read
+**                                 and recognized as the XML header.
+**  RETURN VALUE    : 1 if a well-formed XML document was parsed,
+**                    0 if the parser could not be created, no document
+**                    was built or the input was not well-formed.
+**
+**  DESCRIPTION     : Parse the XML input and extract the header information.
+**                    The first line and all remaining lines of the input are
+**                    pushed through the libxml2 push parser. The resulting
+**                    DOM tree is kept in xmlDom. The <hostname>, <service>
+**                    and <time> elements of the <header> are copied into
+**                    hostname, service and arrival when they are present.
+**
+**  VARS USED       : input
+**  VARS CHANGED    : xmlDom, hostname, service, arrival
+**  FUNCTIONS USED  : xml_header_text
+**  SEE ALSO        : enterXML
+**  LAST MODIFIED   : Apr 28, 2003
+**=========================================================================
+*/
+
+int client_message::readXMLinput(String first_line)
+{
+   xmlParserCtxtPtr ctxt = xmlCreatePushParserCtxt(NULL, NULL, first_line, ~first_line, NULL);
+
+   if (ctxt == NULL)
+   {
+      std::cerr << "Can not create an XML parser context.\n";
+      return 0;
+   }
+
+   // NOTE(review): it can not be seen here whether 'input >> line' keeps the
+   // line terminator. If it is stripped, line breaks inside the XML are not
+   // passed on to the parser - confirm.
+
+   String line;
+
+   while (input >> line)
+   {
+      xmlParseChunk(ctxt, line, ~line, 0);
+   }
+   xmlParseChunk(ctxt, "", 0, 1);      // Terminate the document.
+
+   // The parser context does not own the document, so remember the
+   // results before the context is released.
+
+   int well_formed = ctxt->wellFormed;
+
+   xmlDom = ctxt->myDoc;
+   xmlFreeParserCtxt(ctxt);
+
+   if (xmlDom == NULL)
+   {
+      std::cerr << "The XML input could not be parsed.\n";
+      return 0;
+   }
+   if (!well_formed)
+   {
+      // The partial document is still used, just like before.
+
+      std::cerr << "The XML input is not well-formed.\n";
+   }
+
+   //TODO Ought to check root->name and root->ns->href
+   //TODO header/messagetype is not used yet.
+
+   xmlXPathContextPtr pathcontext = xmlXPathNewContext(xmlDom);
+
+   if (pathcontext == NULL)
+   {
+      std::cerr << "Can not create an XPath context.\n";
+      return 0;
+   }
+   pathcontext->node = xmlDocGetRootElement(xmlDom);
+
+   String text;
+
+   if (xml_header_text(pathcontext, "header/hostname/text()", text))
+   {
+      hostname = text;
+   }
+   if (xml_header_text(pathcontext, "header/service/text()", text))
+   {
+      service = text;
+   }
+   if (xml_header_text(pathcontext, "header/time/text()", text))
+   {
+      arrival = text;
+   }
+
+   xmlXPathFreeContext(pathcontext);
+
+   return well_formed ? 1 : 0;
+}
/*=========================================================================
** NAME : classify
** VARS CHANGED :
** FUNCTIONS USED :
** SEE ALSO :
-** LAST MODIFIED : Nov 16, 2002
+** LAST MODIFIED : Apr 28, 2003
**=========================================================================
*/
{
from_address = line(re_email_address);
from_address(re_email_user) = ""; // Remove the user part;
- hostname = from_address;
+ if (from_address != "")
+ {
+ hostname = from_address;
+ }
+ }
+ if (line == re_mail_MsId)
+ {
+ from_address = line(re_email_address);
+ from_address(re_email_user) = ""; // Remove the user part;
+ if (from_address != "")
+ {
+ hostname = from_address;
+ }
}
if (line == re_mail_Date)
{
{
// Push the first line back, we need to read it again.
--input;
+
}
/*
while (input >> line && certainty < 0.9)
{
- std::cout << " testing: " << line << "\n";
- if (line == re_syslog)
+ if (verbose)
+ {
+ std::cout << " testing: " << line << "\n";
+ }
+
+ if (line == re_xml_header)
+ {
+ certainty = 1.0;
+ classification = XML;
+ if (verbose)
+ {
+ std::cout << "XML input detected.\n";
+ }
+ readXMLinput(line);
+ }
+ else if (line == re_syslog)
{
certainty = 1.0;
classification = SYSLOG;
std::cout << "Syslog detected.\n";
}
}
+ else if (line == re_syslog_irix)
+ {
+ certainty = 1.0;
+ classification = SYSLOG_IRIX;
+ if (verbose)
+ {
+ std::cout << "IRIX Syslog detected.\n";
+ }
+ }
else if (line == re_PGP)
{
certainty = 1.0;
}
/*=========================================================================
+**  NAME           : xml_cooked_text
+**  SYNOPSIS       : static bool xml_cooked_text(xmlXPathContextPtr context,
+**                                               const char *path, String &text)
+**  PARAMETERS      : context - XPath context; context->node is the node
+**                              against which 'path' is evaluated.
+**                    path    - XPath expression selecting a text node.
+**                    text    - Receives the content of the first node found.
+**  RETURN VALUE    : true if a node with content was found, false otherwise.
+**                    'text' is left untouched when nothing was found.
+**
+**  DESCRIPTION     : Evaluate an XPath expression and copy the content of
+**                    the first node of the result. The XPath result object
+**                    is always released before returning.
+**=========================================================================
+*/
+
+static bool xml_cooked_text(xmlXPathContextPtr context, const char *path, String &text)
+{
+   bool              found  = false;
+   xmlXPathObjectPtr result = xmlXPathEval(reinterpret_cast<const xmlChar *>(path), context);
+
+   if (result != NULL)
+   {
+      // An expression that selects nothing can yield an empty node set
+      // rather than a NULL one, so testing the pointer alone is not enough.
+
+      if (!xmlXPathNodeSetIsEmpty(result->nodesetval))
+      {
+         xmlNodePtr item = result->nodesetval->nodeTab[0];
+
+         if (item != NULL && item->content != NULL)
+         {
+            text  = reinterpret_cast<const char *>(item->content);
+            found = true;
+         }
+      }
+      xmlXPathFreeObject(result);
+   }
+
+   return found;
+}
+
+/*=========================================================================
+**  NAME           : enterXML
+**  SYNOPSIS       : void enterXML()
+**  PARAMETERS      :
+**  RETURN VALUE    : None
+**
+**  DESCRIPTION     : Analyze the DOM tree from the XML input.
+**                    The DOM tree was previously parsed by readXMLinput().
+**                    The first child element of <data> must be a <log>
+**                    element. Each <cooked> child of the <log> is turned into
+**                    an insert statement for the log table, which is printed
+**                    in test mode and sent to the database otherwise.
+**                    <raw> log elements are reported but not processed.
+**                    A cooked entry is skipped when its hostname does not
+**                    match, its raw text is missing or its timestamp is
+**                    not proper.
+**
+**  VARS USED       : xmlDom, hostname, service, arrival, testmode, verbose
+**  VARS CHANGED    :
+**  FUNCTIONS USED  : xml_cooked_text, SQL_Escape
+**  SEE ALSO        : readXMLinput
+**  LAST MODIFIED   : Apr 29, 2003
+**=========================================================================
+*/
+
+void client_message::enterXML()
+{
+   /* Try to find the host in the database */
+
+   String objectid;
+
+   objectid = database.find_host(hostname);
+   if (objectid == "")
+   {
+      std::cerr << "Please define the host " << hostname << " in the database.\n";
+      return;
+   }
+   if (verbose)
+   {
+      std::cout << "Object id for " << hostname << " is " << objectid << "\n";
+   }
+
+   if (xmlDom == NULL)
+   {
+      std::cerr << "No XML document available.\n";
+      return;
+   }
+
+   xmlXPathContextPtr pathcontext = xmlXPathNewContext(xmlDom);
+
+   if (pathcontext == NULL)
+   {
+      std::cerr << "Can not create an XPath context.\n";
+      return;
+   }
+   pathcontext->node = xmlDocGetRootElement(xmlDom);
+
+   // Find the first child element of the <data> element.
+   // The nodes belong to the document, so they stay valid after the
+   // XPath result object is released.
+
+   xmlXPathObjectPtr data = xmlXPathEval(reinterpret_cast<const xmlChar *>("data/node()"),
+                                         pathcontext);
+   bool       data_found  = (data != NULL && !xmlXPathNodeSetIsEmpty(data->nodesetval));
+   xmlNodePtr log_element = NULL;
+
+   if (data_found)
+   {
+      log_element = data->nodesetval->nodeTab[0];
+      while (log_element != NULL && log_element->type != XML_ELEMENT_NODE)
+      {
+         log_element = log_element->next;
+      }
+   }
+   if (data != NULL)
+   {
+      xmlXPathFreeObject(data);
+   }
+
+   if (!data_found)
+   {
+      std::cerr << "Data node not found.\n";
+   }
+   else if (log_element == NULL)
+   {
+      // <data> holds only text, comments and the like.
+
+      std::cerr << "Data element not found.\n";
+   }
+   else if (strcmp(reinterpret_cast<const char *>(log_element->name), "log") != 0)
+   {
+      std::cerr << "Data element " << log_element->name << " is not supported.\n";
+   }
+   else
+   {
+      // Each child contains a log entry, raw or cooked.
+
+      for (xmlNodePtr entry = log_element->children; entry != NULL; entry = entry->next)
+      {
+         if (entry->type != XML_ELEMENT_NODE)
+         {
+            continue;
+         }
+
+         const char *tag = reinterpret_cast<const char *>(entry->name);
+
+         if (strcmp(tag, "raw") == 0)
+         {
+            std::cerr << "Can not cook <raw> log elements yet.\n";
+         }
+         else if (strcmp(tag, "cooked") == 0)
+         {
+            // Find the parts of the log entry
+
+            String  log_hostname;
+            String  log_service;
+            String  timestamp;
+            String  raw("");
+            UTC     log_date;
+
+            if (verbose)
+            {
+               std::cout << "Analyzing cooked element.\n";
+            }
+            pathcontext->node = entry;
+
+            if (xml_cooked_text(pathcontext, "hostname/text()", log_hostname))
+            {
+               if (log_hostname != hostname(0, ~log_hostname))
+               {
+                  std::cerr << "Hostname " << log_hostname << " does not match.\n";
+                  log_hostname = "";
+               }
+            }
+            else
+            {
+               // Fall back on the hostname from the message header.
+
+               log_hostname = hostname;
+            }
+
+            if (!xml_cooked_text(pathcontext, "service/text()", log_service))
+            {
+               // Fall back on the service from the message header.
+
+               log_service = service;
+            }
+
+            if (xml_cooked_text(pathcontext, "timestamp/text()", timestamp))
+            {
+               log_date = timestamp;
+            }
+            else
+            {
+               std::cerr << "<timestamp> missing from cooked log element.\n";
+            }
+
+            if (!xml_cooked_text(pathcontext, "raw/text()", raw))
+            {
+               std::cerr << "<raw> missing from cooked log element.\n";
+            }
+
+            if (raw != "" && log_hostname != "" && log_date.proper())
+            {
+               String insertion("insert into log (objectid, servicecode,"
+                                " object_timestamp, timestamp, rawdata, processed) values (");
+
+               /* Insert a new record into the log table */
+
+               // The service name comes straight from the client's XML,
+               // so it is escaped just like the raw log text.
+
+               insertion += "'" + objectid + "',";
+               insertion += "'" + SQL_Escape(log_service) + "',";
+               insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
+               insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
+               insertion += "'" + SQL_Escape(raw) + "',FALSE";
+               insertion += ")";
+
+               if (testmode)
+               {
+                  std::cout << insertion << "\n";
+               }
+               else
+               {
+                  database.Query(insertion);
+               }
+
+               if (verbose)
+               {
+                  std::cout << "\n\n";
+               }
+            }
+         }
+      }
+   }
+
+   xmlXPathFreeContext(pathcontext);
+}
+
+/*=========================================================================
** NAME : enter
** SYNOPSIS : int enter()
** PARAMETERS :
** VARS CHANGED :
** FUNCTIONS USED :
** SEE ALSO :
-** LAST MODIFIED : Feb 19, 2003
+** LAST MODIFIED : Mar 28, 2003
**=========================================================================
*/
int client_message::enter()
{
+ if (classification == XML)
+ {
+ enterXML();
+ return 1;
+ }
+
long nr_lines = 0;
String line;
String qry;
case SYSLOG:
check = &re_syslog;
break;
+ case SYSLOG_IRIX:
+ check = &re_syslog_irix;
+ break;
case ACCESSLOG:
check = &re_accesslog;
break;
}
break;
+ case SYSLOG_IRIX:
+ log_date = line;
+ log_time = line;
+ if (log_date.Year() < 0 || log_date.Year() > 2500)
+ {
+ // The year is not in the log file. Assume the year of arrival,
+ // unless this puts the log entry at a later date than the arrival date.
+ // This happens e.g. when a log entry from December arrives in January.
+
+ log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year());
+ if (log_date > date(arrival))
+ {
+ log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year() - 1);
+ }
+ }
+
+ if (verbose)
+ {
+ std::cout << " Log timestamp = " << log_date << " " << log_time << "\n";
+ }
+ rest = line << 19;
+ i = rest.index(' ');
+ if (rest(0,i) == hostname(0,i))
+ {
+ rest <<= i + 1;
+ if (verbose)
+ {
+ std::cout << " Hostname matches.\n";
+ std::cout << " rest = " << rest << "\n";
+ }
+ for (i = 0; isalpha(rest[i]) && i < ~rest; i++);
+ if (verbose)
+ {
+ std::cout << " Service name = " << rest(0,i) << "\n";
+ }
+
+ /* Insert a new record into the log table */
+
+ insertion += "'" + objectid + "',";
+ insertion += "'" + rest(0,i) + "',";
+ insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
+ insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
+ insertion += "'" + SQL_Escape(line) + "',FALSE";
+ insertion += ")";
+
+ if (testmode)
+ {
+ std::cout << insertion << "\n";
+ }
+ else
+ {
+ database.Query(insertion);
+ }
+
+ if (verbose)
+ {
+ std::cout << "\n\n";
+ }
+
+ nr_lines++;
+ }
+ else
+ {
+ std::cerr << " Hostname " << rest(0,i) << " does not match.\n";
+ }
+ break;
+
case ACCESSLOG:
datestring = line(regex("\\[.+\\]"));
datestring <<= 1;
i++;
}
+ if (!isdigit(line[version_start + 1]))
+ {
+ version_start = next_version_start;
+ }
String package(line(0,version_start));
String version(line(version_start + 1, ~line));
String paramid;