From 66ef806b6da939a527015ba77d3f33931e957f25 Mon Sep 17 00:00:00 2001 From: arjen Date: Tue, 29 Apr 2003 09:16:44 +0000 Subject: [PATCH] Read XML input, Only cooked log entries for now. --- src/gcm_input/message.cpp | 283 ++++++++++++++++++++++++++++++++++++++++++++-- src/gcm_input/message.h | 24 +++- 2 files changed, 294 insertions(+), 13 deletions(-) diff --git a/src/gcm_input/message.cpp b/src/gcm_input/message.cpp index 55c1d1a..5951f21 100644 --- a/src/gcm_input/message.cpp +++ b/src/gcm_input/message.cpp @@ -8,7 +8,7 @@ *********************** ** FILE NAME : message.cpp ** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring -** VERSION NUMBER : $Revision: 1.9 $ +** VERSION NUMBER : $Revision: 1.10 $ ** ** DESCRIPTION : Implementation of the message handling classes ** @@ -20,13 +20,17 @@ ******************************** ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl ** CREATION DATE : Sep 16, 2002 -** LAST UPDATE : Mar 28, 2003 +** LAST UPDATE : Apr 29, 2003 ** MODIFICATIONS : **************************************************************************/ /***************************** $Log: message.cpp,v $ - Revision 1.9 2003-03-29 09:04:10 arjen + Revision 1.10 2003-04-29 09:16:44 arjen + Read XML input, + Only cooked log entries for now. + + Revision 1.9 2003/03/29 09:04:10 arjen Extract the hostname out of the 'From:' or 'Message-Id:' line of an email header. @@ -61,9 +65,11 @@ *****************************/ -static const char *RCSID = "$Id: message.cpp,v 1.9 2003-03-29 09:04:10 arjen Exp $"; +static const char *RCSID = "$Id: message.cpp,v 1.10 2003-04-29 09:16:44 arjen Exp $"; #include +#include +#include #include "message.h" extern bool verbose; /* Defined in the main application */ @@ -125,6 +131,7 @@ client_message::client_message(std::istream *in, gnucomo_database db) mail_header = false; gpg_encrypted = false; classification = UNKNOWN; + xmlDom = NULL; certainty = 0.0; } @@ -148,6 +155,76 @@ static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re); static const regex re_mail_MsId("^Message-Id:[[:blank:]]+"); static const regex re_email_address("[[:alnum:]_.-]+@[[:alnum:]_.-]+"); static const regex re_email_user("[[:alnum:]_.-]+@"); +static const regex re_xml_header("\?xml .*\?>$"); + +/*========================================================================= +** NAME : readXMLinput +** SYNOPSIS : int readXMLinput(String first_line) +** PARAMETERS : +** RETURN VALUE : Parse the XML input and extract the header information +** +** DESCRIPTION : +** +** VARS USED : +** VARS CHANGED : +** FUNCTIONS USED : +** SEE ALSO : +** LAST MODIFIED : Apr 28, 2003 +**========================================================================= +*/ + +int client_message::readXMLinput(String first_line) +{ + xmlParserCtxtPtr ctxt; + String line; + xmlNodePtr root, item; + + xmlXPathObjectPtr res; + xmlXPathContextPtr pathcontext; + + + ctxt = xmlCreatePushParserCtxt(NULL, NULL, first_line, ~first_line, NULL); + while (input >> line) + { + xmlParseChunk(ctxt, line, ~line, 0); + } + xmlParseChunk(ctxt, "", 0, 1); + xmlDom = ctxt->myDoc; + xmlFreeParserCtxt(ctxt); + + root = xmlDocGetRootElement(xmlDom); + //TODO Ought to check root->name and root->ns->href + + pathcontext = xmlXPathNewContext(xmlDom); + pathcontext->node = xmlDocGetRootElement(xmlDom); + + res = xmlXPathEval((const xmlChar *)"header/messagetype/text()", pathcontext); + if (res->nodesetval != NULL) + { + item = *res->nodesetval->nodeTab; + } + res = xmlXPathEval((const xmlChar *)"header/hostname/text()", pathcontext); + if (res->nodesetval != NULL) + { + item = *res->nodesetval->nodeTab; + hostname = (const char *)item->content; + } + res = xmlXPathEval((const xmlChar *)"header/service/text()", pathcontext); + if (res->nodesetval != NULL) + { + item = *res->nodesetval->nodeTab; + service = (const char *)item->content; + } + res = xmlXPathEval((const xmlChar *)"header/time/text()", pathcontext); + if (res->nodesetval != NULL) + { + item = *res->nodesetval->nodeTab; + arrival = String((char *)item->content); + + } + //xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0); + +} /*========================================================================= ** NAME : classify @@ -161,7 +238,7 @@ static const regex re_email_user("[[:alnum:]_.-]+@"); ** VARS CHANGED : ** FUNCTIONS USED : ** SEE ALSO : -** LAST MODIFIED : Mar 28, 2003 +** LAST MODIFIED : Apr 28, 2003 **========================================================================= */ @@ -224,8 +301,22 @@ double client_message::classify(String host, UTC arriv, String serv) while (input >> line && certainty < 0.9) { - std::cout << " testing: " << line << "\n"; - if (line == re_syslog) + if (verbose) + { + std::cout << " testing: " << line << "\n"; + } + + if (line == re_xml_header) + { + certainty = 1.0; + classification = XML; + if (verbose) + { + std::cout << "XML input detected.\n"; + } + readXMLinput(line); + } + else if (line == re_syslog) { certainty = 1.0; classification = SYSLOG; @@ -309,6 +400,178 @@ double client_message::classify(String host, UTC arriv, String serv) } /*========================================================================= +** NAME : enterXML +** SYNOPSIS : int enterXML() +** PARAMETERS : +** RETURN VALUE : None +** +** DESCRIPTION : Analyze the DOM tree from the XML input. +** The DOM tree was previously parsed by readXMLinput(). +** +** VARS USED : +** VARS CHANGED : +** FUNCTIONS USED : +** SEE ALSO : +** LAST MODIFIED : Apr 29, 2003 +**========================================================================= +*/ + +void client_message::enterXML() +{ + xmlXPathObjectPtr res; + xmlXPathContextPtr pathcontext; + + /* Try to find the host in the database */ + + String objectid; + + objectid = database.find_host(hostname); + if (objectid == "") + { + std::cerr << "Please define the host " << hostname << " in the database.\n"; + return; + } + if (verbose) + { + std::cout << "Object id for " << hostname << " is " << objectid << "\n"; + } + + pathcontext = xmlXPathNewContext(xmlDom); + pathcontext->node = xmlDocGetRootElement(xmlDom); + res = xmlXPathEval((const xmlChar *)"data/node()", pathcontext); + + if (res->nodesetval != NULL) + { + // Find the first child element of the element. + + xmlNodePtr node = *res->nodesetval->nodeTab; + while (node->type != XML_ELEMENT_NODE) + { + node = node->next; + } + if (strcmp((char *)node->name, "log") == 0) + { + // Each child contains a log entry, raw or cooked. + + node = node->children; + while (node != NULL) + { + if (node->type == XML_ELEMENT_NODE) + { + if (strcmp((char *)node->name, "raw") == 0) + { + std::cerr << "Can not cook log elements yet.\n"; + } + else if (strcmp((char *)node->name, "cooked") == 0) + { + // Find the parts of the log entry + + xmlNodePtr item; + String log_hostname; + UTC log_date; + String raw("");; + String log_service; + + if (verbose) + { + std::cout << "Analyzing cooked element.\n"; + } + pathcontext->node = node; + + res = xmlXPathEval((const xmlChar *)"hostname/text()", pathcontext); + if (res->nodesetval != NULL) + { + item = *res->nodesetval->nodeTab; + log_hostname = (const char *)item->content; + if (log_hostname != hostname(0, ~log_hostname)) + { + std::cerr << "Hostname " << log_hostname << " does not match.\n"; + log_hostname = ""; + } + } + else + { + log_hostname = hostname; + } + + res = xmlXPathEval((const xmlChar *)"service/text()", pathcontext); + if (res->nodesetval != NULL) + { + item = *res->nodesetval->nodeTab; + log_service = (const char *)item->content; + } + else + { + log_service = service; + } + + res = xmlXPathEval((const xmlChar *)"timestamp/text()", pathcontext); + if (res->nodesetval != NULL) + { + item = *res->nodesetval->nodeTab; + log_date = String((const char *)item->content); + } + else + { + std::cerr << " missing from cooked log element.\n"; + } + + res = xmlXPathEval((const xmlChar *)"raw/text()", pathcontext); + if (res->nodesetval != NULL) + { + item = *res->nodesetval->nodeTab; + raw = String((const char *)item->content); + } + else + { + std::cerr << " missing from cooked log element.\n"; + } + + if (raw != "" && log_hostname != "" && log_date.proper()) + { + String insertion("insert into log (objectid, servicecode," + " object_timestamp, timestamp, rawdata, processed) values ("); + + /* Insert a new record into the log table */ + + insertion += "'" + objectid + "',"; + insertion += "'" + log_service + "',"; + insertion += "'" + log_date.format("%Y-%m-%d %T") + "',"; + insertion += "'" + arrival.format("%Y-%m-%d %T") + "',"; + insertion += "'" + SQL_Escape(raw) + "',FALSE"; + insertion += ")"; + + if (testmode) + { + std::cout << insertion << "\n"; + } + else + { + database.Query(insertion); + } + + if (verbose) + { + std::cout << "\n\n"; + } + } + } + } + node = node->next; + } + } + else + { + std::cerr << "Data element " << node->name << " is not supported.\n"; + } + } + else + { + std::cerr << "Data node not found.\n"; + } +} + +/*========================================================================= ** NAME : enter ** SYNOPSIS : int enter() ** PARAMETERS : @@ -326,6 +589,12 @@ double client_message::classify(String host, UTC arriv, String serv) int client_message::enter() { + if (classification == XML) + { + enterXML(); + return 1; + } + long nr_lines = 0; String line; String qry; diff --git a/src/gcm_input/message.h b/src/gcm_input/message.h index c3c9ff1..34c8eb8 100644 --- a/src/gcm_input/message.h +++ b/src/gcm_input/message.h @@ -8,7 +8,7 @@ *********************** ** FILE NAME : message.h ** SYSTEM NAME : -** VERSION NUMBER : $Revision: 1.5 $ +** VERSION NUMBER : $Revision: 1.6 $ ** ** DESCRIPTION : Classes to for handling client messages ** @@ -20,13 +20,17 @@ ******************************** ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl ** CREATION DATE : Sep 16, 2002 -** LAST UPDATE : Feb 28, 2003 +** LAST UPDATE : Apr 28, 2003 ** MODIFICATIONS : **************************************************************************/ /***************************** $Log: message.h,v $ - Revision 1.5 2003-03-16 09:42:40 arjen + Revision 1.6 2003-04-29 09:16:44 arjen + Read XML input, + Only cooked log entries for now. + + Revision 1.5 2003/03/16 09:42:40 arjen Read IRIX system logs. Revision 1.4 2002/12/06 22:26:28 arjen @@ -47,13 +51,15 @@ *****************************/ -/* static const char *RCSID = "$Id: message.h,v 1.5 2003-03-16 09:42:40 arjen Exp $"; */ +/* static const char *RCSID = "$Id: message.h,v 1.6 2003-04-29 09:16:44 arjen Exp $"; */ #include #include #include #include +#include + #include "database.h" /* @@ -135,7 +141,7 @@ public: // // RELATIONS : // SEE ALSO : -// LAST MODIFIED : Feb 28, 2003 +// LAST MODIFIED : Apr 28, 2003 /////////////////////////////////////////////////////////////////////////// */ @@ -148,16 +154,22 @@ class client_message bool mail_header; // Does the message contain a mail header ? bool gpg_encrypted; // Is the message encrypted ? + xmlDocPtr xmlDom; + double certainty; // How certain are we about the message enum { - UNKNOWN, SYSLOG, SYSLOG_IRIX, ACCESSLOG, ERRORLOG, RPMLIST + UNKNOWN, SYSLOG, SYSLOG_IRIX, ACCESSLOG, ERRORLOG, RPMLIST, + XML } classification; message_buffer input; gnucomo_database database; + int readXMLinput(String first_line); + void enterXML(); + public: client_message(std::istream *in, gnucomo_database db); -- 2.11.0