2 /**************************************************************************
3 ** (c) Copyright 2002, Andromeda Technology & Automation
4 ** This is free software; you can redistribute it and/or modify it under the
5 ** terms of the GNU General Public License, see the file COPYING.
6 ***************************************************************************
7 ** MODULE INFORMATION *
8 ***********************
9 ** FILE NAME : message.cpp
10 ** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
11 ** VERSION NUMBER : $Revision: 1.12 $
13 ** DESCRIPTION : Implementation of the message handling classes
18 ***************************************************************************
19 ** ADMINISTRATIVE INFORMATION *
20 ********************************
21 ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
22 ** CREATION DATE : Sep 16, 2002
23 ** LAST UPDATE : Aug 11, 2003
25 **************************************************************************/
27 /*****************************
29 Revision 1.12 2003-08-11 16:56:16 arjen
30 Different kinds of log files are parsed by a collection of objects
31 of different classes, derived from the base class line_cooker
32 Depending on the message content or the message_type element in
33 XML, one of these objects is selected.
35 Logrunner is integrated with gcm_input. Although its functionality
36 is still limited, a connection between logrunner and gcm_input
39 Revision 1.11 2003/08/05 08:15:00 arjen
40 Debug output to the log stream instead of cerr.
41 Fixed namespace problems in XPath searches of the DOM.
42 Moved string utility functions to a separate file.
44 Revision 1.10 2003/04/29 09:16:44 arjen
46 Only cooked log entries for now.
48 Revision 1.9 2003/03/29 09:04:10 arjen
49 Extract the hostname out of the 'From:' or 'Message-Id:' line
52 Revision 1.8 2003/03/16 09:42:40 arjen
53 Read IRIX system logs.
55 Revision 1.7 2003/02/21 08:08:05 arjen
56 Gcm_input also detects packages that are removed from the system.
57 Determining the version number of a package in a RPM
58 list is improved. Only the last one or two parts of the string that
59 begin with a '-' and a number are considered the version.
61 Revision 1.6 2003/02/05 09:37:51 arjen
62 Create notifications when a new package is discovered
63 in a 'rpm -qa' list or when the version of a package is changed.
65 Revision 1.4 2002/12/06 22:26:28 arjen
66 Set the value of log.processed to FALSE when inserting a
67 new log entry into the database
68 When a syslog entry arrives from last year, gcm_input subtracts one from the
69 year of arrival to create the year of the log entry.
70 Read output from "rpm -qa" and enter packages in the parameter table.
72 Revision 1.3 2002/11/09 08:04:27 arjen
73 Added a reference to the GPL
75 Revision 1.2 2002/11/04 10:13:36 arjen
76 Use proper namespace for iostream classes
78 Revision 1.1 2002/10/05 10:25:49 arjen
79 Creation of gcm_input and a first approach to a web interface
81 *****************************/
83 static const char *RCSID = "$Id: message.cpp,v 1.12 2003-08-11 16:56:16 arjen Exp $";
86 #include <libxml/xpath.h>
87 #include <libxml/debugXML.h>
92 extern bool verbose; /* Defined in the main application */
94 extern bool incremental;
95 extern std::ostream *log;
97 /* Utility functions */
99 extern String SQL_Escape(String s);
101 /*=========================================================================
102 ** NAME : operator >>
103 ** SYNOPSIS : bool operator >> (message_buffer &, String &)
105 ** RETURN VALUE : True if input was available.
107 ** DESCRIPTION : Input operator. Read the next line from the message.
113 ** LAST MODIFIED : Nov 04, 2002
114 **=========================================================================
117 bool operator >> (message_buffer &b, String &s)
119 bool input_ok = false;
121 if (b.next_line == b.buffer.end())
127 b.buffer.push_back(l);
129 // next_line keeps pointing to the end.
144 client_message::client_message(std::istream *in, gnucomo_database db)
151 gpg_encrypted = false;
152 classification = UNKNOWN;
/*
 * Reusable pattern fragments. They are concatenated into the complete
 * expressions below, so they carry no anchors of their own.
 *   syslog_date_re   : e.g. "Aug 11 16:56:16"
 *   mail_date_re     : e.g. "Mon, 11 Aug 2003 16:56:16 +0200"
 *   unix_date_re     : e.g. "Mon Aug 11 16:56:16 2003"
 *   email_address_re : user@host, each side made of alphanumerics, '_', '.' and '-'
 */
157 static const String syslog_date_re("[[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}");
158 static const String mail_date_re("[[:alpha:]]{3}, [ 123]?[0-9] [[:alpha:]]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [+-][0-9]{4}");
159 static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
160 static const String email_address_re("[[:alnum:]_.-]+@[[:alnum:]_.-]+");

/* Patterns that recognize the content of a message line. */
162 static const regex re_syslog(syslog_date_re + " [[:alnum:]]+ [[:alpha:]]+.*:.+");
163 static const regex re_syslog_irix(syslog_date_re + " [0-7][A-T]:[[:alnum:]]+ [[:alpha:]]+.*:.+");
164 static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
165 static const regex re_dump("^ *DUMP: Date of this level");
166 static const regex re_accesslog("(GET|POST) .+ HTTP");
167 static const regex re_errorlog("^\\[" + unix_date_re + "\\] \\[(error|notice)\\] .+");
168 static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");

/* Patterns that recognize mail header lines and the addresses inside them. */
170 static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
171 static const regex re_mail_From("^From:[[:blank:]]+");
172 static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
173 static const regex re_mail_MsId("^Message-Id:[[:blank:]]+");
/* Built from the shared fragment instead of repeating the same literal. */
174 static const regex re_email_address(email_address_re);
175 static const regex re_email_user("[[:alnum:]_.-]+@");
/*
 * End of an XML declaration: "xml ...?>" at the end of the line.
 * The backslash must be doubled: "\?" is a C++ escape sequence for a plain
 * '?', which would reach the regex engine unescaped and no longer match a
 * literal question mark.
 */
176 static const regex re_xml_header("xml .*\\?>$");
178 /*=========================================================================
179 ** NAME : readXMLinput
180 ** SYNOPSIS : int readXMLinput(String first_line)
182 ** RETURN VALUE : Parse the XML input and extract the header information
190 ** LAST MODIFIED : Jul 24, 2003
191 **=========================================================================
194 int client_message::readXMLinput(String first_line)
196 xmlParserCtxtPtr ctxt;
198 xmlNodePtr root, item;
199 xmlNsPtr namespaces[1];
201 xmlXPathObjectPtr res;
202 xmlXPathContextPtr pathcontext;
205 ctxt = xmlCreatePushParserCtxt(NULL, NULL, first_line, ~first_line, NULL);
206 while (input >> line)
208 xmlParseChunk(ctxt, line, ~line, 0);
210 xmlParseChunk(ctxt, "", 0, 1);
211 xmlDom = ctxt->myDoc;
212 xmlFreeParserCtxt(ctxt);
214 root = xmlDocGetRootElement(xmlDom);
215 namespaces[0] = root->ns;
217 //TODO Ought to check root->name and root->ns->href
219 pathcontext = xmlXPathNewContext(xmlDom);
220 pathcontext->node = xmlDocGetRootElement(xmlDom);
221 pathcontext->namespaces = namespaces;
222 pathcontext->nsNr = 1;
225 xmlDebugDumpNodeList(stdout, pathcontext->node, 0);
228 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:messagetype/text()", pathcontext);
229 if (res->nodesetval != NULL)
232 xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
234 item = *res->nodesetval->nodeTab;
236 // Select a line cooker based on the message type.
239 std::cout << "Looking for a line cooker for " << item->content << "\n";
241 list<line_cooker *>::iterator lci = kitchen.begin();
243 while (pan == 0 && lci != kitchen.end())
246 if (pan->message_type() != (const char *)(item->content))
254 *log << "Can not find a line cooker for message type " << item->content << "\n";
259 *log << "Message type not found in XML header.\n";
262 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:hostname/text()", pathcontext);
263 if (res->nodesetval != NULL)
266 xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
268 item = *res->nodesetval->nodeTab;
269 hostname = (const char *)item->content;
273 *log << "Hostname not found in XML header.\n";
276 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:service/text()", pathcontext);
277 if (res->nodesetval != NULL)
279 item = *res->nodesetval->nodeTab;
280 service = (const char *)item->content;
282 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:time/text()", pathcontext);
283 if (res->nodesetval != NULL)
285 item = *res->nodesetval->nodeTab;
286 arrival = String((char *)item->content);
289 //xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
293 /*=========================================================================
295 ** SYNOPSIS : double classify(String host, UTC arriv, String serv)
297 ** RETURN VALUE : The certainty with which the message is classified.
305 ** LAST MODIFIED : Aug 11, 2003
306 **=========================================================================
309 double client_message::classify(String host, UTC arriv, String serv)
317 /* First, check if the message has a mail header. */
319 if (input >> line && line == re_uxmail_from)
325 /* Scan ahead for the hostname and date of arrival. */
327 while (input >> line && line != "")
329 if (line == re_mail_From)
331 from_address = line(re_email_address);
332 from_address(re_email_user) = ""; // Remove the user part;
333 if (from_address != "" && ~hostname < ~from_address)
335 *log << "Detected hostname " << from_address << "\n";
336 hostname = from_address;
339 if (line == re_mail_MsId)
341 from_address = line(re_email_address);
342 from_address(re_email_user) = ""; // Remove the user part;
343 if (from_address != "" && ~hostname < ~from_address)
345 *log << "Detected hostname " << from_address << "\n";
346 hostname = from_address;
349 if (line == re_mail_Date)
351 arrival = UTC(line(regex(mail_date_re)));
357 // Push the first line back, we need to read it again.
365 * Now that we have the mail header out of the way, try to figure
366 * out what the content of the message is.
370 while (input >> line && certainty < 0.9)
374 *log << " testing: " << line << "\n";
377 if (line == re_xml_header)
380 classification = XML;
383 *log << "XML input detected.\n";
387 else if (line == re_PGP)
390 gpg_encrypted = true;
391 *log << "The message is PGP/GnuPG encrypted.\n";
393 else if (line == re_dump)
398 *log << "DUMP output detected.\n";
401 else if (line == re_rpm)
404 classification = RPMLIST;
408 *log << "RPM package list detected.\n";
413 // Scan the list of line cookers if there is anything familiar.
415 list<line_cooker *>::iterator lci = kitchen.begin();
417 while (pan == 0 && lci != kitchen.end())
420 if (!pan->check_pattern(line))
429 classification = COOKER_OBJECT;
432 *log << "Detected message type " << pan->message_type() << "\n";
441 *log << "Can not determine the hostname where the message came from.\n";
444 else if (!arrival.proper())
446 *log << "Arrival time is not knwon.\n";
457 /*=========================================================================
459 ** SYNOPSIS : void enterXML()
461 ** RETURN VALUE : None
463 ** DESCRIPTION : Analyze the DOM tree from the XML input.
464 ** The DOM tree was previously parsed by readXMLinput().
470 ** LAST MODIFIED : Jul 24, 2003
471 **=========================================================================
474 void client_message::enterXML()
476 xmlXPathObjectPtr res;
477 xmlXPathContextPtr pathcontext;
478 xmlNsPtr namespaces[1];
480 /* Try to find the host in the database */
484 objectid = database.find_host(hostname);
487 *log << "Please define the host " << hostname << " in the database.\n";
492 *log << "Object id for " << hostname << " is " << objectid << "\n";
495 pathcontext = xmlXPathNewContext(xmlDom);
496 pathcontext->node = xmlDocGetRootElement(xmlDom);
497 namespaces[0] = pathcontext->node->ns;
498 pathcontext->namespaces = namespaces;
499 pathcontext->nsNr = 1;
501 res = xmlXPathEval((const xmlChar *)"gcmt:data/node()", pathcontext);
503 if (res->nodesetval != NULL)
505 // Find the first child element of the <data> element.
507 xmlNodePtr node = *res->nodesetval->nodeTab;
508 while (node->type != XML_ELEMENT_NODE)
512 if (strcmp((char *)node->name, "log") == 0)
514 // Each child contains a log entry, raw or cooked.
516 node = node->children;
519 if (node->type == XML_ELEMENT_NODE)
527 if (strcmp((char *)node->name, "raw") == 0)
529 item = node->children;
532 *log << "Can not cook this type of <raw> log element.\n";
536 raw = String((const char *)item->content);
537 if (pan->cook_this(raw, arrival))
539 log_hostname = pan->hostname();
540 if (log_hostname == "")
542 log_hostname = hostname;
544 log_service = pan->service();
545 log_date = pan->timestamp();
549 *log << "Log line " << raw << " does not match.\n";
554 else if (strcmp((char *)node->name, "cooked") == 0)
556 // Find the parts of the log entry
560 *log << "Analyzing cooked element.\n";
562 pathcontext->node = node;
564 res = xmlXPathEval((const xmlChar *)"hostname/text()", pathcontext);
565 if (res->nodesetval != NULL)
567 item = *res->nodesetval->nodeTab;
568 log_hostname = (const char *)item->content;
569 if (log_hostname != hostname(0, ~log_hostname))
571 *log << "Hostname " << log_hostname << " does not match.\n";
577 log_hostname = hostname;
580 res = xmlXPathEval((const xmlChar *)"service/text()", pathcontext);
581 if (res->nodesetval != NULL)
583 item = *res->nodesetval->nodeTab;
584 log_service = (const char *)item->content;
588 log_service = service;
591 res = xmlXPathEval((const xmlChar *)"timestamp/text()", pathcontext);
592 if (res->nodesetval != NULL)
594 item = *res->nodesetval->nodeTab;
595 log_date = String((const char *)item->content);
599 *log << "<timestamp> missing from cooked log element.\n";
602 res = xmlXPathEval((const xmlChar *)"raw/text()", pathcontext);
603 if (res->nodesetval != NULL)
605 item = *res->nodesetval->nodeTab;
606 raw = String((const char *)item->content);
610 *log << "<raw> missing from cooked log element.\n";
615 // Insert a new log record into the database.
616 if (raw != "" && log_hostname != "" && log_date.proper())
618 String insertion("insert into log (objectid, servicecode,"
619 " object_timestamp, timestamp, rawdata, processed) values (");
621 /* Insert a new record into the log table */
623 insertion += "'" + objectid + "',";
624 insertion += "'" + log_service + "',";
625 insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
626 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
627 insertion += "'" + SQL_Escape(raw) + "',FALSE";
632 *log << insertion << "\n";
636 database.Query(insertion);
651 *log << "Data element " << node->name << " is not supported.\n";
656 *log << "Data node not found.\n";
660 /*=========================================================================
662 ** SYNOPSIS : int enter()
664 ** RETURN VALUE : The number of lines successfully parsed from the input
672 ** LAST MODIFIED : Jul 24, 2003
673 **=========================================================================
676 int client_message::enter()
678 if (classification == XML)
688 String change_notification("");
689 String create_notification("");
690 bool initial_entry = false;
692 std::list<String> packages;
695 /* Double-check the classification of the message */
697 if (classification == UNKNOWN || certainty < 0.9 || gpg_encrypted)
704 // Skip the mail header.
706 while (input >> line && line != "");
709 /* Try to find the host in the database */
713 objectid = database.find_host(hostname);
716 *log << "Please define the host " << hostname << " in the database.\n";
721 *log << "Object id for " << hostname << " is " << objectid << "\n";
724 if (classification == RPMLIST)
729 /* Read all packages, so we will know which ones are */
730 /* missing at the end. */
732 qry = "select name from parameter where objectid='";
733 qry += objectid + "' and class='package'";
734 n_packages = database.Query(qry);
735 initial_entry = n_packages == 0;
738 *log << n_packages << " packages in database.\n";
740 for (int t = 0; t < n_packages; t++)
742 packages.push_back(database.Field(t, "name"));
745 *log << "Package list built: " << packages.size() << ".\n";
749 /* Scan the input line by line, entering records into the database */
751 String rest; // Rest of the line to be parsed
754 while (input >> line)
758 *log << line << "\n";
762 /* Check each line if it contains valid information */
766 switch (classification)
782 String insertion("insert into log (objectid, servicecode,"
783 " object_timestamp, timestamp, rawdata, processed) values (");
786 switch (classification)
790 std::cerr << "\ncooker check: " << pan->check_pattern(line) << "\n";
792 pan->cook_this(line, arrival);
794 if (pan->hostname() == hostname(0,~pan->hostname()))
798 std::cerr << " Information from cooker:\n";
799 std::cerr << " timestamp = " << pan->timestamp() << "\n";
800 std::cerr << " hostname = " << pan->hostname() << "\n";
801 std::cerr << " service = " << pan->service() << "\n";
803 /* Insert a new record into the log table */
805 insertion += "'" + objectid + "',";
806 insertion += "'" + pan->service() + "',";
807 insertion += "'" + pan->timestamp().format("%Y-%m-%d %T") + "',";
808 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
809 insertion += "'" + SQL_Escape(line) + "',FALSE";
814 *log << insertion << "\n";
818 database.Query(insertion);
830 *log << " Hostname " << pan->hostname() << " does not match.\n";
836 // Scan a list of packages and versions from "rpm -qa".
837 // A similar listing can be created on IRIX 6.5 by using the
838 // command "showprods -3 -n|awk '{printf "%s-%s\n",$2,$3}'|grep -v '^[-=]' \
839 // |grep -v Version-Description".
841 // We have to separate the package name and the version.
842 // The separation is marked by a '-', followed by a digit.
843 // However, there may be other sequences of '-'digit in the package name,
844 // so we have to scan ahead until there is at most one such sequence
845 // left in the version string. The '-'digit sequence inside the
846 // version usually separates the version and the release number.
848 int version_start, next_version_start;
852 next_version_start = i;
854 while (i < ~line - 1)
856 while (i < ~line - 1 && !(line[i] == '-' && isdigit(line[i + 1])))
862 version_start = next_version_start;
863 next_version_start = i;
868 if (!isdigit(line[version_start + 1]))
870 version_start = next_version_start;
872 String package(line(0,version_start));
873 String version(line(version_start + 1, ~line));
880 *log << "Package is " << package;
881 *log << ", version is " << version << "\n";
884 // Construct a qry to check the package's existence
886 qry = "select paramid from parameter where objectid='";
887 qry += objectid + "' and class='package' and name='";
888 qry += package + "'";
890 if (database.Query(qry) == 1)
892 std::list<String>::iterator lp;
894 lp = find(packages.begin(), packages.end(), package);
895 if (lp != packages.end())
901 *log << "Could NOT find " << package << " in list.\n";
904 paramid = database.Field(0, "paramid");
905 qry = "select value from property where paramid='";
906 qry += paramid + "' and name='version'";
907 if (database.Query(qry) == 0)
909 *log << "Database corruption: Package " << package;
910 *log << " does not have a 'version' property.\n";
912 else if (database.Field(0, "value") != version)
916 *log << " Parameter " << package << " has different version\n";
918 insertion = "update property set value='";
919 insertion += version + "' where paramid='";
920 insertion += paramid + "' and name='version'";
922 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
923 insert_h += " values ('";
924 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'MODIFIED', 'version', '";
925 insert_h += version + "')";
927 database.Query(insertion);
928 database.Query(insert_h);
930 if (change_notification == "")
932 remark = "Gnucomo detected a different version for package parameter(s) ";
933 change_notification = database.new_notification(objectid, "property modified", remark);
936 if (change_notification != "")
938 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
939 insertion += change_notification + "', '";
940 insertion += paramid + "')";
942 database.Query(insertion);
946 *log << "gcm_input ERROR: Cannot create 'property modified' notification.\n";
953 *log << " Parameter " << package << " has not changed.\n";
962 *log << " Parameter " << package << " does not exist.\n";
964 // Create a new package parameter, including version property and history record
966 insertion = "insert into parameter (objectid, name, class, description) values ('";
967 insertion += objectid + "', '" + package + "', 'package', 'RPM package " + package + "')";
971 *log << insertion << "\n";
975 database.Query(insertion);
976 qry = "select paramid from parameter where objectid='";
977 qry += objectid + "' and class='package' and name='";
978 qry += package + "'";
980 paramid = database.Field(0, "paramid");
983 insertion = "insert into property (paramid, name, value, type) values ('";
984 insertion += paramid + "', 'version', '";
985 insertion += version + "', 'STATIC')";
986 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
987 insert_h += " values ('";
988 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'CREATED', 'version', '";
989 insert_h += version + "')";
993 *log << insertion << "\n" << insert_h << "\n";
997 database.Query(insertion);
998 database.Query(insert_h);
1001 if (create_notification == "")
1003 remark = "Gnucomo detected new parameter(s) of class package";
1004 create_notification = database.new_notification(objectid, "parameter created", remark);
1006 if (create_notification != "")
1008 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
1009 insertion += create_notification + "', '";
1010 insertion += paramid + "')";
1012 database.Query(insertion);
1016 *log << "gcm_input ERROR: Cannot create 'parameter created' notification.\n";
1034 *log << "gcm_input WARNING: Not a valid line: " << line << "\n";
1038 if (classification == RPMLIST && !incremental)
1040 std::list<String>::iterator lp;
1041 String remove_notification("");
1044 * If there are any packages left in the list, they seem to have
1045 * disappeared from the system.
1048 for (lp = packages.begin(); lp != packages.end(); lp++)
1054 // Construct a qry to check the package's existence
1056 qry = "select paramid from parameter where objectid='";
1057 qry += objectid + "' and class='package' and name='";
1060 if (database.Query(qry) == 1)
1062 paramid = database.Field(0, "paramid");
1063 qry ="select change_nature from history where paramid='";
1064 qry += paramid + "' order by modified desc";
1065 if (database.Query(qry) <= 0)
1067 *log << "Database ERROR: no history record for parameter " << *lp << ".\n";
1069 else if (database.Field(0, "change_nature") != "REMOVED")
1073 *log << "Removing parameter " << *lp << ".\n";
1076 insert = "insert into history (paramid, modified, change_nature)";
1077 insert += " values ('";
1078 insert += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'REMOVED')";
1080 database.Query(insert);
1082 if (remove_notification == "")
1084 remark = "Gnucomo detected that package(s) have disappeared ";
1085 remove_notification = database.new_notification(objectid, "parameter removed", remark);
1088 if (remove_notification != "")
1090 insert = "insert into parameter_notification (notificationid, paramid) values ('";
1091 insert += remove_notification + "', '";
1092 insert += paramid + "')";
1094 database.Query(insert);
1098 *log << "gcm_input ERROR: Cannot create 'parameter removed' notification.\n";
1107 *log << nr_lines << " lines parsed from the log file.\n";