2 /**************************************************************************
3 ** (c) Copyright 2002, Andromeda Technology & Automation
4 ** This is free software; you can redistribute it and/or modify it under the
5 ** terms of the GNU General Public License, see the file COPYING.
6 ***************************************************************************
7 ** MODULE INFORMATION *
8 ***********************
9 ** FILE NAME : message.cpp
10 ** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
11 ** VERSION NUMBER : $Revision: 1.14 $
13 ** DESCRIPTION : Implementation of the message handling classes
18 ***************************************************************************
19 ** ADMINISTRATIVE INFORMATION *
20 ********************************
21 ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
22 ** CREATION DATE : Sep 16, 2002
23 ** LAST UPDATE : Jul 24, 2003
25 **************************************************************************/
27 /*****************************
29 Revision 1.14 2003-09-01 06:57:14 arjen
30 Reject log entries that are found to be invalid.
32 Revision 1.13 2003/08/16 15:28:45 arjen
33 Fixed a namespace problem
35 Revision 1.12 2003/08/11 16:56:16 arjen
36 Different kinds of log files are parsed by a collection of objects
37 of different classes, derived from the base class line_cooker
38 Depending on the message content or the message_type element in
39 XML, one of these objects is selected.
41 Logrunner is integrated with gcm_input. Although its functionality
42 is still limited, a connection between logrunner and gcm_input
45 Revision 1.11 2003/08/05 08:15:00 arjen
46 Debug output to the log stream instead of cerr.
47 Fixed namespace problems in XPath searches of the DOM.
48 Moved string utility functions to a separate file.
50 Revision 1.10 2003/04/29 09:16:44 arjen
52 Only cooked log entries for now.
54 Revision 1.9 2003/03/29 09:04:10 arjen
55 Extract the hostname out of the 'From:' or 'Message-Id:' line
58 Revision 1.8 2003/03/16 09:42:40 arjen
59 Read IRIX system logs.
61 Revision 1.7 2003/02/21 08:08:05 arjen
62 Gcm_input also detects packages that are removed from the system.
63 Determining the version number of a package in a RPM
64 list is improved. Only the last one or two parts of the string that
65 begin with a '-' and a number are considered the version.
67 Revision 1.6 2003/02/05 09:37:51 arjen
68 Create notifications when a new package is discovered
69 in a 'rpm -qa' list or when the version of a package is changed.
71 Revision 1.4 2002/12/06 22:26:28 arjen
72 Set the value of log.processed to FALSE when inserting a
73 new log entry into the database
74 When a syslog entry arrives from last year, gcm_input subtracts one from the
75 year of arrival to create the year of the log entry.
76 Read output from "rpm -qa" and enter packages in the parameter table.
78 Revision 1.3 2002/11/09 08:04:27 arjen
79 Added a reference to the GPL
81 Revision 1.2 2002/11/04 10:13:36 arjen
82 Use proper namespace for iostream classes
84 Revision 1.1 2002/10/05 10:25:49 arjen
85 Creation of gcm_input and a first approach to a web interface
87 *****************************/
// RCS identification string of this source file.
89 static const char *RCSID = "$Id: message.cpp,v 1.14 2003-09-01 06:57:14 arjen Exp $";
92 #include <libxml/xpath.h>
93 #include <libxml/debugXML.h>
// Globals shared with the rest of the program.
98 extern bool verbose; /* Defined in the main application */
// When false, enter() records packages that are absent from an RPM list as removed.
100 extern bool incremental;
// Stream that receives the diagnostic messages written by this module.
101 extern std::ostream *log;
103 /* Utility functions */
// Used in this file to escape raw log text before it is embedded in an SQL statement.
105 extern String SQL_Escape(String s);
107 /*=========================================================================
108 ** NAME : operator >>
109 ** SYNOPSIS : bool operator >> (message_buffer &, String &)
111 ** RETURN VALUE : True if input was available.
113 ** DESCRIPTION : Input operator. Read the next line from the message.
119 ** LAST MODIFIED : Nov 04, 2002
120 **=========================================================================
// Input operator of message_buffer: delivers the next line of the message in s.
// NOTE(review): most of the body is not visible in this listing; the comments
// below only describe the lines that are.
123 bool operator >> (message_buffer &b, String &s)
125 bool input_ok = false;
// All buffered lines have been handed out already.
127 if (b.next_line == b.buffer.end())
// The line l (presumably just read from the underlying stream - TODO confirm)
// is remembered in the buffer.
133 b.buffer.push_back(l);
135 // next_line keeps pointing to the end.
// Constructs a client_message from an input stream and a database object.
// A new message starts out as not encrypted and with an UNKNOWN classification.
// NOTE(review): the remaining initialisations are not visible in this listing.
150 client_message::client_message(std::istream *in, gnucomo_database db)
157 gpg_encrypted = false;
158 classification = UNKNOWN;
// Date formats as they appear in a mail 'Date:' header and in the UNIX
// mailbox 'From ' separator line.  They are kept as strings so they can be
// embedded in the larger patterns below.
163 static const String mail_date_re("[[:alpha:]]{3}, [ 123]?[0-9] [[:alpha:]]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [+-][0-9]{4}");
164 static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
// Patterns that identify the kind of content of a message.
166 static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
167 static const regex re_dump("^ *DUMP: Date of this level");
168 static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");
// Patterns for the mail header lines that carry the sender and the date.
170 static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
171 static const regex re_mail_From("^From:[[:blank:]]+");
172 static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
173 static const regex re_mail_MsId("^Message-Id:[[:blank:]]+");
174 static const regex re_email_address("[[:alnum:]_.-]+@[[:alnum:]_.-]+");
175 static const regex re_email_user("[[:alnum:]_.-]+@");
// The '?' of the closing "?>" has to reach the regex engine as "\?".  Inside a
// C++ string literal "\?" is only an escape for a plain '?', which would
// quantify the preceding ".*" instead of matching a literal question mark.
176 static const regex re_xml_header("xml .*\\?>$");
178 /*=========================================================================
179 ** NAME : readXMLinput
180 ** SYNOPSIS : int readXMLinput(String first_line)
182 ** RETURN VALUE : Parse the XML input and extract the header information
190 ** LAST MODIFIED : Jul 24, 2003
191 **=========================================================================
// Parses the XML message with the libxml2 push parser and extracts the header
// fields (message type, hostname, service and time) with XPath expressions.
// first_line is the line that was already consumed from the input; all
// remaining lines are read from 'input'.
// NOTE(review): parts of this function (braces, declarations, the return
// statement) are not visible in this listing.
194 int client_message::readXMLinput(String first_line)
196 xmlParserCtxtPtr ctxt;
198 xmlNodePtr root, item;
199 xmlNsPtr namespaces[1];
201 xmlXPathObjectPtr res;
202 xmlXPathContextPtr pathcontext;
// Feed the document to the parser line by line; the final call with
// terminate=1 completes the parse.
205 ctxt = xmlCreatePushParserCtxt(NULL, NULL, first_line, ~first_line, NULL);
206 while (input >> line)
208 xmlParseChunk(ctxt, line, ~line, 0);
210 xmlParseChunk(ctxt, "", 0, 1);
211 xmlDom = ctxt->myDoc;
212 xmlFreeParserCtxt(ctxt);
// NOTE(review): root is dereferenced without a NULL check; a document without
// a root element would crash here.
214 root = xmlDocGetRootElement(xmlDom);
215 namespaces[0] = root->ns;
217 //TODO Ought to check root->name and root->ns->href
// The XPath searches start at the root element and use the namespace of the
// root element to resolve the 'gcmt' prefix.
219 pathcontext = xmlXPathNewContext(xmlDom);
220 pathcontext->node = xmlDocGetRootElement(xmlDom);
221 pathcontext->namespaces = namespaces;
222 pathcontext->nsNr = 1;
225 xmlDebugDumpNodeList(stdout, pathcontext->node, 0);
// xmlXPathEval() returns NULL on an evaluation error and may return an empty
// node set when nothing matches; both cases must be excluded before the first
// node of the set is dereferenced.
228 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:messagetype/text()", pathcontext);
229 if (res != NULL && !xmlXPathNodeSetIsEmpty(res->nodesetval))
232 xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
234 item = *res->nodesetval->nodeTab;
236 // Select a line cooker based on the message type.
239 std::cout << "Looking for a line cooker for " << item->content << "\n";
241 std::list<line_cooker *>::iterator lci = kitchen.begin();
243 while (pan == 0 && lci != kitchen.end())
246 if (pan->message_type() != (const char *)(item->content))
254 *log << "Can not find a line cooker for message type " << item->content << "\n";
259 *log << "Message type not found in XML header.\n";
262 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:hostname/text()", pathcontext);
263 if (res != NULL && !xmlXPathNodeSetIsEmpty(res->nodesetval))
266 xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
268 item = *res->nodesetval->nodeTab;
269 hostname = (const char *)item->content;
273 *log << "Hostname not found in XML header.\n";
// Service and time are optional: nothing is logged when they are absent.
276 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:service/text()", pathcontext);
277 if (res != NULL && !xmlXPathNodeSetIsEmpty(res->nodesetval))
279 item = *res->nodesetval->nodeTab;
280 service = (const char *)item->content;
282 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:time/text()", pathcontext);
283 if (res != NULL && !xmlXPathNodeSetIsEmpty(res->nodesetval))
285 item = *res->nodesetval->nodeTab;
286 arrival = String((char *)item->content);
289 //xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
293 /*=========================================================================
295 ** SYNOPSIS : double classify(String host, UTC arriv, String serv)
297 ** RETURN VALUE : The certainty with which the message is classified.
305 ** LAST MODIFIED : Aug 11, 2003
306 **=========================================================================
// Examines the message to find out what kind of data it holds and where and
// when it came from.  According to the function header the result is the
// certainty with which the message is classified.
// NOTE(review): many lines of this function are not visible in this listing;
// the comments below only describe the lines that are.
309 double client_message::classify(String host, UTC arriv, String serv)
317 /* First, check if the message has a mail header. */
319 if (input >> line && line == re_uxmail_from)
325 /* Scan ahead for the hostname and date of arrival. */
// The mail header ends at the first empty line.
327 while (input >> line && line != "")
329 if (line == re_mail_From)
// Take the e-mail address from the line and keep only the part after the '@'.
331 from_address = line(re_email_address);
332 from_address(re_email_user) = ""; // Remove the user part;
// A candidate only replaces the current hostname when it is longer.
333 if (from_address != "" && ~hostname < ~from_address)
335 *log << "Detected hostname " << from_address << "\n";
336 hostname = from_address;
// The Message-Id line is treated in the same way as the From: line.
339 if (line == re_mail_MsId)
341 from_address = line(re_email_address);
342 from_address(re_email_user) = ""; // Remove the user part;
343 if (from_address != "" && ~hostname < ~from_address)
345 *log << "Detected hostname " << from_address << "\n";
346 hostname = from_address;
349 if (line == re_mail_Date)
// NOTE(review): the regex is rebuilt from mail_date_re on every Date: line.
351 arrival = UTC(line(regex(mail_date_re)));
357 // Push the first line back, we need to read it again.
365 * Now that we have the mail header out of the way, try to figure
366 * out what the content of the message is.
370 while (input >> line && certainty < 0.9)
374 *log << " testing: " << line << "\n";
377 if (line == re_xml_header)
380 classification = XML;
383 *log << "XML input detected.\n";
387 else if (line == re_PGP)
390 gpg_encrypted = true;
391 *log << "The message is PGP/GnuPG encrypted.\n";
393 else if (line == re_dump)
398 *log << "DUMP output detected.\n";
401 else if (line == re_rpm)
404 classification = RPMLIST;
408 *log << "RPM package list detected.\n";
413 // Scan the list of line cookers if there is anything familiar.
415 std::list<line_cooker *>::iterator lci = kitchen.begin();
417 while (pan == 0 && lci != kitchen.end())
420 if (!pan->check_pattern(line))
429 classification = COOKER_OBJECT;
432 *log << "Detected message type " << pan->message_type() << "\n";
// Report what is still missing after the scan.
441 *log << "Can not determine the hostname where the message came from.\n";
444 else if (!arrival.proper())
446 *log << "Arrival time is not known.\n";
457 /*=========================================================================
459 ** SYNOPSIS : void enterXML()
461 ** RETURN VALUE : None
463 ** DESCRIPTION : Analyze the DOM tree from the XML input.
464 ** The DOM tree was previously parsed by readXMLinput().
470 ** LAST MODIFIED : Jul 24, 2003
471 **=========================================================================
// Walks the DOM tree that readXMLinput() stored in xmlDom and inserts the log
// entries found in the <data> element into the 'log' table.  Both <raw>
// entries (cooked here by the selected line cooker) and <cooked> entries are
// handled.
// NOTE(review): parts of this function (braces, declarations, loop
// increments) are not visible in this listing.
474 void client_message::enterXML()
476 xmlXPathObjectPtr res;
477 xmlXPathContextPtr pathcontext;
478 xmlNsPtr namespaces[1];
480 /* Try to find the host in the database */
484 objectid = database.find_host(hostname);
487 *log << "Please define the host " << hostname << " in the database.\n";
492 *log << "Object id for " << hostname << " is " << objectid << "\n";
// XPath searches start at the root element and use its namespace.
495 pathcontext = xmlXPathNewContext(xmlDom);
496 pathcontext->node = xmlDocGetRootElement(xmlDom);
497 namespaces[0] = pathcontext->node->ns;
498 pathcontext->namespaces = namespaces;
499 pathcontext->nsNr = 1;
501 res = xmlXPathEval((const xmlChar *)"gcmt:data/node()", pathcontext);
// xmlXPathEval() returns NULL on an evaluation error and may return an empty
// node set when nothing matches; exclude both before using the first node.
503 if (res != NULL && !xmlXPathNodeSetIsEmpty(res->nodesetval))
505 // Find the first child element of the <data> element.
507 xmlNodePtr node = *res->nodesetval->nodeTab;
508 while (node->type != XML_ELEMENT_NODE)
512 if (strcmp((char *)node->name, "log") == 0)
514 // Each child contains a log entry, raw or cooked.
516 node = node->children;
519 if (node->type == XML_ELEMENT_NODE)
// A <raw> entry holds an unparsed log line that the line cooker has to take apart.
527 if (strcmp((char *)node->name, "raw") == 0)
529 item = node->children;
532 *log << "Can not cook this type of <raw> log element.\n";
536 raw = String((const char *)item->content);
537 if (pan->cook_this(raw, arrival))
// Fall back to the hostname of the message when the log line does not name one.
539 log_hostname = pan->hostname();
540 if (log_hostname == "")
542 log_hostname = hostname;
544 log_service = pan->service();
545 log_date = pan->timestamp();
549 *log << "Log line " << raw << " does not match.\n";
554 else if (strcmp((char *)node->name, "cooked") == 0)
556 // Find the parts of the log entry
560 *log << "Analyzing cooked element.\n";
// The following XPath expressions are relative to the <cooked> element.
562 pathcontext->node = node;
564 res = xmlXPathEval((const xmlChar *)"hostname/text()", pathcontext);
565 if (res != NULL && !xmlXPathNodeSetIsEmpty(res->nodesetval))
567 item = *res->nodesetval->nodeTab;
568 log_hostname = (const char *)item->content;
// The hostname in the entry must be a prefix of the hostname of the message.
569 if (log_hostname != hostname(0, ~log_hostname))
571 *log << "Hostname " << log_hostname << " does not match.\n";
577 log_hostname = hostname;
580 res = xmlXPathEval((const xmlChar *)"service/text()", pathcontext);
581 if (res != NULL && !xmlXPathNodeSetIsEmpty(res->nodesetval))
583 item = *res->nodesetval->nodeTab;
584 log_service = (const char *)item->content;
588 log_service = service;
591 res = xmlXPathEval((const xmlChar *)"timestamp/text()", pathcontext);
592 if (res != NULL && !xmlXPathNodeSetIsEmpty(res->nodesetval))
594 item = *res->nodesetval->nodeTab;
595 log_date = String((const char *)item->content);
599 *log << "<timestamp> missing from cooked log element.\n";
602 res = xmlXPathEval((const xmlChar *)"raw/text()", pathcontext);
603 if (res != NULL && !xmlXPathNodeSetIsEmpty(res->nodesetval))
605 item = *res->nodesetval->nodeTab;
606 raw = String((const char *)item->content);
610 *log << "<raw> missing from cooked log element.\n";
615 // Insert a new log record into the database.
616 if (raw != "" && log_hostname != "" && log_date.proper())
618 String insertion("insert into log (objectid, servicecode,"
619 " object_timestamp, timestamp, rawdata, processed) values (");
621 /* Insert a new record into the log table */
// log_service originates from the message, so it is escaped just like the raw
// log line before it becomes part of the SQL statement.
623 insertion += "'" + objectid + "',";
624 insertion += "'" + SQL_Escape(log_service) + "',";
625 insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
626 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
627 insertion += "'" + SQL_Escape(raw) + "',FALSE";
632 *log << insertion << "\n";
636 database.Query(insertion);
651 *log << "Data element " << node->name << " is not supported.\n";
656 *log << "Data node not found.\n";
660 /*=========================================================================
662 ** SYNOPSIS : int enter()
664 ** RETURN VALUE : The number of lines successfully parsed from the input
672 ** LAST MODIFIED : Jul 24, 2003
673 **=========================================================================
676 int client_message::enter()
678 if (classification == XML)
688 String change_notification("");
689 String create_notification("");
690 bool initial_entry = false;
692 std::list<String> packages;
695 /* Double-check the classification of the message */
697 if (classification == UNKNOWN || certainty < 0.9 || gpg_encrypted)
704 // Skip the mail header.
706 while (input >> line && line != "");
709 /* Try to find the host in the database */
713 objectid = database.find_host(hostname);
716 *log << "Please define the host " << hostname << " in the database.\n";
721 *log << "Object id for " << hostname << " is " << objectid << "\n";
724 if (classification == RPMLIST)
729 /* Read all packages, so we will know which ones are */
730 /* missing at the end. */
732 qry = "select name from parameter where objectid='";
733 qry += objectid + "' and class='package'";
734 n_packages = database.Query(qry);
735 initial_entry = n_packages == 0;
738 *log << n_packages << " packages in database.\n";
740 for (int t = 0; t < n_packages; t++)
742 packages.push_back(database.Field(t, "name"));
745 *log << "Package list built: " << packages.size() << ".\n";
749 /* Scan the input line by line, entring records into the database */
751 String rest; // Rest of the line to be parsed
754 while (input >> line)
758 *log << line << "\n";
762 /* Check each line if it contains valid information */
766 switch (classification)
782 String insertion("insert into log (objectid, servicecode,"
783 " object_timestamp, timestamp, rawdata, processed) values (");
786 switch (classification)
790 std::cerr << "\ncooker check: " << pan->check_pattern(line) << "\n";
792 if (pan->cook_this(line, arrival))
794 if (pan->hostname() == hostname(0,~pan->hostname()))
798 std::cerr << " Information from cooker:\n";
799 std::cerr << " timestamp = " << pan->timestamp() << "\n";
800 std::cerr << " hostname = " << pan->hostname() << "\n";
801 std::cerr << " service = " << pan->service() << "\n";
803 /* Insert a new record into the log table */
805 insertion += "'" + objectid + "',";
806 insertion += "'" + pan->service() + "',";
807 insertion += "'" + pan->timestamp().format("%Y-%m-%d %T") + "',";
808 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
809 insertion += "'" + SQL_Escape(line) + "',FALSE";
814 *log << insertion << "\n";
818 database.Query(insertion);
830 *log << " Hostname " << pan->hostname() << " does not match.\n";
835 *log << "gcm_input WARNING: Not a valid line: " << line << "\n";
841 // Scan a list of packages and versions from "rpm -a".
842 // A similar listing can be created on IRIX 6.5 by using the
843 // command "showprods -3 -n|awk '{printf "%s-%s\n",$2,$3}'|grep -v '^[-=]' \
844 // |grep -v Version-Description".
846 // We have to separate the package name and the version.
847 // The separation is marked by a '-', followed by a digit.
848 // However, there may be other sequences of '-'digit in the package name,
849 // do we have to scan ahead until there is at most one such sequence
850 // left in the version string. The '-'digit seqeunce inside the
851 // version usually separates the version and the release number.
853 int version_start, next_version_start;
857 next_version_start = i;
859 while (i < ~line - 1)
861 while (i < ~line - 1 && !(line[i] == '-' && isdigit(line[i + 1])))
867 version_start = next_version_start;
868 next_version_start = i;
873 if (!isdigit(line[version_start + 1]))
875 version_start = next_version_start;
877 String package(line(0,version_start));
878 String version(line(version_start + 1, ~line));
885 *log << "Package is " << package;
886 *log << ", version is " << version << "\n";
889 // Construct a qry to check the package's existance
891 qry = "select paramid from parameter where objectid='";
892 qry += objectid + "' and class='package' and name='";
893 qry += package + "'";
895 if (database.Query(qry) == 1)
897 std::list<String>::iterator lp;
899 lp = find(packages.begin(), packages.end(), package);
900 if (lp != packages.end())
906 *log << "Could NOT find " << package << " in list.\n";
909 paramid = database.Field(0, "paramid");
910 qry = "select value from property where paramid='";
911 qry += paramid + "' and name='version'";
912 if (database.Query(qry) == 0)
914 *log << "Database corruption: Package " << package;
915 *log << " does not have a 'version' property.\n";
917 else if (database.Field(0, "value") != version)
921 *log << " Parameter " << package << " has different version\n";
923 insertion = "update property set value='";
924 insertion += version + "' where paramid='";
925 insertion += paramid + "' and name='version'";
927 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
928 insert_h += " values ('";
929 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'MODIFIED', 'version', '";
930 insert_h += version + "')";
932 database.Query(insertion);
933 database.Query(insert_h);
935 if (change_notification == "")
937 remark = "Gnucomo detected a different version for package parameter(s) ";
938 change_notification = database.new_notification(objectid, "property modified", remark);
941 if (change_notification != "")
943 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
944 insertion += change_notification + "', '";
945 insertion += paramid + "')";
947 database.Query(insertion);
951 *log << "gcm_input ERROR: Cannot create 'property modified' notification.\n";
958 *log << " Parameter " << package << " has not changed.\n";
967 *log << " Parameter " << package << " does not exist.\n";
969 // Create a new package parameter, including version property and history record
971 insertion = "insert into parameter (objectid, name, class, description) values ('";
972 insertion += objectid + "', '" + package + "', 'package', 'RPM package " + package + "')";
976 *log << insertion << "\n";
980 database.Query(insertion);
981 qry = "select paramid from parameter where objectid='";
982 qry += objectid + "' and class='package' and name='";
983 qry += package + "'";
985 paramid = database.Field(0, "paramid");
988 insertion = "insert into property (paramid, name, value, type) values ('";
989 insertion += paramid + "', 'version', '";
990 insertion += version + "', 'STATIC')";
991 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
992 insert_h += " values ('";
993 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'CREATED', 'version', '";
994 insert_h += version + "')";
998 *log << insertion << "\n" << insert_h << "\n";
1002 database.Query(insertion);
1003 database.Query(insert_h);
1006 if (create_notification == "")
1008 remark = "Gnucomo detected new parameter(s) of class package";
1009 create_notification = database.new_notification(objectid, "parameter created", remark);
1011 if (create_notification != "")
1013 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
1014 insertion += create_notification + "', '";
1015 insertion += paramid + "')";
1017 database.Query(insertion);
1021 *log << "gcm_input ERROR: Cannot create 'parameter created' notification.\n";
1039 *log << "gcm_input WARNING: Not a valid line: " << line << "\n";
1043 if (classification == RPMLIST && !incremental)
1045 std::list<String>::iterator lp;
1046 String remove_notification("");
1049 * If there are any packages left in the list, they seem to have
1050 * disappeared from the system.
1053 for (lp = packages.begin(); lp != packages.end(); lp++)
1059 // Construct a qry to check the package's existance
1061 qry = "select paramid from parameter where objectid='";
1062 qry += objectid + "' and class='package' and name='";
1065 if (database.Query(qry) == 1)
1067 paramid = database.Field(0, "paramid");
1068 qry ="select change_nature from history where paramid='";
1069 qry += paramid + "' order by modified desc";
1070 if (database.Query(qry) <= 0)
1072 *log << "Database ERROR: no history record for parameter " << *lp << ".\n";
1074 else if (database.Field(0, "change_nature") != "REMOVED")
1078 *log << "Removing parameter " << *lp << ".\n";
1081 insert = "insert into history (paramid, modified, change_nature)";
1082 insert += " values ('";
1083 insert += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'REMOVED')";
1085 database.Query(insert);
1087 if (remove_notification == "")
1089 remark = "Gnucomo detected that package(s) have disappeared ";
1090 remove_notification = database.new_notification(objectid, "parameter removed", remark);
1093 if (remove_notification != "")
1095 insert = "insert into parameter_notification (notificationid, paramid) values ('";
1096 insert += remove_notification + "', '";
1097 insert += paramid + "')";
1099 database.Query(insert);
1103 *log << "gcm_input ERROR: Cannot create 'parameter removed' notification.\n";
1112 *log << nr_lines << " lines parsed from the log file.\n";