2 /**************************************************************************
3 ** (c) Copyright 2002, Andromeda Technology & Automation
4 ** This is free software; you can redistribute it and/or modify it under the
5 ** terms of the GNU General Public License, see the file COPYING.
6 ***************************************************************************
7 ** MODULE INFORMATION *
8 ***********************
9 ** FILE NAME : message.cpp
10 ** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
11 ** VERSION NUMBER : $Revision: 1.13 $
13 ** DESCRIPTION : Implementation of the message handling classes
18 ***************************************************************************
19 ** ADMINISTRATIVE INFORMATION *
20 ********************************
21 ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
22 ** CREATION DATE : Sep 16, 2002
23 ** LAST UPDATE : Jul 24, 2003
25 **************************************************************************/
27 /*****************************
29 Revision 1.13 2003-08-16 15:28:45 arjen
30 Fixed a namespace problem
32 Revision 1.12 2003/08/11 16:56:16 arjen
33 Different kinds of log files are parsed by a collection of objects
34 of different classes, derived from the base class line_cooker
35 Depending on the message content or the message_type element in
36 XML, one of these objects is selected.
38 Logrunner is integrated with gcm_input. Although its functionality
39 is still limited, a connection between logrunner and gcm_input
42 Revision 1.11 2003/08/05 08:15:00 arjen
43 Debug output to the log stream instead of cerr.
44 Fixed namespace problems in XPath searches of the DOM.
45 Moved string utility functions to a separate file.
47 Revision 1.10 2003/04/29 09:16:44 arjen
49 Only cooked log entries for now.
51 Revision 1.9 2003/03/29 09:04:10 arjen
52 Extract the hostname out of the 'From:' or 'Message-Id:' line
55 Revision 1.8 2003/03/16 09:42:40 arjen
56 Read IRIX system logs.
58 Revision 1.7 2003/02/21 08:08:05 arjen
59 Gcm_input also detects packages that are removed from the system.
60 Determining the version number of a package in a RPM
61 list is improved. Only the last one or two parts of the string that
62 begin with a '-' and a number are considered the version.
64 Revision 1.6 2003/02/05 09:37:51 arjen
65 Create notifications when a new package is discovered
66 in a 'rpm -qa' list or when the version of a package is changed.
68 Revision 1.4 2002/12/06 22:26:28 arjen
69 Set the value of log.processed to FALSE when inserting a
70 new log entry into the database
71 When a syslog entry arrives from last year, gcm_input subtracts one from the
72 year of arrival to create the year of the log entry.
73 Read output from "rpm -qa" and enter packages in the parameter table.
75 Revision 1.3 2002/11/09 08:04:27 arjen
76 Added a reference to the GPL
78 Revision 1.2 2002/11/04 10:13:36 arjen
79 Use proper namespace for iostream classes
81 Revision 1.1 2002/10/05 10:25:49 arjen
82 Creation of gcm_input and a first approach to a web interface
84 *****************************/
// RCS identification string for this file (expanded $Id$ keyword).
// NOTE(review): it is not referenced anywhere in the visible part of the file.
86 static const char *RCSID = "$Id: message.cpp,v 1.13 2003-08-16 15:28:45 arjen Exp $";
89 #include <libxml/xpath.h>
90 #include <libxml/debugXML.h>
// Globals and helpers that are defined outside this file.
95 extern bool verbose; /* Defined in the main application */
// When false, enter() reports packages that are missing from an RPM list as removed.
97 extern bool incremental;
// Stream that receives the diagnostic messages written by this file.
98 extern std::ostream *log;
100 /* Utility functions */
// Applied in this file to raw log text before it is embedded in an SQL statement.
102 extern String SQL_Escape(String s);
104 /*=========================================================================
105 ** NAME : operator >>
106 ** SYNOPSIS : bool operator >> (message_buffer &, String &)
108 ** RETURN VALUE : True if input was available.
110 ** DESCRIPTION : Input operator. Read the next line from the message.
116 ** LAST MODIFIED : Nov 04, 2002
117 **=========================================================================
// Input operator for message_buffer: delivers the next line of the message in s.
// According to the header comment, the result is true when input was available.
// NOTE(review): several lines of the body are not visible in this excerpt.
120 bool operator >> (message_buffer &b, String &s)
// Result flag, false until proven otherwise; where it is set is not shown here.
122 bool input_ok = false;
// The read position has reached the end of the lines stored so far.
124 if (b.next_line == b.buffer.end())
// The line l (obtained in a part of this branch that is not shown) is stored in the
// buffer - presumably so that it can be handed out again later; confirm.
130 b.buffer.push_back(l);
132 // next_line keeps pointing to the end.
// Constructs a client_message from an input stream and a database object.
// NOTE(review): how in and db are stored is not visible in this excerpt.
147 client_message::client_message(std::istream *in, gnucomo_database db)
// Initial state: not marked as encrypted and not yet classified.
154 gpg_encrypted = false;
155 classification = UNKNOWN;
// Patterns used to recognise mail headers and the kind of content in a message.
// NOTE(review): String and regex are project types, not the standard library ones.
// The quantifiers {n}, + and ? used below imply POSIX extended syntax - confirm
// against the regex class.

// Date as found in a Date: mail header, e.g. Thu, 24 Jul 2003 10:15:00 +0200
160 static const String mail_date_re("[[:alpha:]]{3}, [ 123]?[0-9] [[:alpha:]]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [+-][0-9]{4}");
// Date as found in a Unix mailbox From line, e.g. Thu Jul 24 10:15:00 2003
161 static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");

// Content markers tested by classify().
163 static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
164 static const regex re_dump("^ *DUMP: Date of this level");
// Package name, a dash, then a version that starts with a digit.
165 static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");

// Mail header lines.
167 static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
168 static const regex re_mail_From("^From:[[:blank:]]+");
169 static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
170 static const regex re_mail_MsId("^Message-Id:[[:blank:]]+");
// A complete address, and the user part up to and including the at-sign.
171 static const regex re_email_address("[[:alnum:]_.-]+@[[:alnum:]_.-]+");
172 static const regex re_email_user("[[:alnum:]_.-]+@");
// End of an XML declaration. The backslash is doubled so that the regex engine
// receives an escaped, literal question mark. With a single backslash the C++
// compiler reduces the escape to a bare question mark, which the regex engine
// then treats as a quantifier, so any line ending in > after xml would match.
173 static const regex re_xml_header("xml .*\\?>$");
175 /*=========================================================================
176 ** NAME : readXMLinput
177 ** SYNOPSIS : int readXMLinput(String first_line)
179 ** RETURN VALUE : Parse the XML input and extract the header information
187 ** LAST MODIFIED : Jul 24, 2003
188 **=========================================================================
// Parses the whole XML message into the member xmlDom and extracts the header
// fields message type, hostname, service and time with XPath queries.
// first_line is fed to the parser before the remaining input lines.
// NOTE(review): parts of the body, including the return statement, are not
// visible in this excerpt.
191 int client_message::readXMLinput(String first_line)
193 xmlParserCtxtPtr ctxt;
195 xmlNodePtr root, item;
// One-element namespace table that is handed to the XPath context below.
196 xmlNsPtr namespaces[1];
198 xmlXPathObjectPtr res;
199 xmlXPathContextPtr pathcontext;
// Push parser: the document is supplied in chunks, starting with first_line.
// The ~ operator supplies the chunk size - presumably the String length; confirm.
202 ctxt = xmlCreatePushParserCtxt(NULL, NULL, first_line, ~first_line, NULL);
// Feed every remaining input line to the parser.
203 while (input >> line)
205 xmlParseChunk(ctxt, line, ~line, 0);
// An empty chunk with the terminate flag set ends the document.
207 xmlParseChunk(ctxt, "", 0, 1);
// Keep the parsed document; the parser context is released right away.
208 xmlDom = ctxt->myDoc;
209 xmlFreeParserCtxt(ctxt);
// NOTE(review): root is dereferenced on the next line without a NULL check.
211 root = xmlDocGetRootElement(xmlDom);
212 namespaces[0] = root->ns;
214 //TODO Ought to check root->name and root->ns->href
// XPath context positioned on the root element, with the namespace of the root
// as the only registered namespace - presumably this is what lets the gcmt:
// prefix in the queries below resolve; confirm.
216 pathcontext = xmlXPathNewContext(xmlDom);
217 pathcontext->node = xmlDocGetRootElement(xmlDom);
218 pathcontext->namespaces = namespaces;
219 pathcontext->nsNr = 1;
// Debug dump to stdout. Any guard around it (e.g. verbose) is not visible here.
222 xmlDebugDumpNodeList(stdout, pathcontext->node, 0);
// Header field 1: the message type, which selects the line cooker.
// NOTE(review): in the visible lines only nodesetval is tested; res itself and an
// empty node set are not checked before nodeTab is dereferenced. The same holds
// for the other header queries below. No call that frees res or pathcontext is
// visible either - check for leaks.
225 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:messagetype/text()", pathcontext);
226 if (res->nodesetval != NULL)
229 xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
// Only the first node of the result set is used.
231 item = *res->nodesetval->nodeTab;
233 // Select a line cooker based on the message type.
236 std::cout << "Looking for a line cooker for " << item->content << "\n";
// Walk the list of line cookers until pan is set or the list is exhausted.
238 std::list<line_cooker *>::iterator lci = kitchen.begin();
240 while (pan == 0 && lci != kitchen.end())
// A cooker whose message_type() differs from the header value is not a match;
// the statements that assign and reset pan are not shown in this excerpt.
243 if (pan->message_type() != (const char *)(item->content))
251 *log << "Can not find a line cooker for message type " << item->content << "\n";
256 *log << "Message type not found in XML header.\n";
// Header field 2: the hostname of the sending system.
259 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:hostname/text()", pathcontext);
260 if (res->nodesetval != NULL)
263 xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
265 item = *res->nodesetval->nodeTab;
266 hostname = (const char *)item->content;
270 *log << "Hostname not found in XML header.\n";
// Header field 3: the service. No message is logged in the visible lines when it is absent.
273 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:service/text()", pathcontext);
274 if (res->nodesetval != NULL)
276 item = *res->nodesetval->nodeTab;
277 service = (const char *)item->content;
// Header field 4: the time, stored as the arrival time of the message.
279 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:time/text()", pathcontext);
280 if (res->nodesetval != NULL)
282 item = *res->nodesetval->nodeTab;
283 arrival = String((char *)item->content);
286 //xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
290 /*=========================================================================
292 ** SYNOPSIS : double classify(String host, UTC arriv, String serv)
294 ** RETURN VALUE : The certainty with which the message is classified.
302 ** LAST MODIFIED : Aug 11, 2003
303 **=========================================================================
// Works out what kind of message this is. An optional mail header is scanned
// for the hostname and the arrival time, after which body lines are tested
// against the known content patterns and the registered line cookers.
// According to the header comment, the result is the certainty of the
// classification.
// NOTE(review): parts of the body, including the use of the three parameters and
// the assignments to certainty, are not visible in this excerpt.
306 double client_message::classify(String host, UTC arriv, String serv)
314 /* First, check if the message has a mail header. */
// A Unix mailbox From line marks the start of a mail header.
316 if (input >> line && line == re_uxmail_from)
322 /* Scan ahead for the hostname and date of arrival. */
// The header ends at the first empty line.
324 while (input >> line && line != "")
// From: line - take the address and strip the user part, which leaves the host part.
326 if (line == re_mail_From)
328 from_address = line(re_email_address);
329 from_address(re_email_user) = ""; // Remove the user part;
// Adopt the candidate only when it is non-empty and the ~ comparison favours it
// over the current hostname - presumably ~ is the String length; confirm.
330 if (from_address != "" && ~hostname < ~from_address)
332 *log << "Detected hostname " << from_address << "\n";
333 hostname = from_address;
// Message-Id: line - same treatment as the From: line.
336 if (line == re_mail_MsId)
338 from_address = line(re_email_address);
339 from_address(re_email_user) = ""; // Remove the user part;
340 if (from_address != "" && ~hostname < ~from_address)
342 *log << "Detected hostname " << from_address << "\n";
343 hostname = from_address;
// Date: line - the date part becomes the arrival time.
// NOTE(review): a new regex object is built from mail_date_re for every Date: line.
346 if (line == re_mail_Date)
348 arrival = UTC(line(regex(mail_date_re)));
354 // Push the first line back, we need to read it again.
362 * Now that we have the mail header out of the way, try to figure
363 * out what the content of the message is.
// Test body lines until the certainty reaches 0.9 or the input runs out.
// NOTE(review): input >> line is evaluated before the certainty test, so one more
// line is read even when the certainty is already high enough.
367 while (input >> line && certainty < 0.9)
371 *log << " testing: " << line << "\n";
// The content patterns are tried in a fixed order: XML, PGP, dump, RPM list.
374 if (line == re_xml_header)
377 classification = XML;
380 *log << "XML input detected.\n";
384 else if (line == re_PGP)
// Encryption is only flagged here; no classification is assigned in the visible lines.
387 gpg_encrypted = true;
388 *log << "The message is PGP/GnuPG encrypted.\n";
390 else if (line == re_dump)
395 *log << "DUMP output detected.\n";
398 else if (line == re_rpm)
401 classification = RPMLIST;
405 *log << "RPM package list detected.\n";
410 // Scan the list of line cookers if there is anything familiar.
412 std::list<line_cooker *>::iterator lci = kitchen.begin();
// Try each line cooker in turn until one is kept in pan.
414 while (pan == 0 && lci != kitchen.end())
// A cooker whose pattern does not match this line is not a candidate;
// the statements that assign and reset pan are not shown in this excerpt.
417 if (!pan->check_pattern(line))
426 classification = COOKER_OBJECT;
429 *log << "Detected message type " << pan->message_type() << "\n";
// Final checks: a hostname and a proper arrival time are expected to be known by now.
438 *log << "Can not determine the hostname where the message came from.\n";
441 else if (!arrival.proper())
443 *log << "Arrival time is not knwon.\n";
454 /*=========================================================================
456 ** SYNOPSIS : void enterXML()
458 ** RETURN VALUE : None
460 ** DESCRIPTION : Analyze the DOM tree from the XML input.
461 ** The DOM tree was previously parsed by readXMLinput().
467 ** LAST MODIFIED : Jul 24, 2003
468 **=========================================================================
// Stores the log entries of an XML message in the database. The DOM tree in
// xmlDom is expected to have been built by readXMLinput(). Each child of the
// log element is either a raw line, which is handed to the selected line
// cooker, or a cooked entry, whose parts are read with XPath queries.
// NOTE(review): parts of the body are not visible in this excerpt.
471 void client_message::enterXML()
473 xmlXPathObjectPtr res;
474 xmlXPathContextPtr pathcontext;
475 xmlNsPtr namespaces[1];
477 /* Try to find the host in the database */
481 objectid = database.find_host(hostname);
484 *log << "Please define the host " << hostname << " in the database.\n";
489 *log << "Object id for " << hostname << " is " << objectid << "\n";
// XPath context positioned on the root element, with the namespace of the root
// as the only registered namespace.
492 pathcontext = xmlXPathNewContext(xmlDom);
493 pathcontext->node = xmlDocGetRootElement(xmlDom);
494 namespaces[0] = pathcontext->node->ns;
495 pathcontext->namespaces = namespaces;
496 pathcontext->nsNr = 1;
// All child nodes of the data element.
// NOTE(review): in the visible lines only nodesetval is tested after each query;
// res itself and an empty node set are not checked, and no call that frees res
// or pathcontext is visible.
498 res = xmlXPathEval((const xmlChar *)"gcmt:data/node()", pathcontext);
500 if (res->nodesetval != NULL)
502 // Find the first child element of the <data> element.
504 xmlNodePtr node = *res->nodesetval->nodeTab;
// Skip nodes that are not elements; the statement that advances node is not shown.
505 while (node->type != XML_ELEMENT_NODE)
// Only a log element is handled; anything else is reported as unsupported below.
509 if (strcmp((char *)node->name, "log") == 0)
511 // Each child contains a log entry, raw or cooked.
513 node = node->children;
516 if (node->type == XML_ELEMENT_NODE)
// Raw entry: the text is parsed by the line cooker held in pan.
524 if (strcmp((char *)node->name, "raw") == 0)
526 item = node->children;
529 *log << "Can not cook this type of <raw> log element.\n";
533 raw = String((const char *)item->content);
534 if (pan->cook_this(raw, arrival))
// Fall back to the hostname of the message when the cooker did not find one.
536 log_hostname = pan->hostname();
537 if (log_hostname == "")
539 log_hostname = hostname;
541 log_service = pan->service();
542 log_date = pan->timestamp();
546 *log << "Log line " << raw << " does not match.\n";
551 else if (strcmp((char *)node->name, "cooked") == 0)
553 // Find the parts of the log entry
557 *log << "Analyzing cooked element.\n";
// The queries below are relative to the cooked element.
559 pathcontext->node = node;
561 res = xmlXPathEval((const xmlChar *)"hostname/text()", pathcontext);
562 if (res->nodesetval != NULL)
564 item = *res->nodesetval->nodeTab;
565 log_hostname = (const char *)item->content;
// The entry hostname is compared with a part of the message hostname selected
// by (0, ~log_hostname) - presumably the leading part of equal length; confirm.
566 if (log_hostname != hostname(0, ~log_hostname))
568 *log << "Hostname " << log_hostname << " does not match.\n";
// Without a hostname element the hostname of the message is used.
574 log_hostname = hostname;
577 res = xmlXPathEval((const xmlChar *)"service/text()", pathcontext);
578 if (res->nodesetval != NULL)
580 item = *res->nodesetval->nodeTab;
581 log_service = (const char *)item->content;
// Without a service element the service of the message is used.
585 log_service = service;
588 res = xmlXPathEval((const xmlChar *)"timestamp/text()", pathcontext);
589 if (res->nodesetval != NULL)
591 item = *res->nodesetval->nodeTab;
592 log_date = String((const char *)item->content);
596 *log << "<timestamp> missing from cooked log element.\n";
599 res = xmlXPathEval((const xmlChar *)"raw/text()", pathcontext);
600 if (res->nodesetval != NULL)
602 item = *res->nodesetval->nodeTab;
603 raw = String((const char *)item->content);
607 *log << "<raw> missing from cooked log element.\n";
612 // Insert a new log record into the database.
// An entry is stored only when raw text, a hostname and a proper date are all present.
613 if (raw != "" && log_hostname != "" && log_date.proper())
615 String insertion("insert into log (objectid, servicecode,"
616 " object_timestamp, timestamp, rawdata, processed) values (");
618 /* Insert a new record into the log table */
// NOTE(review): only the raw text goes through SQL_Escape; log_service comes
// from the message and is concatenated into the statement unescaped.
620 insertion += "'" + objectid + "',";
621 insertion += "'" + log_service + "',";
622 insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
623 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
624 insertion += "'" + SQL_Escape(raw) + "',FALSE";
629 *log << insertion << "\n";
633 database.Query(insertion);
648 *log << "Data element " << node->name << " is not supported.\n";
653 *log << "Data node not found.\n";
657 /*=========================================================================
659 ** SYNOPSIS : int enter()
661 ** RETURN VALUE : The number of lines successfully parsed from the input
669 ** LAST MODIFIED : Jul 24, 2003
670 **=========================================================================
// Enters the content of a classified message into the database. Log lines go
// through the selected line cooker into the log table. Lines of an RPM package
// list create or update package parameters, with history records and
// notifications. According to the header comment, the result is the number of
// lines successfully parsed from the input.
// NOTE(review): parts of the body are not visible in this excerpt.
673 int client_message::enter()
// XML messages take a separate path; the statements of this branch are not shown.
675 if (classification == XML)
// Notification ids; empty until the first notification of each kind is created.
685 String change_notification("");
686 String create_notification("");
687 bool initial_entry = false;
// Names of the packages currently known in the database for this host.
689 std::list<String> packages;
692 /* Double-check the classification of the message */
694 if (classification == UNKNOWN || certainty < 0.9 || gpg_encrypted)
701 // Skip the mail header.
// The empty loop body is intentional: lines are consumed up to and including
// the first empty line.
703 while (input >> line && line != "");
706 /* Try to find the host in the database */
710 objectid = database.find_host(hostname);
713 *log << "Please define the host " << hostname << " in the database.\n";
718 *log << "Object id for " << hostname << " is " << objectid << "\n";
721 if (classification == RPMLIST)
726 /* Read all packages, so we will know which ones are */
727 /* missing at the end. */
729 qry = "select name from parameter where objectid='";
730 qry += objectid + "' and class='package'";
731 n_packages = database.Query(qry);
// No packages stored yet means this is the first list seen for this host.
732 initial_entry = n_packages == 0;
735 *log << n_packages << " packages in database.\n";
737 for (int t = 0; t < n_packages; t++)
739 packages.push_back(database.Field(t, "name"));
742 *log << "Package list built: " << packages.size() << ".\n";
746 /* Scan the input line by line, entering records into the database */
748 String rest; // Rest of the line to be parsed
751 while (input >> line)
755 *log << line << "\n";
759 /* Check each line if it contains valid information */
// First switch: the case labels and their statements are not shown in this excerpt.
763 switch (classification)
779 String insertion("insert into log (objectid, servicecode,"
780 " object_timestamp, timestamp, rawdata, processed) values (");
// Second switch: the visible statements handle cooked log lines and RPM lists.
783 switch (classification)
// NOTE(review): this debug output goes to std::cerr while the rest of the
// function writes to the log stream.
787 std::cerr << "\ncooker check: " << pan->check_pattern(line) << "\n";
789 pan->cook_this(line, arrival);
// The hostname found by the cooker is compared with a part of the message
// hostname selected by (0, ~hostname) - presumably the leading part of equal
// length; confirm.
791 if (pan->hostname() == hostname(0,~pan->hostname()))
795 std::cerr << " Information from cooker:\n";
796 std::cerr << " timestamp = " << pan->timestamp() << "\n";
797 std::cerr << " hostname = " << pan->hostname() << "\n";
798 std::cerr << " service = " << pan->service() << "\n";
800 /* Insert a new record into the log table */
// Only the log line itself goes through SQL_Escape.
802 insertion += "'" + objectid + "',";
803 insertion += "'" + pan->service() + "',";
804 insertion += "'" + pan->timestamp().format("%Y-%m-%d %T") + "',";
805 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
806 insertion += "'" + SQL_Escape(line) + "',FALSE";
811 *log << insertion << "\n";
815 database.Query(insertion);
827 *log << " Hostname " << pan->hostname() << " does not match.\n";
833 // Scan a list of packages and versions from "rpm -qa".
834 // A similar listing can be created on IRIX 6.5 by using the
835 // command "showprods -3 -n|awk '{printf "%s-%s\n",$2,$3}'|grep -v '^[-=]' \
836 // |grep -v Version-Description".
838 // We have to separate the package name and the version.
839 // The separation is marked by a '-', followed by a digit.
840 // However, there may be other sequences of '-'digit in the package name,
841 // so we have to scan ahead until there is at most one such sequence
842 // left in the version string. The '-'digit sequence inside the
843 // version usually separates the version and the release number.
// version_start trails one dash-digit position behind next_version_start.
845 int version_start, next_version_start;
849 next_version_start = i;
851 while (i < ~line - 1)
// Advance i to the next dash that is directly followed by a digit.
853 while (i < ~line - 1 && !(line[i] == '-' && isdigit(line[i + 1])))
859 version_start = next_version_start;
860 next_version_start = i;
// If the earlier candidate is not followed by a digit, use the later one.
865 if (!isdigit(line[version_start + 1]))
867 version_start = next_version_start;
// Split the line: the name before the dash, the version after it.
869 String package(line(0,version_start));
870 String version(line(version_start + 1, ~line));
877 *log << "Package is " << package;
878 *log << ", version is " << version << "\n";
881 // Construct a qry to check the package's existence
// NOTE(review): package and version come straight from the input line and are
// concatenated into the SQL statements below without SQL_Escape.
883 qry = "select paramid from parameter where objectid='";
884 qry += objectid + "' and class='package' and name='";
885 qry += package + "'";
// Exactly one row: the package is already known for this host.
887 if (database.Query(qry) == 1)
889 std::list<String>::iterator lp;
// Look the package up in the list built before the loop. What happens on a hit
// is not shown - presumably the entry is removed so that only vanished packages
// remain at the end; confirm.
891 lp = find(packages.begin(), packages.end(), package);
892 if (lp != packages.end())
898 *log << "Could NOT find " << package << " in list.\n";
901 paramid = database.Field(0, "paramid");
902 qry = "select value from property where paramid='";
903 qry += paramid + "' and name='version'";
904 if (database.Query(qry) == 0)
906 *log << "Database corruption: Package " << package;
907 *log << " does not have a 'version' property.\n";
// The stored version differs: update the property and record the change.
909 else if (database.Field(0, "value") != version)
913 *log << " Parameter " << package << " has different version\n";
915 insertion = "update property set value='";
916 insertion += version + "' where paramid='";
917 insertion += paramid + "' and name='version'";
919 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
920 insert_h += " values ('";
921 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'MODIFIED', 'version', '";
922 insert_h += version + "')";
924 database.Query(insertion);
925 database.Query(insert_h);
// One notification is created on the first change and then shared by all
// changed packages in this message.
927 if (change_notification == "")
929 remark = "Gnucomo detected a different version for package parameter(s) ";
930 change_notification = database.new_notification(objectid, "property modified", remark);
// Link this parameter to the notification.
933 if (change_notification != "")
935 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
936 insertion += change_notification + "', '";
937 insertion += paramid + "')";
939 database.Query(insertion);
943 *log << "gcm_input ERROR: Cannot create 'property modified' notification.\n";
950 *log << " Parameter " << package << " has not changed.\n";
959 *log << " Parameter " << package << " does not exist.\n";
961 // Create a new package parameter, including version property and history record
963 insertion = "insert into parameter (objectid, name, class, description) values ('";
964 insertion += objectid + "', '" + package + "', 'package', 'RPM package " + package + "')";
968 *log << insertion << "\n";
972 database.Query(insertion);
// Read back the id of the parameter that was just inserted.
973 qry = "select paramid from parameter where objectid='";
974 qry += objectid + "' and class='package' and name='";
975 qry += package + "'";
977 paramid = database.Field(0, "paramid");
980 insertion = "insert into property (paramid, name, value, type) values ('";
981 insertion += paramid + "', 'version', '";
982 insertion += version + "', 'STATIC')";
983 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
984 insert_h += " values ('";
985 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'CREATED', 'version', '";
986 insert_h += version + "')";
990 *log << insertion << "\n" << insert_h << "\n";
994 database.Query(insertion);
995 database.Query(insert_h);
// One notification is created for the first new package and then shared.
// Any condition around this (initial_entry, for instance) is not shown here.
998 if (create_notification == "")
1000 remark = "Gnucomo detected new parameter(s) of class package";
1001 create_notification = database.new_notification(objectid, "parameter created", remark);
1003 if (create_notification != "")
1005 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
1006 insertion += create_notification + "', '";
1007 insertion += paramid + "')";
1009 database.Query(insertion);
1013 *log << "gcm_input ERROR: Cannot create 'parameter created' notification.\n";
1031 *log << "gcm_input WARNING: Not a valid line: " << line << "\n";
// Removal detection runs only for RPM lists and only when incremental is false.
1035 if (classification == RPMLIST && !incremental)
1037 std::list<String>::iterator lp;
1038 String remove_notification("");
1041 * If there are any packages left in the list, they seem to have
1042 * disappeared from the system.
1045 for (lp = packages.begin(); lp != packages.end(); lp++)
1051 // Construct a qry to check the package's existence
1053 qry = "select paramid from parameter where objectid='";
1054 qry += objectid + "' and class='package' and name='";
1057 if (database.Query(qry) == 1)
1059 paramid = database.Field(0, "paramid");
// Newest history record first; its change_nature is examined below.
1060 qry ="select change_nature from history where paramid='";
1061 qry += paramid + "' order by modified desc";
1062 if (database.Query(qry) <= 0)
1064 *log << "Database ERROR: no history record for parameter " << *lp << ".\n";
// Record the removal only once: skip packages whose latest record is already REMOVED.
1066 else if (database.Field(0, "change_nature") != "REMOVED")
1070 *log << "Removing parameter " << *lp << ".\n";
1073 insert = "insert into history (paramid, modified, change_nature)";
1074 insert += " values ('";
1075 insert += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'REMOVED')";
1077 database.Query(insert);
// One notification is created for the first removed package and then shared.
1079 if (remove_notification == "")
1081 remark = "Gnucomo detected that package(s) have disappeared ";
1082 remove_notification = database.new_notification(objectid, "parameter removed", remark);
1085 if (remove_notification != "")
1087 insert = "insert into parameter_notification (notificationid, paramid) values ('";
1088 insert += remove_notification + "', '";
1089 insert += paramid + "')";
1091 database.Query(insert);
1095 *log << "gcm_input ERROR: Cannot create 'parameter removed' notification.\n";
1104 *log << nr_lines << " lines parsed from the log file.\n";