2 /**************************************************************************
3 ** (c) Copyright 2002, Andromeda Technology & Automation
4 ** This is free software; you can redistribute it and/or modify it under the
5 ** terms of the GNU General Public License, see the file COPYING.
6 ***************************************************************************
7 ** MODULE INFORMATION *
8 ***********************
9 ** FILE NAME : message.cpp
10 ** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
11 ** VERSION NUMBER : $Revision: 1.15 $
13 ** DESCRIPTION : Implementation of the message handling classes
18 ***************************************************************************
19 ** ADMINISTRATIVE INFORMATION *
20 ********************************
21 ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
22 ** CREATION DATE : Sep 16, 2002
23 ** LAST UPDATE : Jul 24, 2003
25 **************************************************************************/
27 /*****************************
29 Revision 1.15 2003-10-27 11:28:27 arjen
30 Do not add another parameter_notification record if the notification
31 already exists for that parameter.
33 Revision 1.14 2003/09/01 06:57:14 arjen
34 Reject log entries that are found to be invalid.
36 Revision 1.13 2003/08/16 15:28:45 arjen
37 Fixed a namespace problem
39 Revision 1.12 2003/08/11 16:56:16 arjen
40 Different kinds of log files are parsed by a collection of objects
41 of different classes, derived from the base class line_cooker
42 Depending on the message content or the message_type element in
43 XML, one of these objects is selected.
45 Logrunner is integrated with gcm_input. Although its functionality
46 is still limited, a connection between logrunner and gcm_input
49 Revision 1.11 2003/08/05 08:15:00 arjen
50 Debug output to the log stream instead of cerr.
51 Fixed namespace problems in XPath searches of the DOM.
52 Moved string utility functions to a separate file.
54 Revision 1.10 2003/04/29 09:16:44 arjen
56 Only cooked log entries for now.
58 Revision 1.9 2003/03/29 09:04:10 arjen
59 Extract the hostname out of the 'From:' or 'Message-Id:' line
62 Revision 1.8 2003/03/16 09:42:40 arjen
63 Read IRIX system logs.
65 Revision 1.7 2003/02/21 08:08:05 arjen
66 Gcm_input also detects packages that are removed from the system.
67 Determining the version number of a package in a RPM
68 list is improved. Only the last one or two parts of the string that
69 begin with a '-' and a number are considered the version.
71 Revision 1.6 2003/02/05 09:37:51 arjen
72 Create notifications when a new package is discovered
73 in a 'rpm -qa' list or when the version of a package is changed.
75 Revision 1.4 2002/12/06 22:26:28 arjen
76 Set the value of log.processed to FALSE when inserting a
77 new log entry into the database
78 When a syslog entry arrives from last year, gcm_input subtracts one from the
79 year of arrival to create the year of the log entry.
80 Read output from "rpm -qa" and enter packages in the parameter table.
82 Revision 1.3 2002/11/09 08:04:27 arjen
83 Added a reference to the GPL
85 Revision 1.2 2002/11/04 10:13:36 arjen
86 Use proper namespace for iostream classes
88 Revision 1.1 2002/10/05 10:25:49 arjen
89 Creation of gcm_input and a first approach to a web interface
91 *****************************/
93 static const char *RCSID = "$Id: message.cpp,v 1.15 2003-10-27 11:28:27 arjen Exp $";
96 #include <libxml/xpath.h>
97 #include <libxml/debugXML.h>
102 extern bool verbose; /* Defined in the main application */
103 extern bool testmode;
104 extern bool incremental;
105 extern std::ostream *log;
107 /* Utility functions */
109 extern String SQL_Escape(String s);
111 /*=========================================================================
112 ** NAME : operator >>
113 ** SYNOPSIS : bool operator >> (message_buffer &, String &)
115 ** RETURN VALUE : True if input was available.
117 ** DESCRIPTION : Input operator. Read the next line from the message.
123 ** LAST MODIFIED : Nov 04, 2002
124 **=========================================================================
// Input operator for message_buffer: delivers the next line of the message in s.
// Per the header comment above, the result is true if input was available.
127 bool operator >> (message_buffer &b, String &s)
129 bool input_ok = false;
// next_line has reached the end of the buffered lines, so nothing buffered is
// left to hand out.
131 if (b.next_line == b.buffer.end())
// Append the line l to the buffered lines of the message_buffer.
137 b.buffer.push_back(l);
139 // next_line keeps pointing to the end.
// Construct a client_message from an input stream and a database object.
// The visible initialisation marks the message as not encrypted and not yet
// classified.
154 client_message::client_message(std::istream *in, gnucomo_database db)
161 gpg_encrypted = false;
162 classification = UNKNOWN;
// Date patterns, kept as plain Strings so they can be appended to the header
// regexes below.
// mail_date_re: "<3 letters>, <day> <3 letters> <4-digit year> hh:mm:ss <+|-><4 digits>"
167 static const String mail_date_re("[[:alpha:]]{3}, [ 123]?[0-9] [[:alpha:]]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [+-][0-9]{4}");
// unix_date_re: "<3 letters> <3 letters> <day> hh:mm:ss <4-digit year>"
168 static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
// Content markers that classify() tests against each line of the message body.
170 static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
171 static const regex re_dump("^ *DUMP: Date of this level");
// A name followed by '-' and a digit, i.e. the shape of a "package-version" line.
172 static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");
// Mail header lines: the "From " line carrying a unix-style date, and the
// From:, Date: and Message-Id: fields.
174 static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
175 static const regex re_mail_From("^From:[[:blank:]]+");
176 static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
177 static const regex re_mail_MsId("^Message-Id:[[:blank:]]+");
// An e-mail address, and its user part up to and including the '@'.
178 static const regex re_email_address("[[:alnum:]_.-]+@[[:alnum:]_.-]+");
179 static const regex re_email_user("[[:alnum:]_.-]+@");
// The tail of an XML declaration: "xml ...?>" at the end of a line.
180 static const regex re_xml_header("xml .*\?>$");
182 /*=========================================================================
183 ** NAME : readXMLinput
184 ** SYNOPSIS : int readXMLinput(String first_line)
186 ** RETURN VALUE : Parse the XML input and extract the header information
194 ** LAST MODIFIED : Jul 24, 2003
195 **=========================================================================
// Parse the XML message with the libxml2 push parser, keep the resulting
// document in xmlDom and read the header fields (message type, hostname,
// service, time) from it with XPath queries in the "gcmt" namespace.
// first_line is the line of input that was already consumed before the
// parser was started.
198 int client_message::readXMLinput(String first_line)
200 xmlParserCtxtPtr ctxt;
202 xmlNodePtr root, item;
203 xmlNsPtr namespaces[1];
205 xmlXPathObjectPtr res;
206 xmlXPathContextPtr pathcontext;
// Seed the push parser with the first line, then feed it the remaining input.
// NOTE(review): ~String is used as the chunk size, so it presumably yields the
// string length - confirm against the String class.
209 ctxt = xmlCreatePushParserCtxt(NULL, NULL, first_line, ~first_line, NULL);
210 while (input >> line)
212 xmlParseChunk(ctxt, line, ~line, 0);
// An empty chunk with the terminate flag set ends the parse.
214 xmlParseChunk(ctxt, "", 0, 1);
// The document is taken over from the parser context before the context is freed.
215 xmlDom = ctxt->myDoc;
216 xmlFreeParserCtxt(ctxt);
// NOTE(review): root is dereferenced without a NULL check in the visible code;
// xmlDocGetRootElement can return NULL for an empty or unparsable document.
218 root = xmlDocGetRootElement(xmlDom);
219 namespaces[0] = root->ns;
221 //TODO Ought to check root->name and root->ns->href
// XPath context rooted at the document element, with the root's namespace as
// the only registered namespace.
223 pathcontext = xmlXPathNewContext(xmlDom);
224 pathcontext->node = xmlDocGetRootElement(xmlDom);
225 pathcontext->namespaces = namespaces;
226 pathcontext->nsNr = 1;
229 xmlDebugDumpNodeList(stdout, pathcontext->node, 0);
// Header field 1: the message type, which selects the line cooker.
// NOTE(review): only nodesetval is tested here and below; the first entry of
// nodeTab is read without a visible check that the node set is non-empty.
// NOTE(review): no xmlXPathFreeObject/xmlXPathFreeContext call is visible for
// res and pathcontext - confirm they are released.
232 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:messagetype/text()", pathcontext);
233 if (res->nodesetval != NULL)
236 xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
238 item = *res->nodesetval->nodeTab;
240 // Select a line cooker based on the message type.
243 std::cout << "Looking for a line cooker for " << item->content << "\n";
// Walk the kitchen (the list of line cookers) until a cooker is selected in pan.
245 std::list<line_cooker *>::iterator lci = kitchen.begin();
247 while (pan == 0 && lci != kitchen.end())
// A cooker whose message_type() differs from the header text is not the one wanted.
250 if (pan->message_type() != (const char *)(item->content))
258 *log << "Can not find a line cooker for message type " << item->content << "\n";
263 *log << "Message type not found in XML header.\n";
// Header field 2: the name of the host that sent the message.
266 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:hostname/text()", pathcontext);
267 if (res->nodesetval != NULL)
270 xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
272 item = *res->nodesetval->nodeTab;
273 hostname = (const char *)item->content;
277 *log << "Hostname not found in XML header.\n";
// Header field 3: the service; no message is logged in the visible code when it is absent.
280 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:service/text()", pathcontext);
281 if (res->nodesetval != NULL)
283 item = *res->nodesetval->nodeTab;
284 service = (const char *)item->content;
// Header field 4: the time, stored as the arrival time of the message.
286 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:time/text()", pathcontext);
287 if (res->nodesetval != NULL)
289 item = *res->nodesetval->nodeTab;
290 arrival = String((char *)item->content);
293 //xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
297 /*=========================================================================
299 ** SYNOPSIS : double classify(String host, UTC arriv, String serv)
301 ** RETURN VALUE : The certainty with which the message is classified.
309 ** LAST MODIFIED : Aug 11, 2003
310 **=========================================================================
// Determine what kind of message is waiting in the input buffer.
// A mail header, if present, is scanned for the sending host and the date of
// arrival; the body is then tested line by line against the known content
// patterns and the registered line cookers. Per the header comment above, the
// result is the certainty with which the message was classified.
313 double client_message::classify(String host, UTC arriv, String serv)
321 /* First, check if the message has a mail header. */
323 if (input >> line && line == re_uxmail_from)
329 /* Scan ahead for the hostname and date of arrival. */
// The mail header ends at the first empty line.
331 while (input >> line && line != "")
333 if (line == re_mail_From)
// Take the e-mail address from the line and blank out the "user@" part, which
// leaves the domain.
335 from_address = line(re_email_address);
336 from_address(re_email_user) = ""; // Remove the user part;
// A candidate replaces the current hostname only when ~hostname < ~from_address.
// NOTE(review): ~ presumably yields the string length, i.e. the longer name
// wins - confirm against the String class.
337 if (from_address != "" && ~hostname < ~from_address)
339 *log << "Detected hostname " << from_address << "\n";
340 hostname = from_address;
// Same extraction for the Message-Id: line.
343 if (line == re_mail_MsId)
345 from_address = line(re_email_address);
346 from_address(re_email_user) = ""; // Remove the user part;
347 if (from_address != "" && ~hostname < ~from_address)
349 *log << "Detected hostname " << from_address << "\n";
350 hostname = from_address;
// The Date: line supplies the arrival time of the message.
353 if (line == re_mail_Date)
355 arrival = UTC(line(regex(mail_date_re)));
361 // Push the first line back, we need to read it again.
369 * Now that we have the mail header out of the way, try to figure
370 * out what the content of the message is.
// Keep reading lines until the input runs out or the certainty reaches 0.9.
374 while (input >> line && certainty < 0.9)
378 *log << " testing: " << line << "\n";
381 if (line == re_xml_header)
384 classification = XML;
387 *log << "XML input detected.\n";
// Encryption is recorded in a flag of its own; classification is not assigned
// on this visible line.
391 else if (line == re_PGP)
394 gpg_encrypted = true;
395 *log << "The message is PGP/GnuPG encrypted.\n";
397 else if (line == re_dump)
402 *log << "DUMP output detected.\n";
405 else if (line == re_rpm)
408 classification = RPMLIST;
412 *log << "RPM package list detected.\n";
417 // Scan the list of line cookers if there is anything familiar.
419 std::list<line_cooker *>::iterator lci = kitchen.begin();
421 while (pan == 0 && lci != kitchen.end())
// A cooker that does not recognise the line is not selected.
424 if (!pan->check_pattern(line))
433 classification = COOKER_OBJECT;
436 *log << "Detected message type " << pan->message_type() << "\n";
// Diagnostics for a missing hostname or an unknown arrival time.
445 *log << "Can not determine the hostname where the message came from.\n";
448 else if (!arrival.proper())
450 *log << "Arrival time is not known.\n";
461 /*=========================================================================
463 ** SYNOPSIS : void enterXML()
465 ** RETURN VALUE : None
467 ** DESCRIPTION : Analyze the DOM tree from the XML input.
468 ** The DOM tree was previously parsed by readXMLinput().
474 ** LAST MODIFIED : Jul 24, 2003
475 **=========================================================================
// Enter the content of a previously parsed XML message (xmlDom) into the
// database. The host is looked up first; then the first element below
// <gcmt:data> is examined. Only <log> data is handled in the visible code:
// each child is either a <raw> line, which is run through the selected line
// cooker (pan), or a <cooked> entry whose parts are read with XPath. Every
// usable entry becomes a new record in the log table.
478 void client_message::enterXML()
480 xmlXPathObjectPtr res;
481 xmlXPathContextPtr pathcontext;
482 xmlNsPtr namespaces[1];
484 /* Try to find the host in the database */
488 objectid = database.find_host(hostname);
491 *log << "Please define the host " << hostname << " in the database.\n";
496 *log << "Object id for " << hostname << " is " << objectid << "\n";
// XPath context rooted at the document element, with the root's namespace as
// the only registered namespace.
// NOTE(review): the root element is dereferenced without a visible NULL check.
499 pathcontext = xmlXPathNewContext(xmlDom);
500 pathcontext->node = xmlDocGetRootElement(xmlDom);
501 namespaces[0] = pathcontext->node->ns;
502 pathcontext->namespaces = namespaces;
503 pathcontext->nsNr = 1;
505 res = xmlXPathEval((const xmlChar *)"gcmt:data/node()", pathcontext);
// NOTE(review): here and below only nodesetval is tested before the first
// entry of nodeTab is read, and no xmlXPathFreeObject call is visible.
507 if (res->nodesetval != NULL)
509 // Find the first child element of the <data> element.
511 xmlNodePtr node = *res->nodesetval->nodeTab;
512 while (node->type != XML_ELEMENT_NODE)
516 if (strcmp((char *)node->name, "log") == 0)
518 // Each child contains a log entry, raw or cooked.
520 node = node->children;
// Only element nodes are log entries; other node types carry no entry.
523 if (node->type == XML_ELEMENT_NODE)
531 if (strcmp((char *)node->name, "raw") == 0)
533 item = node->children;
536 *log << "Can not cook this type of <raw> log element.\n";
// Let the line cooker take the raw line apart.
540 raw = String((const char *)item->content);
541 if (pan->cook_this(raw, arrival))
// Fall back to the hostname of the message when the cooker found none in the line.
543 log_hostname = pan->hostname();
544 if (log_hostname == "")
546 log_hostname = hostname;
548 log_service = pan->service();
549 log_date = pan->timestamp();
553 *log << "Log line " << raw << " does not match.\n";
558 else if (strcmp((char *)node->name, "cooked") == 0)
560 // Find the parts of the log entry
564 *log << "Analyzing cooked element.\n";
// The XPath queries below are relative to the <cooked> element.
566 pathcontext->node = node;
568 res = xmlXPathEval((const xmlChar *)"hostname/text()", pathcontext);
569 if (res->nodesetval != NULL)
571 item = *res->nodesetval->nodeTab;
572 log_hostname = (const char *)item->content;
// The hostname in the entry is compared with hostname(0, ~log_hostname).
// NOTE(review): presumably the leading part of the message's hostname of the
// same length, so a short name matches a longer qualified one - confirm
// against the String class.
573 if (log_hostname != hostname(0, ~log_hostname))
575 *log << "Hostname " << log_hostname << " does not match.\n";
581 log_hostname = hostname;
584 res = xmlXPathEval((const xmlChar *)"service/text()", pathcontext);
585 if (res->nodesetval != NULL)
587 item = *res->nodesetval->nodeTab;
588 log_service = (const char *)item->content;
// Default: the service taken from the message header.
592 log_service = service;
595 res = xmlXPathEval((const xmlChar *)"timestamp/text()", pathcontext);
596 if (res->nodesetval != NULL)
598 item = *res->nodesetval->nodeTab;
599 log_date = String((const char *)item->content);
603 *log << "<timestamp> missing from cooked log element.\n";
606 res = xmlXPathEval((const xmlChar *)"raw/text()", pathcontext);
607 if (res->nodesetval != NULL)
609 item = *res->nodesetval->nodeTab;
610 raw = String((const char *)item->content);
614 *log << "<raw> missing from cooked log element.\n";
619 // Insert a new log record into the database.
// An entry is stored only when it has raw text, a hostname and a proper date.
620 if (raw != "" && log_hostname != "" && log_date.proper())
622 String insertion("insert into log (objectid, servicecode,"
623 " object_timestamp, timestamp, rawdata, processed) values (");
625 /* Insert a new record into the log table */
// NOTE(review): only the raw text goes through SQL_Escape; log_service is
// taken from the message and concatenated as is.
627 insertion += "'" + objectid + "',";
628 insertion += "'" + log_service + "',";
629 insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
630 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
631 insertion += "'" + SQL_Escape(raw) + "',FALSE";
636 *log << insertion << "\n";
640 database.Query(insertion);
655 *log << "Data element " << node->name << " is not supported.\n";
660 *log << "Data node not found.\n";
664 /*=========================================================================
666 ** SYNOPSIS : int enter()
668 ** RETURN VALUE : The number of lines successfully parsed from the input
676 ** LAST MODIFIED : Jul 24, 2003
677 **=========================================================================
// Enter the content of the message into the database.
// XML messages are handled separately (see the classification test below).
// For the other kinds the host is looked up, the mail header is skipped and
// the body is processed line by line:
//   - lines for a line cooker (pan) become records in the log table;
//   - lines of an RPM package list create or update 'package' parameters,
//     their 'version' property and history, plus one notification per kind
//     of change for the whole message.
// For a non-incremental RPM list, packages that are in the database but were
// not seen in the message get a 'REMOVED' history record.
// Per the header comment above, the result is the number of lines
// successfully parsed from the input.
680 int client_message::enter()
682 if (classification == XML)
// Notification ids; empty until the first change of that kind is seen, so that
// one notification is shared by all parameters changed by this message.
692 String change_notification("");
693 String create_notification("");
694 bool initial_entry = false;
696 std::list<String> packages;
699 /* Double-check the classification of the message */
701 if (classification == UNKNOWN || certainty < 0.9 || gpg_encrypted)
708 // Skip the mail header.
710 while (input >> line && line != "");
713 /* Try to find the host in the database */
717 objectid = database.find_host(hostname);
720 *log << "Please define the host " << hostname << " in the database.\n";
725 *log << "Object id for " << hostname << " is " << objectid << "\n";
728 if (classification == RPMLIST)
733 /* Read all packages, so we will know which ones are */
734 /* missing at the end. */
736 qry = "select name from parameter where objectid='";
737 qry += objectid + "' and class='package'";
738 n_packages = database.Query(qry);
// No packages known yet for this host: this is the first list we receive.
739 initial_entry = n_packages == 0;
742 *log << n_packages << " packages in database.\n";
744 for (int t = 0; t < n_packages; t++)
746 packages.push_back(database.Field(t, "name"));
749 *log << "Package list built: " << packages.size() << ".\n";
753 /* Scan the input line by line, entering records into the database */
755 String rest; // Rest of the line to be parsed
758 while (input >> line)
762 *log << line << "\n";
766 /* Check each line if it contains valid information */
770 switch (classification)
786 String insertion("insert into log (objectid, servicecode,"
787 " object_timestamp, timestamp, rawdata, processed) values (");
790 switch (classification)
794 std::cerr << "\ncooker check: " << pan->check_pattern(line) << "\n";
796 if (pan->cook_this(line, arrival))
// The hostname found in the line is compared with hostname(0, ~pan->hostname()).
// NOTE(review): presumably the leading part of the message's hostname of the
// same length - confirm against the String class.
798 if (pan->hostname() == hostname(0,~pan->hostname()))
802 std::cerr << " Information from cooker:\n";
803 std::cerr << " timestamp = " << pan->timestamp() << "\n";
804 std::cerr << " hostname = " << pan->hostname() << "\n";
805 std::cerr << " service = " << pan->service() << "\n";
807 /* Insert a new record into the log table */
809 insertion += "'" + objectid + "',";
810 insertion += "'" + pan->service() + "',";
811 insertion += "'" + pan->timestamp().format("%Y-%m-%d %T") + "',";
812 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
813 insertion += "'" + SQL_Escape(line) + "',FALSE";
818 *log << insertion << "\n";
822 database.Query(insertion);
834 *log << " Hostname " << pan->hostname() << " does not match.\n";
839 *log << "gcm_input WARNING: Not a valid line: " << line << "\n";
845 // Scan a list of packages and versions from "rpm -qa".
846 // A similar listing can be created on IRIX 6.5 by using the
847 // command "showprods -3 -n|awk '{printf "%s-%s\n",$2,$3}'|grep -v '^[-=]' \
848 // |grep -v Version-Description".
850 // We have to separate the package name and the version.
851 // The separation is marked by a '-', followed by a digit.
852 // However, there may be other sequences of '-'digit in the package name,
853 // so we have to scan ahead until there is at most one such sequence
854 // left in the version string. The '-'digit sequence inside the
855 // version usually separates the version and the release number.
857 int version_start, next_version_start;
861 next_version_start = i;
863 while (i < ~line - 1)
865 while (i < ~line - 1 && !(line[i] == '-' && isdigit(line[i + 1])))
// Remember the last two '-'digit positions that were found.
871 version_start = next_version_start;
872 next_version_start = i;
877 if (!isdigit(line[version_start + 1]))
879 version_start = next_version_start;
881 String package(line(0,version_start));
882 String version(line(version_start + 1, ~line));
889 *log << "Package is " << package;
890 *log << ", version is " << version << "\n";
893 // Construct a qry to check the package's existence
// NOTE(review): package and version come from the message and are concatenated
// into the SQL text as is, here and below; only log lines go through
// SQL_Escape. Consider escaping them as well.
895 qry = "select paramid from parameter where objectid='";
896 qry += objectid + "' and class='package' and name='";
897 qry += package + "'";
899 if (database.Query(qry) == 1)
901 std::list<String>::iterator lp;
// Look the package up in the list of packages known for this host.
903 lp = find(packages.begin(), packages.end(), package);
904 if (lp != packages.end())
910 *log << "Could NOT find " << package << " in list.\n";
913 paramid = database.Field(0, "paramid");
914 qry = "select value from property where paramid='";
915 qry += paramid + "' and name='version'";
916 if (database.Query(qry) == 0)
918 *log << "Database corruption: Package " << package;
919 *log << " does not have a 'version' property.\n";
921 else if (database.Field(0, "value") != version)
925 *log << " Parameter " << package << " has different version\n";
// Store the new version and record the change in the history.
927 insertion = "update property set value='";
928 insertion += version + "' where paramid='";
929 insertion += paramid + "' and name='version'";
931 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
932 insert_h += " values ('";
933 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'MODIFIED', 'version', '";
934 insert_h += version + "')";
936 database.Query(insertion);
937 database.Query(insert_h);
// Create the 'property modified' notification exactly once, the first time a
// changed version is seen; later packages reuse its id. A second call here
// would create a notification whose id is overwritten and never linked to
// any parameter.
939 if (change_notification == "")
941 remark = "Gnucomo detected a different version for package parameter(s) ";
942 change_notification = database.new_notification(objectid, "property modified", remark);
946 if (change_notification != "")
// Link the parameter to the notification, unless that link already exists.
948 qry = "select * from parameter_notification where notificationid='";
949 qry += change_notification + "' and paramid='";
950 qry += paramid + "'";
952 if (database.Query(qry) == 0)
954 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
955 insertion += change_notification + "', '";
956 insertion += paramid + "')";
958 database.Query(insertion);
963 *log << "gcm_input ERROR: Cannot create 'property modified' notification.\n";
970 *log << " Parameter " << package << " has not changed.\n";
979 *log << " Parameter " << package << " does not exist.\n";
981 // Create a new package parameter, including version property and history record
983 insertion = "insert into parameter (objectid, name, class, description) values ('";
984 insertion += objectid + "', '" + package + "', 'package', 'RPM package " + package + "')";
988 *log << insertion << "\n";
992 database.Query(insertion);
// Read back the id of the parameter that was just inserted.
993 qry = "select paramid from parameter where objectid='";
994 qry += objectid + "' and class='package' and name='";
995 qry += package + "'";
997 paramid = database.Field(0, "paramid");
1000 insertion = "insert into property (paramid, name, value, type) values ('";
1001 insertion += paramid + "', 'version', '";
1002 insertion += version + "', 'STATIC')";
1003 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
1004 insert_h += " values ('";
1005 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'CREATED', 'version', '";
1006 insert_h += version + "')";
1010 *log << insertion << "\n" << insert_h << "\n";
1014 database.Query(insertion);
1015 database.Query(insert_h);
// One 'parameter created' notification is shared by all new packages.
1018 if (create_notification == "")
1020 remark = "Gnucomo detected new parameter(s) of class package";
1021 create_notification = database.new_notification(objectid, "parameter created", remark);
1023 if (create_notification != "")
1025 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
1026 insertion += create_notification + "', '";
1027 insertion += paramid + "')";
1029 database.Query(insertion);
1033 *log << "gcm_input ERROR: Cannot create 'parameter created' notification.\n";
1051 *log << "gcm_input WARNING: Not a valid line: " << line << "\n";
// A complete (non-incremental) list tells us which packages are gone.
1055 if (classification == RPMLIST && !incremental)
1057 std::list<String>::iterator lp;
1058 String remove_notification("");
1061 * If there are any packages left in the list, they seem to have
1062 * disappeared from the system.
1065 for (lp = packages.begin(); lp != packages.end(); lp++)
1071 // Construct a qry to check the package's existence
1073 qry = "select paramid from parameter where objectid='";
1074 qry += objectid + "' and class='package' and name='";
1077 if (database.Query(qry) == 1)
1079 paramid = database.Field(0, "paramid");
// The most recent history record tells whether the removal was already recorded.
1080 qry ="select change_nature from history where paramid='";
1081 qry += paramid + "' order by modified desc";
1082 if (database.Query(qry) <= 0)
1084 *log << "Database ERROR: no history record for parameter " << *lp << ".\n";
1086 else if (database.Field(0, "change_nature") != "REMOVED")
1090 *log << "Removing parameter " << *lp << ".\n";
1093 insert = "insert into history (paramid, modified, change_nature)";
1094 insert += " values ('";
1095 insert += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'REMOVED')";
1097 database.Query(insert);
// One 'parameter removed' notification is shared by all removed packages.
1099 if (remove_notification == "")
1101 remark = "Gnucomo detected that package(s) have disappeared ";
1102 remove_notification = database.new_notification(objectid, "parameter removed", remark);
1105 if (remove_notification != "")
1107 insert = "insert into parameter_notification (notificationid, paramid) values ('";
1108 insert += remove_notification + "', '";
1109 insert += paramid + "')";
1111 database.Query(insert);
1115 *log << "gcm_input ERROR: Cannot create 'parameter removed' notification.\n";
1124 *log << nr_lines << " lines parsed from the log file.\n";