2 /**************************************************************************
3 ** (c) Copyright 2002, Andromeda Technology & Automation
4 ** This is free software; you can redistribute it and/or modify it under the
5 ** terms of the GNU General Public License, see the file COPYING.
6 ***************************************************************************
7 ** MODULE INFORMATION *
8 ***********************
9 ** FILE NAME : message.cpp
10 ** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
11 ** VERSION NUMBER : $Revision: 1.16 $
13 ** DESCRIPTION : Implementation of the message handling classes
18 ***************************************************************************
19 ** ADMINISTRATIVE INFORMATION *
20 ********************************
21 ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
22 ** CREATION DATE : Sep 16, 2002
23 ** LAST UPDATE : Nov 28, 2003
25 **************************************************************************/
27 /*****************************
29 Revision 1.16 2003-12-04 10:38:09 arjen
30 Major redesign. All input is handled through XML. Raw input data is first
31 transformed into an XML document for further processing.
32 A collection of polymorphic classes handle the transformation of various
33 input formats into XML.
34 Classifying input data is done with a finite improbability calculation.
36 Revision 1.15 2003/10/27 11:28:27 arjen
36 Do not add another parameter_notification record if the notification
38 already exists for that parameter.
40 Revision 1.14 2003/09/01 06:57:14 arjen
41 Reject log entries that are found to be invalid.
43 Revision 1.13 2003/08/16 15:28:45 arjen
44 Fixed a namespace problem
46 Revision 1.12 2003/08/11 16:56:16 arjen
47 Different kinds of log files are parsed by a collection of objects
48 of different classes, derived from the base class line_cooker
49 Depending on the message content or the message_type element in
50 XML, one of these objects is selected.
52 Logrunner is integrated with gcm_input. Although its functionality
53 is still limited, a connection between logrunner and gcm_input
56 Revision 1.11 2003/08/05 08:15:00 arjen
57 Debug output to the log stream instead of cerr.
58 Fixed namespace problems in XPath searches of the DOM.
59 Moved string utility functions to a separate file.
61 Revision 1.10 2003/04/29 09:16:44 arjen
63 Only cooked log entries for now.
65 Revision 1.9 2003/03/29 09:04:10 arjen
66 Extract the hostname out of the 'From:' or 'Message-Id:' line
69 Revision 1.8 2003/03/16 09:42:40 arjen
70 Read IRIX system logs.
72 Revision 1.7 2003/02/21 08:08:05 arjen
73 Gcm_input also detects packages that are removed from the system.
74 Determining the version number of a package in a RPM
75 list is improved. Only the last one or two parts of the string that
76 begin with a '-' and a number are considered the version.
78 Revision 1.6 2003/02/05 09:37:51 arjen
79 Create notifications when a new package is discovered
80 in a 'rpm -qa' list or when the version of a package is changed.
82 Revision 1.4 2002/12/06 22:26:28 arjen
83 Set the value of log.processed to FALSE when inserting a
84 new log entry into the database
85 When a syslog entry arrives from last year, gcm_input subtracts one from the
86 year of arrival to create the year of the log entry.
87 Read output from "rpm -qa" and enter packages in the parameter table.
89 Revision 1.3 2002/11/09 08:04:27 arjen
90 Added a reference to the GPL
92 Revision 1.2 2002/11/04 10:13:36 arjen
93 Use proper namespace for iostream classes
95 Revision 1.1 2002/10/05 10:25:49 arjen
96 Creation of gcm_input and a first approach to a web interface
98 *****************************/
100 static const char *RCSID = "$Id: message.cpp,v 1.16 2003-12-04 10:38:09 arjen Exp $";
103 #include <libxml/xpath.h>
104 #include <libxml/debugXML.h>
109 extern bool verbose; /* Defined in the main application */
110 extern bool testmode;
111 extern bool incremental;
112 extern std::ostream *Log;
114 /* Utility functions */
116 extern String SQL_Escape(String s);
118 /*=========================================================================
119 ** NAME : operator >>
120 ** SYNOPSIS : bool operator >> (message_buffer &, String &)
122 ** RETURN VALUE : True if input was available.
124 ** DESCRIPTION : Input operator. Read the next line from the message.
130 ** LAST MODIFIED : Nov 04, 2002
131 **=========================================================================
// Fetch the next line of the message into s; the result tells whether a line
// was available. Lines are kept in b.buffer and b.next_line is the read
// position within it (presumably so that lines already read can be read
// again later - confirm against message_buffer).
134 bool operator >> (message_buffer &b, String &s)
136 bool input_ok = false;
// The read position has reached the end of the buffered lines.
138 if (b.next_line == b.buffer.end())
// Remember the line l at the back of the buffer.
// NOTE(review): l is presumably the line just read from the underlying input - confirm.
144 b.buffer.push_back(l);
146 // next_line keeps pointing to the end.
161 /*=========================================================================
162 ** NAME : client_message
163 ** SYNOPSIS : client_message(std::istream *in, gnucomo_database db)
165 ** RETURN VALUE : None
167 ** DESCRIPTION : Client message constructor.
173 ** LAST MODIFIED : Nov 04, 2002
174 **=========================================================================
// Construct a client message for the input stream in and the database db.
// NOTE(review): db is passed by value - confirm that copying a
// gnucomo_database object is intended.
177 client_message::client_message(std::istream *in, gnucomo_database db)
// A new message starts out as not encrypted and with an unknown classification.
184 gpg_encrypted = false;
185 classification = UNKNOWN;
// Patterns used to recognise the content of an incoming message.

// Date of the form 'Www Mmm dd hh:mm:ss yyyy': two three-letter words, a day
// of the month (possibly blank padded), a time and a four digit year.
190 static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
// First line of an ASCII-armored PGP/GnuPG message.
192 static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
// Line that starts with optional blanks followed by 'DUMP: Date of this level'.
193 static const regex re_dump("^ *DUMP: Date of this level");
// A name, a '-', a digit and one more character.
// NOTE(review): the trailing bracket expression has no repetition operator, so
// only a single character after the digit is required - confirm this is intended.
194 static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");
// Line starting with 'From ', a token without blanks or tabs, blanks and a date
// as described by unix_date_re.
196 static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
// NOTE(review): in a C++ string literal "\?" is simply the character '?', so the
// pattern handed to regex is: xml .*?>$ - the question mark is not escaped for
// the regular expression. If a literal '?' before '>' is meant, "\\?" is
// probably needed - confirm against the syntax accepted by the regex class.
197 static const regex re_xml_header("xml .*\?>$");
199 /*=========================================================================
200 ** NAME : extractHeader
201 ** SYNOPSIS : bool extractHeader()
203 ** RETURN VALUE : True if the mandatory header elements are available.
205 ** DESCRIPTION : Extract the header information from the XML DOM tree.
211 ** LAST MODIFIED : Nov 26, 2003
212 **=========================================================================
// Read the header fields of the XML message from the DOM tree in xmlDom:
//   gcmt:messagetype - compared with the message type of each line cooker
//                      in the kitchen,
//   gcmt:hostname    - stored in hostname,
//   gcmt:service     - stored in service,
//   gcmt:time        - stored in arrival and checked with proper().
// Problems are reported on the log stream.
215 bool client_message::extractHeader()
217 xmlNodePtr root, item;
218 xmlNsPtr namespaces[1];
220 xmlXPathObjectPtr res;
221 xmlXPathContextPtr pathcontext;
223 bool header_OK = true;
// NOTE(review): xmlDocGetRootElement returns NULL for a document without a
// root element; root is dereferenced on the next line without a check.
225 root = xmlDocGetRootElement(xmlDom);
226 namespaces[0] = root->ns;
228 //TODO Ought to check root->name and root->ns->href
// XPath context positioned on the root element, with the namespace of the
// root element as the only registered namespace.
230 pathcontext = xmlXPathNewContext(xmlDom);
231 pathcontext->node = xmlDocGetRootElement(xmlDom);
232 pathcontext->namespaces = namespaces;
233 pathcontext->nsNr = 1;
// Dump the node list to stdout for debugging.
// NOTE(review): the other diagnostics in this function are written to *Log.
236 xmlDebugDumpNodeList(stdout, pathcontext->node, 0);
// NOTE(review): xmlXPathEval returns NULL on failure and the returned object
// belongs to the caller (xmlXPathFreeObject). res is dereferenced directly and
// reused for every lookup below - confirm that both cases are taken care of.
239 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:messagetype/text()", pathcontext);
// Both the node set and its node table must exist before the first node is used.
240 if (res->nodesetval != NULL && res->nodesetval->nodeTab != NULL)
242 item = *res->nodesetval->nodeTab;
244 // Select a line cooker based on the message type.
247 *Log << "Looking for a line cooker for " << item->content << "\n";
// Walk the kitchen until a cooker has been selected (pan.lc is set) or the
// list is exhausted.
250 std::list<xform>::iterator lci = kitchen.begin();
251 while (pan.lc == 0 && lci != kitchen.end())
// Compare the message type of this cooker with the text of the element.
253 if (lci->lc->message_type() == (const char *)(item->content))
261 *Log << "Can not find a line cooker for message type " << item->content << "\n";
267 *Log << "Message type not found in XML header.\n";
// Name of the host the message came from.
271 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:hostname/text()", pathcontext);
272 if (res->nodesetval != NULL && res->nodesetval->nodeTab != NULL)
274 item = *res->nodesetval->nodeTab;
275 hostname = (const char *)item->content;
279 *Log << "Can not determine the hostname where the message came from.\n";
// Service named in the header.
283 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:service/text()", pathcontext);
284 if (res->nodesetval != NULL && res->nodesetval->nodeTab != NULL)
286 item = *res->nodesetval->nodeTab;
287 service = (const char *)item->content;
// Time stated in the header; it is stored as the arrival time and must be
// accepted by proper().
289 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:time/text()", pathcontext);
290 if (res->nodesetval != NULL && res->nodesetval->nodeTab != NULL)
292 item = *res->nodesetval->nodeTab;
293 arrival = String((char *)item->content);
294 if (!arrival.proper())
296 *Log << "Arrival time is not properly stated.\n";
301 //xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
307 /*=========================================================================
309 ** SYNOPSIS : double classify(String host, UTC arriv, String serv)
311 ** RETURN VALUE : The certainty with which the message is classified.
319 ** LAST MODIFIED : Nov 27, 2003
320 **=========================================================================
// Find out what kind of content the message holds and how certain that is.
// A mail header, if present, is skipped first. The following lines are offered
// to every line cooker in the kitchen: each pattern match multiplies the
// uncertainty of that cooker by P, and the overall uncertainty follows the
// smallest value seen. The certainty is 1.0 minus that uncertainty.
323 double client_message::classify(String host, UTC arriv, String serv)
331 const double epsilon = 0.1; // Threshold for uncertainty
332 const double P = 0.5; // Probability of a wrong match
336 /* First, check if the message has a mail header. */
338 if (input >> line && line == re_uxmail_from)
344 /* Skip the mail header until the first empty line. */
346 while (input >> line && line != "")
352 // Push the first line back, we need to read it again.
358 * Now that we have the mail header out of the way, try to figure
359 * out what the content of the message is.
// NOTE(review): the threshold is written as the literal 0.1 although epsilon
// above holds the same value; using epsilon here would keep the two in step.
// Because input >> line is evaluated first, a line is taken from the input
// even on the pass where the uncertainty test ends the loop.
365 while (input >> line && uncertainty > 0.1)
369 *Log << " testing: " << line << "\n";
// Mark the message as encrypted (presumably reached when the line matches
// re_PGP - confirm).
375 gpg_encrypted = true;
376 *Log << "The message is PGP/GnuPG encrypted.\n";
380 // Scan the list of line cookers if there is anything familiar.
382 std::list<xform>::iterator lci = kitchen.begin();
384 while (lci != kitchen.end())
386 if (lci->lc->check_pattern(line))
388 // We have a match; decrease the uncertainty
390 lci->uncertainty *= P;
// Keep track of the lowest uncertainty among all cookers.
391 if (uncertainty > lci->uncertainty)
393 uncertainty = lci->uncertainty;
398 *Log << lci->lc->message_type() << " detected with "
399 << lci->uncertainty << " uncertainty.\n";
404 classification = COOKER_OBJECT;
409 certainty = 1.0 - uncertainty;
414 /*=========================================================================
416 ** SYNOPSIS : void enterXML()
418 ** RETURN VALUE : None
420 ** DESCRIPTION : Analyze the DOM tree from the XML input.
421 ** The DOM tree was previously parsed by readXMLinput().
427 ** LAST MODIFIED : Nov 28, 2003
428 **=========================================================================
// One property of a <parameter> element. enterXML() fills the name and value
// members from each <property> child and collects them in a list per parameter.
431 struct param_property
// Walk the data part of the XML message and store its content in the database.
// Two kinds of data elements are handled:
//   <log>        - its <raw> and <cooked> children become records in the log table,
//   <parameters> - its <parameter> children are compared with the parameter,
//                  property and history tables, and notifications are created
//                  for changed, new and removed parameters.
// Any other element is reported as unsupported on the log stream.
// NOTE(review): the values of param_class, param_name and of the property names
// and values come from the XML input and are concatenated into SQL statements
// as they are; only the raw log text is passed through SQL_Escape. A quote in
// any of them breaks the statement - consider escaping them as well.
437 void client_message::enterXML()
439 //TODO : return the number of elements that are handled.
441 xmlXPathObjectPtr res;
442 xmlXPathContextPtr pathcontext;
443 xmlNsPtr namespaces[1];
445 /* Try to find the host in the database */
448 String remark; // For notifications
// The id of the host is used in every record written below.
450 objectid = database.find_host(hostname);
453 *Log << "Please define the host " << hostname << " in the database.\n";
458 *Log << "Object id for " << hostname << " is " << objectid << "\n";
// XPath context positioned on the root element, with the namespace of the
// root element as the only registered namespace.
461 pathcontext = xmlXPathNewContext(xmlDom);
462 pathcontext->node = xmlDocGetRootElement(xmlDom);
463 namespaces[0] = pathcontext->node->ns;
464 pathcontext->namespaces = namespaces;
465 pathcontext->nsNr = 1;
467 res = xmlXPathEval((const xmlChar *)"gcmt:data/node()", pathcontext);
// NOTE(review): here and in the <cooked> lookups below only nodesetval is
// tested before nodeTab is dereferenced, whereas extractHeader() also tests
// nodeTab != NULL. An empty node set may have a NULL nodeTab - confirm and
// align the checks. The result objects also belong to the caller
// (xmlXPathFreeObject) - confirm they are released.
469 if (res->nodesetval != NULL)
471 // Find the first child element of the <data> element.
473 xmlNodePtr node = *res->nodesetval->nodeTab;
474 while (node->type != XML_ELEMENT_NODE)
478 if (strcmp((char *)node->name, "log") == 0)
480 // Each child contains a log entry, raw or cooked.
482 node = node->children;
485 if (node->type == XML_ELEMENT_NODE)
// A <raw> entry is handed to the selected line cooker (pan.lc), which
// supplies the hostname, the service and the timestamp of the entry.
493 if (strcmp((char *)node->name, "raw") == 0 && node->children != NULL)
495 item = node->children;
498 *Log << "Can not cook this type of <raw> log element.\n";
502 raw = String((const char *)item->content);
503 if (pan.lc->cook_this(raw, arrival))
505 log_hostname = pan.lc->hostname();
// Use the hostname from the message header when the cooker found none.
506 if (log_hostname == "")
508 log_hostname = hostname;
510 log_service = pan.lc->service();
511 log_date = pan.lc->timestamp();
515 *Log << "gcm_input WARNING: Not a valid line: " << raw << "\n";
520 else if (strcmp((char *)node->name, "cooked") == 0)
522 // Find the parts of the log entry
526 *Log << "Analyzing cooked element.\n";
// The lookups below are relative to the <cooked> element.
528 pathcontext->node = node;
530 res = xmlXPathEval((const xmlChar *)"hostname/text()", pathcontext);
531 if (res->nodesetval != NULL)
533 item = *res->nodesetval->nodeTab;
534 log_hostname = (const char *)item->content;
// NOTE(review): hostname(0, ~log_hostname) presumably yields the leading part
// of the header hostname with the length of log_hostname, so that a short
// name matches its longer form - confirm against the String class.
535 if (log_hostname != hostname(0, ~log_hostname))
537 *Log << "Hostname " << log_hostname << " does not match.\n";
543 log_hostname = hostname;
546 res = xmlXPathEval((const xmlChar *)"service/text()", pathcontext);
547 if (res->nodesetval != NULL)
549 item = *res->nodesetval->nodeTab;
550 log_service = (const char *)item->content;
554 log_service = service;
557 res = xmlXPathEval((const xmlChar *)"timestamp/text()", pathcontext);
558 if (res->nodesetval != NULL)
560 item = *res->nodesetval->nodeTab;
561 log_date = String((const char *)item->content);
565 *Log << "<timestamp> missing from cooked log element.\n";
568 res = xmlXPathEval((const xmlChar *)"raw/text()", pathcontext);
569 if (res->nodesetval != NULL)
571 item = *res->nodesetval->nodeTab;
572 raw = String((const char *)item->content);
576 *Log << "<raw> missing from cooked log element.\n";
581 // Insert a new log record into the database.
// Only complete entries are stored: the raw text, a hostname and a proper
// timestamp are all required.
582 if (raw != "" && log_hostname != "" && log_date.proper())
584 String insertion("insert into log (objectid, servicecode,"
585 " object_timestamp, timestamp, rawdata, processed) values (");
587 /* Insert a new record into the log table */
// NOTE(review): log_service is taken from the message and is not escaped.
589 insertion += "'" + objectid + "',";
590 insertion += "'" + log_service + "',";
591 insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
592 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
593 insertion += "'" + SQL_Escape(raw) + "',FALSE";
598 *Log << insertion << "\n";
602 database.Query(insertion);
611 else if (strcmp((char *)node->name, "parameters") == 0)
613 // Each child contains a parameter entry, with at least one property
// Notification ids; an empty string means that the notification has not
// been created yet for this message.
617 String change_notification("");
618 String create_notification("");
619 String remove_notification("");
620 bool initial_entry = false;
// NOTE(review): xmlGetProp returns NULL when the attribute is absent and the
// returned string belongs to the caller (xmlFree); neither is handled on this
// line. The same holds for the other xmlGetProp calls below.
621 String param_class((const char *)xmlGetProp(node, (const xmlChar *)"class"));
623 pathcontext->node = node;
625 // If we don't have any parameters of this class, this will be
628 qry = "select name from parameter where objectid='";
629 qry += objectid + "' and class='" + param_class + "'";
630 initial_entry = database.Query(qry) == 0;
632 node = node->children;
635 if (node->type == XML_ELEMENT_NODE &&
636 strcmp((char *)node->name, "parameter") == 0)
638 String param_name((const char *)xmlGetProp(node, (const xmlChar *)"name"));
640 std::list<param_property> properties;
647 // Collect the parameter's properties.
649 item = node->children;
652 if (item->type == XML_ELEMENT_NODE &&
653 strcmp((char *)item->name, "property") == 0)
655 prop.name = (const char *)xmlGetProp(item, (const xmlChar *)"name");
// The value of a property is the content of its first child node.
656 if (item->children != NULL)
658 prop.value = (const char *)item->children->content;
659 properties.push_back(prop);
663 *Log << "WARNING: Property " << prop.name << " has no value.\n";
667 // TODO: Handle description element
672 // Check the parameter in the database.
674 std::list<param_property>::iterator pi = properties.begin();
676 qry = "select paramid from parameter where objectid='";
677 qry += objectid + "' and class='";
678 qry += param_class + "' and name='";
679 qry += param_name + "'";
// Exactly one matching record means that the parameter is already known.
681 if (database.Query(qry) == 1)
683 // The parameter exists in the database; check all properties.
685 bool param_changed = false;
687 paramid = database.Field(0, "paramid");
688 while (pi != properties.end())
690 qry = "select value from property where paramid='";
691 qry += paramid + "' and name='";
692 qry += pi->name + "'";
693 if (database.Query(qry) == 0)
695 *Log << "Property " << pi->name << " of "
696 << param_name << " does not exist.\n";
// The stored value differs from the reported one: update the property and
// record the modification in the history table.
698 else if (database.Field(0, "value") != pi->value)
700 *Log << "Property " << pi->name << " of "
701 << param_name << " is different.\n";
703 insertion = "update property set value='";
704 insertion += pi->value + "' where paramid='";
705 insertion += paramid + "' and name='";
706 insertion += pi->name + "'";
708 database.Query(insertion);
710 insertion = "insert into history (paramid, modified,";
711 insertion += " change_nature, changed_property, new_value)";
712 insertion += " values ('";
713 insertion += paramid + "', '" + arrival.format("%Y-%m-%d %T")
714 + "', 'MODIFIED', '";
715 insertion += pi->name + "', '";
716 insertion += pi->value + "')";
718 database.Query(insertion);
720 param_changed = true;
// A 'property modified' notification is created only once and then reused.
// NOTE(review): the remark speaks of 'package' although param_class holds the
// actual class of the parameters.
729 if (change_notification == "")
731 remark = "Gnucomo detected a different version for package parameter(s) ";
732 change_notification = database.new_notification(objectid,
733 "property modified", remark);
// Link the parameter to the notification unless that link already exists.
736 if (change_notification != "")
738 qry = "select * from parameter_notification where notificationid='";
739 qry += change_notification + "' and paramid='";
740 qry += paramid + "'";
742 if (database.Query(qry) == 0)
744 insertion = "insert into parameter_notification";
745 insertion += " (notificationid, paramid) values ('";
746 insertion += change_notification + "', '";
747 insertion += paramid + "')";
749 database.Query(insertion);
754 *Log << "gcm_input ERROR: Cannot create 'property modified' notification.\n";
760 // The parameter does not exist; create anew.
762 // TODO: Insert description
764 insertion = "insert into parameter (objectid, name, class, description) values ('";
765 insertion += objectid + "', '" + param_name + "', '" + param_class + "', '')";
766 database.Query(insertion);
// Look up the id of the parameter record that was just inserted.
768 qry = "select paramid from parameter where objectid='";
769 qry += objectid + "' and class='";
770 qry += param_class + "' and name='";
771 qry += param_name + "'";
773 paramid = database.Field(0, "paramid");
// Store every collected property and add a history record for it.
775 while (pi != properties.end())
777 insertion = "insert into property (paramid, name, value, type) values ('";
778 insertion += paramid + "', '";
779 insertion += pi->name + "', '";
780 insertion += pi->value + "', 'STATIC')";
781 database.Query(insertion);
783 insertion = "insert into history (paramid, modified,";
784 insertion += " change_nature, changed_property, new_value)";
785 insertion += " values ('";
786 insertion += paramid + "', '" + arrival.format("%Y-%m-%d %T")
788 insertion += pi->name + "', '";
789 insertion += pi->value + "')";
790 database.Query(insertion);
// A 'parameter created' notification is created only once and then reused.
// NOTE(review): the remark names the class 'package' instead of param_class.
797 if (create_notification == "")
799 remark = "Gnucomo detected new parameter(s) of class package";
800 create_notification = database.new_notification(objectid,
801 "parameter created", remark);
803 if (create_notification != "")
805 insertion = "insert into parameter_notification";
806 insertion += " (notificationid, paramid) values ('";
807 insertion += create_notification + "', '";
808 insertion += paramid + "')";
810 database.Query(insertion);
814 *Log << "gcm_input ERROR: Cannot create 'parameter created' notification.\n";
825 // Check if any parameters in this class have disappeared.
827 qry = "select name, paramid from parameter where objectid='";
828 qry += objectid + "' and class='" + param_class + "'";
// Keep the result set of this query in parameter_set, because further
// queries are issued inside the loop.
830 int nr_parameters = database.Query(qry);
831 pqxx::Result parameter_set = database.Result();
833 for (int i = 0; i < nr_parameters; i++)
836 String param_name, paramid;
// Look for a <parameter> element with this name in the report.
// NOTE(review): a name that contains a single quote breaks this expression.
838 param_name = database.Field(parameter_set, i, "name");
839 XPath = "gcmt:parameter[@name='" + param_name + "']";
841 res = xmlXPathEval((const xmlChar *)(const char *)XPath, pathcontext);
// NOTE(review): nodesetval itself is not tested for NULL on this line.
842 if (res->nodesetval->nodeTab == NULL)
844 // The parameter is in the database but not in the report
// The most recent history record tells whether the removal has been
// recorded before.
846 paramid = database.Field(parameter_set, i, "paramid");
847 qry ="select change_nature from history where paramid='";
848 qry += paramid + "' order by modified desc";
849 if (database.Query(qry) <= 0)
851 *Log << "Database ERROR: no history record for parameter "
852 << param_name << ".\n";
854 else if (database.Field(0, "change_nature") != "REMOVED")
858 *Log << "Removing parameter " << param_name << ".\n";
861 insertion = "insert into history (paramid, modified, change_nature)";
862 insertion += " values ('";
863 insertion += paramid + "', '" + arrival.format("%Y-%m-%d %T")
866 database.Query(insertion);
// A 'parameter removed' notification is created only once and then reused.
868 if (remove_notification == "")
870 remark = "Gnucomo detected that package(s) have disappeared ";
871 remove_notification = database.new_notification(objectid,
872 "parameter removed", remark);
875 if (remove_notification != "")
877 insertion = "insert into parameter_notification";
878 insertion += " (notificationid, paramid) values ('";
879 insertion += remove_notification + "', '";
880 insertion += paramid + "')";
882 database.Query(insertion);
886 *Log << "gcm_input ERROR: Cannot create 'parameter removed' notification.\n";
895 *Log << "Data element " << node->name << " is not supported.\n";
900 *Log << "Data node not found.\n";
904 /*=========================================================================
906 ** SYNOPSIS : int enter()
908 ** RETURN VALUE : The number of lines successfully parsed from the input
916 ** LAST MODIFIED : Nov 26, 2003
917 **=========================================================================
920 int client_message::enter()
922 pan.mf->set_message_type(pan.lc->message_type());
924 pan.mf->construct_XML(input, xmlBuffer);
927 *Log << "Constructed XML document:\n\n";
928 *Log << xmlBuffer.str();
932 xmlDom = xmlParseMemory(xmlBuffer.str(), xmlBuffer.pcount());
943 *Log << "XML parser FAILED.\n";