2 /**************************************************************************
3 ** (c) Copyright 2002, Andromeda Technology & Automation
4 ** This is free software; you can redistribute it and/or modify it under the
5 ** terms of the GNU General Public License, see the file COPYING.
6 ***************************************************************************
7 ** MODULE INFORMATION *
8 ***********************
9 ** FILE NAME : message.cpp
10 ** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
11 ** VERSION NUMBER : $Revision: 1.17 $
13 ** DESCRIPTION : Implementation of the message handling classes
18 ***************************************************************************
19 ** ADMINISTRATIVE INFORMATION *
20 ********************************
21 ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
22 ** CREATION DATE : Sep 16, 2002
23 ** LAST UPDATE : Nov 28, 2003
25 **************************************************************************/
27 /*****************************
29 Revision 1.17 2005-05-31 05:51:41 arjen
30 Textual changes in parameter notifications
32 Revision 1.16 2003/12/04 10:38:09 arjen
33 Major redesign. All input is handled through XML. Raw input data is first
34 transformed into an XML document for further processing.
35 A collection of polymorphic classes handle the transformation of various
36 input formats into XML.
37 Classifying input data is done with a finite improbability calculation.
39 Revision 1.15 2003/10/27 11:28:27 arjen
40 Do not add another parameter_notification record if the notification
41 already exists for that parameter.
43 Revision 1.14 2003/09/01 06:57:14 arjen
44 Reject log entries that are found to be invalid.
46 Revision 1.13 2003/08/16 15:28:45 arjen
47 Fixed a namespace problem
49 Revision 1.12 2003/08/11 16:56:16 arjen
50 Different kinds of log files are parsed by a collection of objects
51 of different classes, derived from the base class line_cooker
52 Depending on the message content or the message_type element in
53 XML, one of these objects is selected.
55 Logrunner is integrated with gcm_input. Although its functionality
56 is still limited, a connection between logrunner and gcm_input
59 Revision 1.11 2003/08/05 08:15:00 arjen
60 Debug output to the log stream instead of cerr.
61 Fixed namespace problems in XPath searches of the DOM.
62 Moved string utility functions to a separate file.
64 Revision 1.10 2003/04/29 09:16:44 arjen
66 Only cooked log entries for now.
68 Revision 1.9 2003/03/29 09:04:10 arjen
69 Extract the hostname out of the 'From:' or 'Message-Id:' line
72 Revision 1.8 2003/03/16 09:42:40 arjen
73 Read IRIX system logs.
75 Revision 1.7 2003/02/21 08:08:05 arjen
76 Gcm_input also detects packages that are removed from the system.
77 Determining the version number of a package in a RPM
78 list is improved. Only the last one or two parts of the string that
79 begin with a '-' and a number are considered the version.
81 Revision 1.6 2003/02/05 09:37:51 arjen
82 Create notifications when a new package is discovered
83 in a 'rpm -qa' list or when the version of a package is changed.
85 Revision 1.4 2002/12/06 22:26:28 arjen
86 Set the value of log.processed to FALSE when inserting a
87 new log entry into the database
88 When a syslog entry arrives from last year, gcm_input subtracts one from the
89 year of arrival to create the year of the log entry.
90 Read output from "rpm -qa" and enter packages in the parameter table.
92 Revision 1.3 2002/11/09 08:04:27 arjen
93 Added a reference to the GPL
95 Revision 1.2 2002/11/04 10:13:36 arjen
96 Use proper namespace for iostream classes
98 Revision 1.1 2002/10/05 10:25:49 arjen
99 Creation of gcm_input and a first approach to a web interface
101 *****************************/
103 static const char *RCSID = "$Id: message.cpp,v 1.17 2005-05-31 05:51:41 arjen Exp $";
106 #include <libxml/xpath.h>
107 #include <libxml/debugXML.h>
// Settings and helpers that are defined outside this file.
112 extern bool verbose; /* Defined in the main application */
113 extern bool testmode;
114 extern bool incremental;
// Stream that receives the diagnostic output written by this file (*Log << ...).
115 extern std::ostream *Log;
117 /* Utility functions */
// Applied to the raw log text before it is embedded in an SQL insert statement.
119 extern String SQL_Escape(String s);
121 /*=========================================================================
122 ** NAME : operator >>
123 ** SYNOPSIS : bool operator >> (message_buffer &, String &)
125 ** RETURN VALUE : True if input was available.
127 ** DESCRIPTION : Input operator. Read the next line from the message.
133 ** LAST MODIFIED : Nov 04, 2002
134 **=========================================================================
// Input operator of message_buffer: delivers the next line of the message in s.
// NOTE(review): only fragments of the body are visible in this view, so the
// notes below are limited to what these lines show.
137 bool operator >> (message_buffer &b, String &s)
139 bool input_ok = false;
// next_line equal to buffer.end() presumably means that all buffered lines
// have been handed out already - TODO confirm against the full body.
141 if (b.next_line == b.buffer.end())
// The line l is appended to the buffer, so the buffer keeps the lines seen so far.
147 b.buffer.push_back(l);
149 // next_line keeps pointing to the end.
164 /*=========================================================================
165 ** NAME : client_message
166 ** SYNOPSIS : client_message(std::istream *in, gnucomo_database db)
168 ** RETURN VALUE : None
170 ** DESCRIPTION : Client message constructor.
176 ** LAST MODIFIED : Nov 04, 2002
177 **=========================================================================
// Constructor: takes the input stream and the database to work with.
// The visible statements start the message out as not encrypted and with an
// UNKNOWN classification.
// NOTE(review): the remainder of the constructor is not visible in this view.
180 client_message::client_message(std::istream *in, gnucomo_database db)
187 gpg_encrypted = false;
188 classification = UNKNOWN;
193 static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
195 static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
196 static const regex re_dump("^ *DUMP: Date of this level");
197 static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");
199 static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
200 static const regex re_xml_header("xml .*\?>$");
202 /*=========================================================================
203 ** NAME : extractHeader
204 ** SYNOPSIS : bool extractHeader()
206 ** RETURN VALUE : True if the mandatory header elements are available.
208 ** DESCRIPTION : Extract the header information from the XML DOM tree.
214 ** LAST MODIFIED : Nov 26, 2003
215 **=========================================================================
// Extract the message type, hostname, service and arrival time from the
// header of the parsed XML document (xmlDom) by means of XPath expressions.
// header_OK starts out true. NOTE(review): the statements that clear it and
// the return statement are not visible in this view.
// NOTE(review): no xmlXPathFreeObject or xmlXPathFreeContext call is visible
// here; confirm that each res and the pathcontext are released.
218 bool client_message::extractHeader()
220 xmlNodePtr root, item;
221 xmlNsPtr namespaces[1];
223 xmlXPathObjectPtr res;
224 xmlXPathContextPtr pathcontext;
226 bool header_OK = true;
// NOTE(review): root is dereferenced without a NULL check - confirm that
// callers only get here with a document that has a root element.
228 root = xmlDocGetRootElement(xmlDom);
229 namespaces[0] = root->ns;
231 //TODO Ought to check root->name and root->ns->href
// All expressions are evaluated relative to the root element, with the
// namespace of the root element as the only namespace in the context.
233 pathcontext = xmlXPathNewContext(xmlDom);
234 pathcontext->node = xmlDocGetRootElement(xmlDom);
235 pathcontext->namespaces = namespaces;
236 pathcontext->nsNr = 1;
239 xmlDebugDumpNodeList(stdout, pathcontext->node, 0);
// Message type: used to pick the line cooker with the same message_type().
242 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:messagetype/text()", pathcontext);
243 if (res->nodesetval != NULL && res->nodesetval->nodeTab != NULL)
// Only the first node of the result set is used.
245 item = *res->nodesetval->nodeTab;
247 // Select a line cooker based on the message type.
250 *Log << "Looking for a line cooker for " << item->content << "\n";
// Walk the kitchen list for as long as no cooker is set in pan.lc.
253 std::list<xform>::iterator lci = kitchen.begin();
254 while (pan.lc == 0 && lci != kitchen.end())
256 if (lci->lc->message_type() == (const char *)(item->content))
264 *Log << "Can not find a line cooker for message type " << item->content << "\n";
270 *Log << "Message type not found in XML header.\n";
// Hostname of the system the message came from.
274 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:hostname/text()", pathcontext);
275 if (res->nodesetval != NULL && res->nodesetval->nodeTab != NULL)
277 item = *res->nodesetval->nodeTab;
278 hostname = (const char *)item->content;
282 *Log << "Can not determine the hostname where the message came from.\n";
// Service: no message is logged in the visible code when it is absent.
286 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:service/text()", pathcontext);
287 if (res->nodesetval != NULL && res->nodesetval->nodeTab != NULL)
289 item = *res->nodesetval->nodeTab;
290 service = (const char *)item->content;
// Arrival time: converted from its text form and checked with proper().
292 res = xmlXPathEval((const xmlChar *)"gcmt:header/gcmt:time/text()", pathcontext);
293 if (res->nodesetval != NULL && res->nodesetval->nodeTab != NULL)
295 item = *res->nodesetval->nodeTab;
296 arrival = String((char *)item->content);
297 if (!arrival.proper())
299 *Log << "Arrival time is not properly stated.\n";
304 //xmlDebugDumpNodeList(stdout, *res->nodesetval->nodeTab, 0);
310 /*=========================================================================
312 ** SYNOPSIS : double classify(String host, UTC arriv, String serv)
314 ** RETURN VALUE : The certainty with which the message is classified.
322 ** LAST MODIFIED : Nov 27, 2003
323 **=========================================================================
// Find out what kind of content the message holds. After an optional mail
// header, input lines are tested against the line cookers in the kitchen list.
// Each line that a cooker recognises multiplies the uncertainty of that cooker
// by P; the smallest uncertainty found is kept and the certainty is computed
// as 1.0 - uncertainty.
// NOTE(review): parts of the body are not visible in this view, among them
// the use of the host, arriv and serv arguments and the return statement.
326 double client_message::classify(String host, UTC arriv, String serv)
334 const double epsilon = 0.1; // Threshold for uncertainty
335 const double P = 0.5; // Probability of a wrong match
340 *Log << "Checking for a mail header.\n";
343 /* First, check if the message has a mail header. */
345 if (input >> line && line == re_uxmail_from)
351 /* Skip the mail header until the first empty line. */
353 while (input >> line && line != "")
359 // Push the first line back, we need to read it again.
365 * Now that we have the mail header out of the way, try to figure
366 * out what the content of the message is.
370 *Log << "Classifying message.\n";
// Keep reading lines until the input runs out or the uncertainty is small enough.
// NOTE(review): the literal 0.1 repeats the value of epsilon declared above;
// epsilon itself is not used in the visible code.
376 while (input >> line && uncertainty > 0.1)
380 *Log << " testing: " << line << "\n";
386 gpg_encrypted = true;
387 *Log << "The message is PGP/GnuPG encrypted.\n";
391 // Scan the list of line cookers if there is anything familiar.
393 std::list<xform>::iterator lci = kitchen.begin();
395 while (lci != kitchen.end())
397 if (lci->lc->check_pattern(line))
399 // We have a match; decrease the uncertainty
401 lci->uncertainty *= P;
// Remember the lowest uncertainty of all cookers seen so far.
402 if (uncertainty > lci->uncertainty)
404 uncertainty = lci->uncertainty;
409 *Log << lci->lc->message_type() << " detected with "
410 << lci->uncertainty << " uncertainty.\n";
415 classification = COOKER_OBJECT;
419 //TODO: If uncertainty is still too great, pick the least uncertain.
423 certainty = 1.0 - uncertainty;
428 /*=========================================================================
430 ** SYNOPSIS : void enterXML()
432 ** RETURN VALUE : None
434 ** DESCRIPTION : Analyze the DOM tree from the XML input.
435 ** The DOM tree was previously parsed by readXMLinput().
441 ** LAST MODIFIED : Nov 28, 2003
442 **=========================================================================
// One property of a parameter as read from the XML input. enterXML fills in
// and reads the name and value members.
445 struct param_property
// Enter the content of the <data> element of the parsed XML document (xmlDom)
// into the database for the host named in the message header.
//  - A <log> element holds <raw> or <cooked> log entries; each valid entry
//    becomes a record in the log table.
//  - A <parameters> element holds <parameter> elements with properties. These
//    are compared with the parameter and property tables; new, changed and
//    disappeared parameters are written to the history table and reported
//    through notifications.
// Any other data element is reported as not supported.
// NOTE(review): parts of the body are not visible in this view.
// NOTE(review): no xmlXPathFreeObject or xmlXPathFreeContext call is visible
// here; confirm that each res and the pathcontext are released.
451 void client_message::enterXML()
453 //TODO : return the number of elements that are handled.
455 xmlXPathObjectPtr res;
456 xmlXPathContextPtr pathcontext;
457 xmlNsPtr namespaces[1];
459 /* Try to find the host in the database */
462 String remark; // For notifications
464 objectid = database.find_host(hostname);
467 *Log << "Please define the host " << hostname << " in the database.\n";
472 *Log << "Object id for " << hostname << " is " << objectid << "\n";
// XPath context relative to the root element, with the namespace of the
// root element as the only namespace.
475 pathcontext = xmlXPathNewContext(xmlDom);
476 pathcontext->node = xmlDocGetRootElement(xmlDom);
477 namespaces[0] = pathcontext->node->ns;
478 pathcontext->namespaces = namespaces;
479 pathcontext->nsNr = 1;
481 res = xmlXPathEval((const xmlChar *)"gcmt:data/node()", pathcontext);
// NOTE(review): only nodesetval is tested before nodeTab is dereferenced,
// whereas extractHeader also tests nodeTab - confirm that an empty result
// can not reach the dereference below.
483 if (res->nodesetval != NULL)
485 // Find the first child element of the <data> element.
487 xmlNodePtr node = *res->nodesetval->nodeTab;
488 while (node->type != XML_ELEMENT_NODE)
492 if (strcmp((char *)node->name, "log") == 0)
494 // Each child contains a log entry, raw or cooked.
496 node = node->children;
499 if (node->type == XML_ELEMENT_NODE)
// A <raw> entry is parsed by the selected line cooker (pan.lc), which
// supplies the hostname, service and timestamp of the entry.
507 if (strcmp((char *)node->name, "raw") == 0 && node->children != NULL)
509 item = node->children;
512 *Log << "Can not cook this type of <raw> log element.\n";
516 raw = String((const char *)item->content);
517 if (pan.lc->cook_this(raw, arrival))
519 log_hostname = pan.lc->hostname();
// Fall back to the hostname of the message when the cooker found none.
520 if (log_hostname == "")
522 log_hostname = hostname;
524 log_service = pan.lc->service();
525 log_date = pan.lc->timestamp();
529 *Log << "gcm_input WARNING: Not a valid line: " << raw << "\n";
534 else if (strcmp((char *)node->name, "cooked") == 0)
536 // Find the parts of the log entry
540 *Log << "Analyzing cooked element.\n";
// The expressions below are evaluated relative to the <cooked> element.
542 pathcontext->node = node;
544 res = xmlXPathEval((const xmlChar *)"hostname/text()", pathcontext);
// NOTE(review): here and in the three lookups that follow, nodeTab is
// dereferenced after testing nodesetval only.
545 if (res->nodesetval != NULL)
547 item = *res->nodesetval->nodeTab;
548 log_hostname = (const char *)item->content;
// Presumably compares with the leading part of hostname that is as long as
// log_hostname - TODO confirm the String operators used here.
549 if (log_hostname != hostname(0, ~log_hostname))
551 *Log << "Hostname " << log_hostname << " does not match.\n";
557 log_hostname = hostname;
560 res = xmlXPathEval((const xmlChar *)"service/text()", pathcontext);
561 if (res->nodesetval != NULL)
563 item = *res->nodesetval->nodeTab;
564 log_service = (const char *)item->content;
568 log_service = service;
571 res = xmlXPathEval((const xmlChar *)"timestamp/text()", pathcontext);
572 if (res->nodesetval != NULL)
574 item = *res->nodesetval->nodeTab;
575 log_date = String((const char *)item->content);
579 *Log << "<timestamp> missing from cooked log element.\n";
582 res = xmlXPathEval((const xmlChar *)"raw/text()", pathcontext);
583 if (res->nodesetval != NULL)
585 item = *res->nodesetval->nodeTab;
586 raw = String((const char *)item->content);
590 *Log << "<raw> missing from cooked log element.\n";
595 // Insert a new log record into the database.
// Only entries with raw text, a hostname and a proper date are stored.
596 if (raw != "" && log_hostname != "" && log_date.proper())
598 String insertion("insert into log (objectid, servicecode,"
599 " object_timestamp, timestamp, rawdata, processed) values (");
601 /* Insert a new record into the log table */
// NOTE(review): only the raw text goes through SQL_Escape; log_service is
// concatenated into the statement as it is.
603 insertion += "'" + objectid + "',";
604 insertion += "'" + log_service + "',";
605 insertion += "'" + log_date.format("%Y-%m-%d %T") + "',";
606 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
607 insertion += "'" + SQL_Escape(raw) + "',FALSE";
612 *Log << insertion << "\n";
616 database.Query(insertion);
625 else if (strcmp((char *)node->name, "parameters") == 0)
627 // Each child contains a parameter entry, with at least one property
// One notification of each kind is shared by all parameters of this class;
// the strings stay empty until the notification has been created.
631 String change_notification("");
632 String create_notification("");
633 String remove_notification("");
634 bool initial_entry = false;
// NOTE(review): the result of xmlGetProp is used without a NULL check and is
// not freed in the visible code - confirm against the libxml2 documentation.
635 String param_class((const char *)xmlGetProp(node, (const xmlChar *)"class"));
638 *Log << "Entering a list of " << param_class << " parameters.\n";
640 pathcontext->node = node;
642 // If we don't have any parameters of this class, this will be
// NOTE(review): in the queries of this branch, values taken from the XML
// input (class, parameter name, property name and value) are concatenated
// into SQL without SQL_Escape.
645 qry = "select name from parameter where objectid='";
646 qry += objectid + "' and class='" + param_class + "'";
647 initial_entry = database.Query(qry) == 0;
649 node = node->children;
652 if (node->type == XML_ELEMENT_NODE &&
653 strcmp((char *)node->name, "parameter") == 0)
655 String param_name((const char *)xmlGetProp(node, (const xmlChar *)"name"));
658 *Log << "Parameter with name " << param_name << "\n";
660 std::list<param_property> properties;
667 // Collect the parameter's properties.
669 item = node->children;
672 if (item->type == XML_ELEMENT_NODE &&
673 strcmp((char *)item->name, "property") == 0)
675 prop.name = (const char *)xmlGetProp(item, (const xmlChar *)"name");
// A property without content is reported and not added to the list.
676 if (item->children != NULL)
678 prop.value = (const char *)item->children->content;
679 properties.push_back(prop);
683 *Log << "WARNING: Property " << prop.name << " has no value.\n";
687 // TODO: Handle description element
692 // Check the parameter in the database.
694 std::list<param_property>::iterator pi = properties.begin();
696 qry = "select paramid from parameter where objectid='";
697 qry += objectid + "' and class='";
698 qry += param_class + "' and name='";
699 qry += param_name + "'";
701 if (database.Query(qry) == 1)
703 // The parameter exists in the database; check all properties.
705 bool param_changed = false;
707 paramid = database.Field(0, "paramid");
708 while (pi != properties.end())
710 qry = "select value from property where paramid='";
711 qry += paramid + "' and name='";
712 qry += pi->name + "'";
713 if (database.Query(qry) == 0)
715 *Log << "Property " << pi->name << " of "
716 << param_name << " does not exist.\n";
// A different value: update the property and record the change in history.
718 else if (database.Field(0, "value") != pi->value)
720 *Log << "Property " << pi->name << " of "
721 << param_name << " is different.\n";
723 insertion = "update property set value='";
724 insertion += pi->value + "' where paramid='";
725 insertion += paramid + "' and name='";
726 insertion += pi->name + "'";
728 database.Query(insertion);
730 insertion = "insert into history (paramid, modified,";
731 insertion += " change_nature, changed_property, new_value)";
732 insertion += " values ('";
733 insertion += paramid + "', '" + arrival.format("%Y-%m-%d %T")
734 + "', 'MODIFIED', '";
735 insertion += pi->name + "', '";
736 insertion += pi->value + "')";
738 database.Query(insertion);
740 param_changed = true;
// Create the 'property modified' notification once, on first use.
749 if (change_notification == "")
751 remark = "Gnucomo detected a different property for parameter(s) ";
752 change_notification = database.new_notification(objectid,
753 "property modified", remark);
756 if (change_notification != "")
// Link the parameter to the notification unless that link exists already.
758 qry = "select * from parameter_notification where notificationid='";
759 qry += change_notification + "' and paramid='";
760 qry += paramid + "'";
762 if (database.Query(qry) == 0)
764 insertion = "insert into parameter_notification";
765 insertion += " (notificationid, paramid) values ('";
766 insertion += change_notification + "', '";
767 insertion += paramid + "')";
769 database.Query(insertion);
774 *Log << "gcm_input ERROR: Cannot create 'property modified' notification.\n";
780 // The parameter does not exist; create anew.
782 // TODO: Insert description
784 insertion = "insert into parameter (objectid, name, class, description) values ('";
785 insertion += objectid + "', '" + param_name + "', '" + param_class + "', '')";
786 database.Query(insertion);
// Read back the paramid of the parameter that was just inserted.
788 qry = "select paramid from parameter where objectid='";
789 qry += objectid + "' and class='";
790 qry += param_class + "' and name='";
791 qry += param_name + "'";
793 paramid = database.Field(0, "paramid");
// Store every property of the new parameter, each with a history record.
795 while (pi != properties.end())
797 insertion = "insert into property (paramid, name, value, type) values ('";
798 insertion += paramid + "', '";
799 insertion += pi->name + "', '";
800 insertion += pi->value + "', 'STATIC')";
801 database.Query(insertion);
803 insertion = "insert into history (paramid, modified,";
804 insertion += " change_nature, changed_property, new_value)";
805 insertion += " values ('";
806 insertion += paramid + "', '" + arrival.format("%Y-%m-%d %T")
808 insertion += pi->name + "', '";
809 insertion += pi->value + "')";
810 database.Query(insertion);
// Create the 'parameter created' notification once, on first use.
817 if (create_notification == "")
819 remark = "Gnucomo detected new parameter(s) of class " + param_class;
820 create_notification = database.new_notification(objectid,
821 "parameter created", remark);
823 if (create_notification != "")
825 insertion = "insert into parameter_notification";
826 insertion += " (notificationid, paramid) values ('";
827 insertion += create_notification + "', '";
828 insertion += paramid + "')";
830 database.Query(insertion);
834 *Log << "gcm_input ERROR: Cannot create 'parameter created' notification.\n";
845 // Check if any parameters in this class have disappeared.
847 qry = "select name, paramid from parameter where objectid='";
848 qry += objectid + "' and class='" + param_class + "'";
// Keep the result set: the loop below runs other queries while it walks
// through these rows.
850 int nr_parameters = database.Query(qry);
851 pqxx::Result parameter_set = database.Result();
853 for (int i = 0; i < nr_parameters; i++)
856 String param_name, paramid;
858 param_name = database.Field(parameter_set, i, "name");
859 XPath = "gcmt:parameter[@name='" + param_name + "']";
861 res = xmlXPathEval((const xmlChar *)(const char *)XPath, pathcontext);
// NOTE(review): res->nodesetval is dereferenced without a NULL check here.
862 if (res->nodesetval->nodeTab == NULL)
864 // The parameter is in the database but not in the report
867 *Log << "Could not find " << XPath << " in XML tree.\n";
869 paramid = database.Field(parameter_set, i, "paramid");
// Look at the most recent history record of this parameter.
870 qry ="select change_nature from history where paramid='";
871 qry += paramid + "' order by modified desc";
872 if (database.Query(qry) <= 0)
874 *Log << "Database ERROR: no history record for parameter "
875 << param_name << ".\n";
// Skip parameters whose latest history record already says REMOVED.
877 else if (database.Field(0, "change_nature") != "REMOVED")
881 *Log << "Removing parameter " << param_name << ".\n";
884 insertion = "insert into history (paramid, modified, change_nature)";
885 insertion += " values ('";
886 insertion += paramid + "', '" + arrival.format("%Y-%m-%d %T")
889 database.Query(insertion);
// Create the 'parameter removed' notification once, on first use.
891 if (remove_notification == "")
893 remark = "Gnucomo detected that " + param_class
894 + " parameters(s) have disappeared ";
895 remove_notification = database.new_notification(objectid,
896 "parameter removed", remark);
899 if (remove_notification != "")
901 insertion = "insert into parameter_notification";
902 insertion += " (notificationid, paramid) values ('";
903 insertion += remove_notification + "', '";
904 insertion += paramid + "')";
906 database.Query(insertion);
910 *Log << "gcm_input ERROR: Cannot create 'parameter removed' notification.\n";
919 *Log << "Data element " << node->name << " is not supported.\n";
924 *Log << "Data node not found.\n";
928 /*=========================================================================
930 ** SYNOPSIS : int enter()
932 ** RETURN VALUE : The number of lines successfully parsed from the input
940 ** LAST MODIFIED : Nov 26, 2003
941 **=========================================================================
944 int client_message::enter()
946 pan.mf->set_message_type(pan.lc->message_type());
948 pan.mf->construct_XML(input, xmlBuffer);
951 *Log << "Constructed XML document:\n\n";
952 *Log << xmlBuffer.str();
956 xmlDom = xmlParseMemory(xmlBuffer.str(), xmlBuffer.pcount());
967 *Log << "XML parser FAILED.\n";