2 /**************************************************************************
3 ** (c) Copyright 2002, Andromeda Technology & Automation
4 ** This is free software; you can redistribute it and/or modify it under the
5 ** terms of the GNU General Public License, see the file COPYING.
6 ***************************************************************************
7 ** MODULE INFORMATION *
8 ***********************
9 ** FILE NAME : message.cpp
10 ** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
11 ** VERSION NUMBER : $Revision: 1.9 $
13 ** DESCRIPTION : Implementation of the message handling classes
18 ***************************************************************************
19 ** ADMINISTRATIVE INFORMATION *
20 ********************************
21 ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
22 ** CREATION DATE : Sep 16, 2002
23 ** LAST UPDATE : Mar 28, 2003
25 **************************************************************************/
27 /*****************************
29 Revision 1.9 2003-03-29 09:04:10 arjen
30 Extract the hostname out of the 'From:' or 'Message-Id:' line
33 Revision 1.8 2003/03/16 09:42:40 arjen
34 Read IRIX system logs.
36 Revision 1.7 2003/02/21 08:08:05 arjen
37 Gcm_input also detects packages that are removed from the system.
38 Determining the version number of a package in a RPM
39 list is improved. Only the last one or two parts of the string that
40 begin with a '-' and a number are considered the version.
42 Revision 1.6 2003/02/05 09:37:51 arjen
43 Create notifications when a new package is discovered
44 in a 'rpm -qa' list or when the version of a package is changed.
46 Revision 1.4 2002/12/06 22:26:28 arjen
47 Set the value of log.processed to FALSE when inserting a
48 new log entry into the database
49 When a syslog entry arrives from last year, gcm_input subtracts one from the
50 year of arrival to create the year of the log entry.
51 Read output from "rpm -qa" and enter packages in the parameter table.
53 Revision 1.3 2002/11/09 08:04:27 arjen
54 Added a reference to the GPL
56 Revision 1.2 2002/11/04 10:13:36 arjen
57 Use proper namespace for iostream classes
59 Revision 1.1 2002/10/05 10:25:49 arjen
60 Creation of gcm_input and a first approach to a web interface
62 *****************************/
// Version identification string for this source file ($Id$ keyword).
64 static const char *RCSID = "$Id: message.cpp,v 1.9 2003-03-29 09:04:10 arjen Exp $";
69 extern bool verbose; /* Defined in the main application */
72 /* Utility functions */
// Forward declaration; the definition appears further down in this file.
74 String SQL_Escape(String s);
76 /*=========================================================================
78 ** SYNOPSIS : bool operator >> (message_buffer &, String &)
80 ** RETURN VALUE : True if input was available.
82 ** DESCRIPTION : Input operator. Read the next line from the message.
88 ** LAST MODIFIED : Nov 04, 2002
89 **=========================================================================
// Input operator for a message_buffer: fetch the next line of the message
// held by b into s.
92 bool operator >> (message_buffer &b, String &s)
// Result flag; starts out false.
94 bool input_ok = false;
// next_line has reached the end of the lines buffered so far.
96 if (b.next_line == b.buffer.end())
// Remember the line l in the buffer.
// NOTE(review): how l is obtained is not visible in this excerpt -
// presumably it is read from the underlying input stream; confirm.
102 b.buffer.push_back(l);
104 // next_line keeps pointing to the end.
// Construct a client_message for the input stream in and the database db.
// NOTE(review): how in and db are stored is not visible in this excerpt.
119 client_message::client_message(std::istream *in, gnucomo_database db)
// A new message starts out as not encrypted and not yet classified.
126 gpg_encrypted = false;
127 classification = UNKNOWN;
// Building blocks: date and e-mail address patterns that are shared by the
// regular expressions defined below.
131 static const String syslog_date_re("[[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}");
132 static const String mail_date_re("[[:alpha:]]{3}, [ 123]?[0-9] [[:alpha:]]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [+-][0-9]{4}");
133 static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
134 static const String email_address_re("[[:alnum:]_.-]+@[[:alnum:]_.-]+");
// Patterns used to recognise what kind of content a message line carries.
136 static const regex re_syslog(syslog_date_re + " [[:alnum:]]+ [[:alpha:]]+.*:.+");
137 static const regex re_syslog_irix(syslog_date_re + " [0-7][A-T]:[[:alnum:]]+ [[:alpha:]]+.*:.+");
138 static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
139 static const regex re_dump("^ *DUMP: Date of this level");
140 static const regex re_accesslog("(GET|POST) .+ HTTP");
141 static const regex re_errorlog("^\\[" + unix_date_re + "\\] \\[(error|notice)\\] .+");
142 static const regex re_rpm("[[:alnum:]+-]+-[0-9][[:alnum:].-]");
// Patterns used to pick individual fields out of a line or a mail header.
// The date and address expressions are built from the shared pattern strings
// above instead of repeating the literals, so the two can not drift apart.
144 static const regex re_syslog_date(syslog_date_re);
145 static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
146 static const regex re_mail_From("^From:[[:blank:]]+");
147 static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
148 static const regex re_mail_MsId("^Message-Id:[[:blank:]]+");
149 static const regex re_email_address(email_address_re);
150 static const regex re_email_user("[[:alnum:]_.-]+@");
152 /*=========================================================================
154 ** SYNOPSIS : double classify(String host, UTC arriv, String serv)
156 ** RETURN VALUE : The certainty with which the message is classified.
164 ** LAST MODIFIED : Mar 28, 2003
165 **=========================================================================
// Classify the message. An optional mail header is scanned for the sending
// host and the arrival time; after that the content lines are tested against
// the known log formats until the certainty reaches 0.9.
// Returns the certainty with which the message is classified.
// NOTE(review): the use of host, arriv and serv is not visible in this
// excerpt - presumably they supply hostname, arrival and service; confirm.
168 double client_message::classify(String host, UTC arriv, String serv)
176 /* First, check if the message has a mail header. */
178 if (input >> line && line == re_uxmail_from)
184 /* Scan ahead for the hostname and date of arrival. */
// The mail header ends at the first empty line.
186 while (input >> line && line != "")
188 if (line == re_mail_From)
// Take the e-mail address and strip "user@" so only the host part remains.
190 from_address = line(re_email_address);
191 from_address(re_email_user) = ""; // Remove the user part;
192 if (from_address != "")
194 hostname = from_address;
// A Message-Id: line is treated the same way as a From: line.
197 if (line == re_mail_MsId)
199 from_address = line(re_email_address);
200 from_address(re_email_user) = ""; // Remove the user part;
201 if (from_address != "")
203 hostname = from_address;
// The Date: header supplies the time of arrival.
206 if (line == re_mail_Date)
208 arrival = UTC(line(regex(mail_date_re)));
214 // Push the first line back, we need to read it again.
220 * Now that we have the mail header out of the way, try to figure
221 * out what the content of the message is.
// Keep testing lines until one of the patterns gives enough certainty.
225 while (input >> line && certainty < 0.9)
227 std::cout << " testing: " << line << "\n";
228 if (line == re_syslog)
231 classification = SYSLOG;
234 std::cout << "Syslog detected.\n";
237 else if (line == re_syslog_irix)
240 classification = SYSLOG_IRIX;
243 std::cout << "IRIX Syslog detected.\n";
246 else if (line == re_PGP)
249 gpg_encrypted = true;
250 std::cerr << "The message is PGP/GnuPG encrypted.\n";
252 else if (line == re_dump)
257 std::cout << "DUMP output detected.\n";
260 else if (line == re_accesslog)
263 classification = ACCESSLOG;
267 std::cout << "HTTP access log detected.\n";
270 else if (line == re_errorlog)
273 classification = ERRORLOG;
277 std::cout << "HTTP error log detected.\n";
280 else if (line == re_rpm)
283 classification = RPMLIST;
287 std::cout << "RPM package list detected.\n";
295 std::cerr << "Can not determine the hostname where the message came from.\n";
298 else if (!arrival.proper())
// Spelling of the message corrected ("knwon" -> "known").
300 std::cerr << "Arrival time is not known.\n";
311 /*=========================================================================
313 ** SYNOPSIS : int enter()
315 ** RETURN VALUE : The number of lines successfully parsed from the input
323 ** LAST MODIFIED : Mar 28, 2003
324 **=========================================================================
// Enter the content of a classified message into the database.
// Depending on the classification, each input line becomes a record in the
// log table, or (for an RPM list) a package parameter with a version
// property, history record and notification.
// Returns the number of lines successfully parsed from the input.
327 int client_message::enter()
333 String change_notification("");
334 String create_notification("");
335 bool initial_entry = false;
337 std::list<String> packages;
340 /* Double-check the classification of the message */
// NOTE(review): the handling of this case is not visible in this excerpt.
342 if (classification == UNKNOWN || certainty < 0.9 || gpg_encrypted)
349 // Skip the mail header.
351 while (input >> line && line != "");
354 /* Try to find the host in the database */
358 objectid = database.find_host(hostname);
361 std::cerr << "Please define the host " << hostname << " in the database.\n";
366 std::cout << "Object id for " << hostname << " is " << objectid << "\n";
369 if (classification == RPMLIST)
374 /* Read all packages, so we will know which ones are */
375 /* missing at the end. */
377 qry = "select name from parameter where objectid='";
378 qry += objectid + "' and class='package'";
379 n_packages = database.Query(qry);
// No packages known yet for this host: this is the first package list.
380 initial_entry = n_packages == 0;
382 std::cout << n_packages << " packages in database.\n";
383 for (int t = 0; t < n_packages; t++)
385 packages.push_back(database.Field(t, "name"));
387 std::cout << "Package list built: " << packages.size() << ".\n";
390 /* Scan the input line by line, entering records into the database */
392 String rest; // Rest of the line to be parsed
394 while (input >> line)
398 std::cout << line << "\n";
402 /* Check each line if it contains valid information */
// Select the pattern that a valid line of this classification must match.
406 switch (classification)
412 check = &re_syslog_irix;
415 check = &re_accesslog;
418 check = &re_errorlog;
431 String insertion("insert into log (objectid, servicecode,"
432 " object_timestamp, timestamp, rawdata, processed) values (");
435 switch (classification)
440 if (log_date.Year() < 0 || log_date.Year() > 2500)
442 // The year is not in the log file. Assume the year of arrival,
443 // unless this puts the log entry at a later date than the arrival date.
444 // This happens e.g. when a log entry from December arrives in January.
446 log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year());
447 if (log_date > date(arrival))
449 log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year() - 1);
455 std::cout << " Log timestamp = " << log_date << " " << log_time << "\n";
// Only accept the entry if it names the host the message came from.
459 if (rest(0,i) == hostname(0,i))
464 std::cout << " Hostname matches.\n";
465 std::cout << " rest = " << rest << "\n";
// The leading alphabetic characters form the service name. The length is
// checked before rest[i] is read, so the scan never reads past the end.
467 for (i = 0; i < ~rest && isalpha(rest[i]); i++);
470 std::cout << " Service name = " << rest(0,i) << "\n";
473 /* Insert a new record into the log table */
475 insertion += "'" + objectid + "',";
476 insertion += "'" + rest(0,i) + "',";
477 insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
478 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
479 insertion += "'" + SQL_Escape(line) + "',FALSE";
484 std::cout << insertion << "\n";
488 database.Query(insertion);
500 std::cerr << " Hostname " << rest(0,i) << " does not match.\n";
507 if (log_date.Year() < 0 || log_date.Year() > 2500)
509 // The year is not in the log file. Assume the year of arrival,
510 // unless this puts the log entry at a later date than the arrival date.
511 // This happens e.g. when a log entry from December arrives in January.
513 log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year());
514 if (log_date > date(arrival))
516 log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year() - 1);
522 std::cout << " Log timestamp = " << log_date << " " << log_time << "\n";
// Only accept the entry if it names the host the message came from.
526 if (rest(0,i) == hostname(0,i))
531 std::cout << " Hostname matches.\n";
532 std::cout << " rest = " << rest << "\n";
// The leading alphabetic characters form the service name. The length is
// checked before rest[i] is read, so the scan never reads past the end.
534 for (i = 0; i < ~rest && isalpha(rest[i]); i++);
537 std::cout << " Service name = " << rest(0,i) << "\n";
540 /* Insert a new record into the log table */
542 insertion += "'" + objectid + "',";
543 insertion += "'" + rest(0,i) + "',";
544 insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
545 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
546 insertion += "'" + SQL_Escape(line) + "',FALSE";
551 std::cout << insertion << "\n";
555 database.Query(insertion);
567 std::cerr << " Hostname " << rest(0,i) << " does not match.\n";
// The timestamp is the part of the line between square brackets.
572 datestring = line(regex("\\[.+\\]"));
// Replace the first ':' by a space before converting to date and time.
575 datestring[datestring.index(':')] = ' ';
576 log_date = datestring;
577 log_time = datestring;
580 std::cout << " Log timestamp = " << log_date << " " << log_time << "\n";
582 insertion += "'" + objectid + "',";
583 insertion += "'" + service + "',";
584 insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
585 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
586 insertion += "'" + SQL_Escape(line) + "',FALSE";
591 std::cout << insertion << "\n";
595 database.Query(insertion);
// The timestamp is the part of the line between square brackets.
607 datestring = line(regex("\\[.+\\]"));
610 log_date = datestring;
611 log_time = datestring;
614 std::cout << " Log timestamp = " << log_date << " " << log_time << "\n";
616 insertion += "'" + objectid + "',";
617 insertion += "'" + service + "',";
618 insertion += "'" + log_date.format("%Y-%m-%d") + " " + log_time.format() + "',";
619 insertion += "'" + arrival.format("%Y-%m-%d %T") + "',";
620 insertion += "'" + SQL_Escape(line) + "',FALSE";
625 std::cout << insertion << "\n";
629 database.Query(insertion);
641 // Scan a list of packages and versions from "rpm -qa".
642 // A similar listing can be created on IRIX 6.5 by using the
643 // command "showprods -3 -n|awk '{printf "%s-%s\n",$2,$3}'|grep -v '^[-=]' \
644 // |grep -v Version-Description".
646 // We have to separate the package name and the version.
647 // The separation is marked by a '-', followed by a digit.
648 // However, there may be other sequences of '-'digit in the package name,
649 // so we have to scan ahead until there is at most one such sequence
650 // left in the version string. The '-'digit sequence inside the
651 // version usually separates the version and the release number.
653 int version_start, next_version_start;
657 next_version_start = i;
659 while (i < ~line - 1)
661 while (i < ~line - 1 && !(line[i] == '-' && isdigit(line[i + 1])))
667 version_start = next_version_start;
668 next_version_start = i;
673 if (!isdigit(line[version_start + 1]))
675 version_start = next_version_start;
677 String package(line(0,version_start));
678 String version(line(version_start + 1, ~line));
685 std::cout << "Package is " << package;
686 std::cout << ", version is " << version << "\n";
689 // Construct a qry to check the package's existence
// Package name and version are taken from the incoming message, so they
// are escaped wherever they are placed inside an SQL statement.
691 qry = "select paramid from parameter where objectid='";
692 qry += objectid + "' and class='package' and name='";
693 qry += SQL_Escape(package) + "'";
695 if (database.Query(qry) == 1)
697 std::list<String>::iterator lp;
// Look the package up in the list of packages read from the database.
699 lp = find(packages.begin(), packages.end(), package);
700 if (lp != packages.end())
706 std::cerr << "Could NOT find " << package << " in list.\n";
709 paramid = database.Field(0, "paramid");
710 qry = "select value from property where paramid='";
711 qry += paramid + "' and name='version'";
712 if (database.Query(qry) == 0)
714 std::cerr << "Database corruption: Package " << package;
715 std::cerr << " does not have a 'version' property.\n";
// The comparison uses the unescaped version, as it is stored in the database.
717 else if (database.Field(0, "value") != version)
721 std::cout << " Parameter " << package << " has different version\n";
723 insertion = "update property set value='";
724 insertion += SQL_Escape(version) + "' where paramid='";
725 insertion += paramid + "' and name='version'";
727 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
728 insert_h += " values ('";
729 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'MODIFIED', 'version', '";
730 insert_h += SQL_Escape(version) + "')";
732 database.Query(insertion);
733 database.Query(insert_h);
// One 'property modified' notification is shared by all changed packages.
735 if (change_notification == "")
737 remark = "Gnucomo detected a different version for package parameter(s) ";
738 change_notification = database.new_notification(objectid, "property modified", remark);
741 if (change_notification != "")
743 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
744 insertion += change_notification + "', '";
745 insertion += paramid + "')";
747 database.Query(insertion);
751 std::cerr << "gcm_input ERROR: Cannot create 'property modified' notification.\n";
758 std::cout << " Parameter " << package << " has not changed.\n";
767 std::cout << " Parameter " << package << " does not exist.\n";
769 // Create a new package parameter, including version property and history record
771 insertion = "insert into parameter (objectid, name, class, description) values ('";
772 insertion += objectid + "', '" + SQL_Escape(package) + "', 'package', 'RPM package " + SQL_Escape(package) + "')";
776 std::cout << insertion << "\n";
780 database.Query(insertion);
// Read back the id of the parameter that was just created.
781 qry = "select paramid from parameter where objectid='";
782 qry += objectid + "' and class='package' and name='";
783 qry += SQL_Escape(package) + "'";
785 paramid = database.Field(0, "paramid");
788 insertion = "insert into property (paramid, name, value, type) values ('";
789 insertion += paramid + "', 'version', '";
790 insertion += SQL_Escape(version) + "', 'STATIC')";
791 insert_h = "insert into history (paramid, modified, change_nature, changed_property, new_value)";
792 insert_h += " values ('";
793 insert_h += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'CREATED', 'version', '";
794 insert_h += SQL_Escape(version) + "')";
798 std::cout << insertion << "\n" << insert_h << "\n";
802 database.Query(insertion);
803 database.Query(insert_h);
// One 'parameter created' notification is shared by all new packages.
806 if (create_notification == "")
808 remark = "Gnucomo detected new parameter(s) of class package";
809 create_notification = database.new_notification(objectid, "parameter created", remark);
811 if (create_notification != "")
813 insertion = "insert into parameter_notification (notificationid, paramid) values ('";
814 insertion += create_notification + "', '";
815 insertion += paramid + "')";
817 database.Query(insertion);
821 std::cerr << "gcm_input ERROR: Cannot create 'parameter created' notification.\n";
839 std::cerr << "gcm_input WARNING: Not a valid line: " << line << "\n";
843 if (classification == RPMLIST)
845 std::list<String>::iterator lp;
846 String remove_notification("");
849 * If there are any packages left in the list, they seem to have
850 * disappeared from the system.
853 for (lp = packages.begin(); lp != packages.end(); lp++)
859 // Construct a qry to check the package's existence
861 qry = "select paramid from parameter where objectid='";
862 qry += objectid + "' and class='package' and name='";
865 if (database.Query(qry) == 1)
867 paramid = database.Field(0, "paramid");
// The most recent history record tells whether the removal is already known.
868 qry ="select change_nature from history where paramid='";
869 qry += paramid + "' order by modified desc";
870 if (database.Query(qry) <= 0)
872 std::cerr << "Database ERROR: no history record for parameter " << *lp << ".\n";
874 else if (database.Field(0, "change_nature") != "REMOVED")
878 std::cout << "Removing parameter " << *lp << ".\n";
881 insert = "insert into history (paramid, modified, change_nature)";
882 insert += " values ('";
883 insert += paramid + "', '" + arrival.format("%Y-%m-%d %T") + "', 'REMOVED')";
885 database.Query(insert);
// One 'parameter removed' notification is shared by all removed packages.
887 if (remove_notification == "")
889 remark = "Gnucomo detected that package(s) have disappeared ";
890 remove_notification = database.new_notification(objectid, "parameter removed", remark);
893 if (remove_notification != "")
895 insert = "insert into parameter_notification (notificationid, paramid) values ('";
896 insert += remove_notification + "', '";
897 insert += paramid + "')";
899 database.Query(insert);
903 std::cerr << "gcm_input ERROR: Cannot create 'parameter removed' notification.\n";
912 std::cout << nr_lines << " lines parsed from the log file.\n";
917 /*=========================================================================
919 ** SYNOPSIS : String SQL_Escape(String)
923 ** DESCRIPTION : Insert backslashes before single quotes.
930 **=========================================================================
933 String SQL_Escape(String s)
937 for (i = 0; i < ~s; i++)