+
+/**************************************************************************
+** (c) Copyright 2002, Andromeda Technology & Automation
+***************************************************************************
+** MODULE INFORMATION *
+***********************
+** FILE NAME : message.cpp
+** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
+** VERSION NUMBER : $Revision: 1.1 $
+**
+** DESCRIPTION : Implementation of the message handling classes
+**
+** EXPORTED OBJECTS :
+** LOCAL OBJECTS :
+** MODULES USED :
+***************************************************************************
+** ADMINISTRATIVE INFORMATION *
+********************************
+** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
+** CREATION DATE : Sep 16, 2002
+** LAST UPDATE : Oct 05, 2002
+** MODIFICATIONS :
+**************************************************************************/
+
+/*****************************
+ $Log: message.cpp,v $
+ Revision 1.1 2002-10-05 10:25:49 arjen
+ Creation of gcm_input and a first approach to a web interface
+
+*****************************/
+
+/* RCS/CVS-style $Id$ identification string for this source file. */
+static const char *RCSID = "$Id: message.cpp,v 1.1 2002-10-05 10:25:49 arjen Exp $";
+
+#include "message.h"
+
+/* Global option flags, defined in the main application. */
+extern bool verbose; /* Guards the diagnostic output written to cout */
+extern bool testmode; /* When set, enter() prints SQL statements instead of executing them */
+
+/* Utility functions, implemented at the end of this file */
+
+String SQL_Escape(String s);
+
+/*=========================================================================
+**  NAME           : operator >>
+**  SYNOPSIS       : bool operator >> (message_buffer &, String &)
+**  PARAMETERS     : b - The message buffer to read from.
+**                   s - Receives the next line. It is left untouched
+**                       when no line is available.
+**  RETURN VALUE   : True if a line was delivered in s.
+**
+**  DESCRIPTION    : Input operator. Lines that are already cached in the
+**                   buffer are handed out first. When the read position
+**                   is at the end of the cache, a new line is read from
+**                   the underlying input and appended to the cache.
+**
+**  VARS USED      :
+**  VARS CHANGED   :
+**  FUNCTIONS USED :
+**  SEE ALSO       :
+**  LAST MODIFIED  : Sep 30, 2002
+**=========================================================================
+*/
+
+bool operator >> (message_buffer &b, String &s)
+{
+   if (b.next_line != b.buffer.end())
+   {
+      // Serve the line from the cache and advance the read position.
+      s = *(b.next_line);
+      b.next_line++;
+      return true;
+   }
+
+   // The cache is used up; try to obtain a new line from the input.
+
+   String fetched;
+
+   if (*(b.input) >> fetched)
+   {
+      // Remember the line in the cache. The read position is not moved here.
+      b.buffer.push_back(fetched);
+      s = fetched;
+      return true;
+   }
+
+   return false;
+}
+
+/*=========================================================================
+**  NAME           : client_message
+**  SYNOPSIS       : client_message(istream *in, gnucomo_database db)
+**  PARAMETERS     : in - Stream that is handed to the input buffer.
+**                   db - Database object, copied into the message.
+**  RETURN VALUE   :
+**
+**  DESCRIPTION    : Constructor. The message starts out unclassified:
+**                   no hostname, no mail header, not encrypted and a
+**                   certainty of 0.
+**
+**  VARS USED      :
+**  VARS CHANGED   :
+**  FUNCTIONS USED :
+**  SEE ALSO       :
+**  LAST MODIFIED  :
+**=========================================================================
+*/
+
+client_message::client_message(istream *in, gnucomo_database db)
+{
+   // Nothing is known about the content of the message yet.
+
+   classification = UNKNOWN;
+   certainty      = 0.0;
+   gpg_encrypted  = false;
+   mail_header    = false;
+   hostname       = "";
+
+   // Attach the database and the input stream.
+
+   database = db;
+   input.from(in);
+}
+
+/*
+ *  Patterns for the date formats that occur in the messages. They are kept
+ *  as Strings so they can be combined into the larger expressions below.
+ */
+
+/* Syslog style date, e.g. "Oct  5 10:25:49" */
+static const String syslog_date_re("[[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}");
+/* Mail header style date, e.g. "Sat, 5 Oct 2002 10:25:49 +0200" */
+static const String mail_date_re("[[:alpha:]]{3}, [ 123]?[0-9] [[:alpha:]]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [+-][0-9]{4}");
+/* Date with weekday and trailing year, e.g. "Sat Oct  5 10:25:49 2002" */
+static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
+
+/*  Expressions used to recognize the content of a message.  */
+
+static const regex re_syslog(syslog_date_re + " [a-z]+ [[:alpha:]]+.*:.+");
+static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
+static const regex re_dump("^ *DUMP: Date of this level");
+static const regex re_accesslog("(GET|POST) .+ HTTP");
+static const regex re_errorlog("^\\[" + unix_date_re + "\\] \\[(error|notice)\\] .+");
+
+/*  Expressions used to pick apart a mail header.  */
+
+/* Built from syslog_date_re instead of repeating the same literal. */
+static const regex re_syslog_date(syslog_date_re);
+static const regex re_uxmail_from("^From - " + unix_date_re);
+static const regex re_mail_From("^From:[[:blank:]]+");
+static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
+static const regex re_email_address("[[:alnum:]_.-]+@[[:alnum:]_.-]+");
+static const regex re_email_user("[[:alnum:]_.-]+@");
+
+/*=========================================================================
+**  NAME           : classify
+**  SYNOPSIS       : double classify(String host, UTC arriv, String serv)
+**  PARAMETERS     : host  - Hostname to assume. A "From:" line in a mail
+**                           header replaces it.
+**                   arriv - Arrival time to assume. A "Date:" line in a
+**                           mail header replaces it.
+**                   serv  - Service name to assume. It is replaced by
+**                           "httpd" when a HTTP log is detected.
+**  RETURN VALUE   : The certainty with which the message is classified:
+**                   0.0 if the hostname is empty or the arrival time is
+**                   not proper, 1.0 otherwise.
+**
+**  DESCRIPTION    : Read an optional mail header, then test the lines of
+**                   the message against the known content patterns until
+**                   one of them matches. The input is rewound afterwards,
+**                   so the message can be read again from the start.
+**
+**  VARS USED      : verbose
+**  VARS CHANGED   :
+**  FUNCTIONS USED :
+**  SEE ALSO       : enter
+**  LAST MODIFIED  : Oct 05, 2002
+**=========================================================================
+*/
+
+double client_message::classify(String host, UTC arriv, String serv)
+{
+   String line;
+
+   hostname = host;
+   arrival  = arriv;
+   service  = serv;
+
+   /*  First, check if the message has a mail header. */
+
+   if (input >> line && line == re_uxmail_from)
+   {
+      String from_address;
+
+      mail_header = true;
+
+      /*  Scan ahead for the hostname and date of arrival.  */
+      /*  The header ends at the first empty line.          */
+
+      while (input >> line && line != "")
+      {
+         if (line == re_mail_From)
+         {
+            from_address = line(re_email_address);
+            from_address(re_email_user) = "";   // Remove the user part, keep what follows the '@'
+            hostname = from_address;
+         }
+         if (line == re_mail_Date)
+         {
+            arrival = UTC(line(regex(mail_date_re)));
+         }
+      }
+   }
+   else
+   {
+      //  Push the first line back, we need to read it again.
+      --input;
+   }
+
+   /*
+    *  Now that we have the mail header out of the way, try to figure
+    *  out what the content of the message is.
+    *  The certainty is tested first, so no line is read anymore once
+    *  the content is recognized.
+    */
+
+   while (certainty < 0.9 && input >> line)
+   {
+      //  Trace output only; keep it quiet unless asked for.
+      if (verbose)
+      {
+         cout << " testing: " << line << "\n";
+      }
+
+      if (line == re_syslog)
+      {
+         certainty = 1.0;
+         classification = SYSLOG;
+         if (verbose)
+         {
+            cout << "Syslog detected.\n";
+         }
+      }
+      else if (line == re_PGP)
+      {
+         //  Encrypted content is recognized but not classified any further.
+         certainty = 1.0;
+         gpg_encrypted = true;
+         cerr << "The message is PGP/GnuPG encrypted.\n";
+      }
+      else if (line == re_dump)
+      {
+         //  Recognized, but the classification is left as it was.
+         certainty = 1.0;
+         if (verbose)
+         {
+            cout << "DUMP output detected.\n";
+         }
+      }
+      else if (line == re_accesslog)
+      {
+         certainty = 1.0;
+         classification = ACCESSLOG;
+         service = "httpd";
+         if (verbose)
+         {
+            cout << "HTTP access log detected.\n";
+         }
+      }
+      else if (line == re_errorlog)
+      {
+         certainty = 1.0;
+         classification = ERRORLOG;
+         service = "httpd";
+         if (verbose)
+         {
+            cout << "HTTP error log detected.\n";
+         }
+      }
+   }
+   input.rewind();
+
+   if (hostname == "")
+   {
+      cerr << "Can not determine the hostname where the message came from.\n";
+      certainty = 0.0;
+   }
+   else if (!arrival.proper())
+   {
+      cerr << "Arrival time is not known.\n";
+      certainty = 0.0;
+   }
+   else
+   {
+      //  NOTE(review): this also raises the certainty to 1.0 when none of
+      //  the content patterns matched. enter() still refuses such a message
+      //  because its classification is UNKNOWN.
+      certainty = 1.0;
+   }
+
+   return certainty;
+}
+
+/*=========================================================================
+**  NAME           : enter
+**  SYNOPSIS       : int enter()
+**  PARAMETERS     :
+**  RETURN VALUE   : The number of lines successfully parsed from the input.
+**                   0 if the message is not classified with sufficient
+**                   certainty, is encrypted, or if the host is not found
+**                   in the database.
+**
+**  DESCRIPTION    : Read the message line by line. Each line that matches
+**                   the pattern of the message classification is entered
+**                   as a record into the log table. In test mode the SQL
+**                   statement is printed instead of executed.
+**                   Syslog lines are entered only if the host name in
+**                   the line matches the start of the message's hostname.
+**
+**  VARS USED      : verbose, testmode
+**  VARS CHANGED   :
+**  FUNCTIONS USED : SQL_Escape
+**  SEE ALSO       : classify
+**  LAST MODIFIED  : Oct 05, 2002
+**=========================================================================
+*/
+
+int client_message::enter()
+{
+   int     nr_lines = 0;
+   String  line;
+
+   /* Double-check the classification of the message */
+
+   if (classification == UNKNOWN || certainty < 0.9 || gpg_encrypted)
+   {
+      return 0;
+   }
+
+   if (mail_header)
+   {
+      //  Skip the mail header, which ends at the first empty line.
+
+      while (input >> line && line != "");
+   }
+
+   /*  Try to find the host in the database */
+
+   String objectid;
+
+   objectid = database.find_host(hostname);
+   if (objectid == "")
+   {
+      cerr << "Please define the host " << hostname << " in the database.\n";
+      return 0;
+   }
+   if (verbose)
+   {
+      cout << "Object id for " << hostname << " is " << objectid << "\n";
+   }
+
+   /*
+    *  Select the pattern that each line must match. The classification
+    *  does not change while reading, so this is done only once.
+    *  The pointer stays 0 for a classification without a pattern, in
+    *  which case every line is reported as invalid.
+    */
+
+   const regex *check = 0;
+
+   switch (classification)
+   {
+   case SYSLOG:
+      check = &re_syslog;
+      break;
+   case ACCESSLOG:
+      check = &re_accesslog;
+      break;
+   case ERRORLOG:
+      check = &re_errorlog;
+      break;
+   default:
+      break;
+   }
+
+   /*
+    *  The date in the HTTP logs is enclosed in square brackets.
+    *  Match up to the first ']' only, so that brackets further on in
+    *  the line do not become part of the date.
+    */
+
+   const regex re_bracketed("\\[[^]]+\\]");
+
+   /*  Scan the input line by line, entering records into the database */
+
+   String rest;     //  Rest of the line to be parsed
+
+   while (input >> line)
+   {
+      if (verbose)
+      {
+         cout << line << "\n";
+      }
+
+      /*  Check each line if it contains valid information */
+
+      if (check == 0 || !(line == *check))
+      {
+         cerr << "gcm_input WARNING: Not a valid line: " << line << "\n";
+         continue;
+      }
+
+      date    log_date;
+      hour    log_time;
+      String  datestring;
+      int     i = 0;
+      bool    store = false;    //  Set when the line must go into the database
+
+      switch (classification)
+      {
+      case SYSLOG:
+         log_date = line;
+         log_time = line;
+         if (log_date.Year() < 0 || log_date.Year() > 2500)
+         {
+            //  The year is not in the log file. Assume the year of arrival
+
+            log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year());
+         }
+
+         if (verbose)
+         {
+            cout << " Log timestamp = " << log_date << " " << log_time << "\n";
+         }
+
+         //  Drop the first 16 characters (the date); the host name comes next.
+         rest = line << 16;
+         i = rest.index(' ');
+         if (rest(0,i) == hostname(0,i))
+         {
+            rest <<= i + 1;
+            if (verbose)
+            {
+               cout << " Hostname matches.\n";
+               cout << " rest = " << rest << "\n";
+            }
+
+            //  The service name is the leading run of letters.
+            //  Test the length before touching the character.
+            for (i = 0; i < ~rest && isalpha(static_cast<unsigned char>(rest[i])); i++);
+            if (verbose)
+            {
+               cout << " Service name = " << rest(0,i) << "\n";
+            }
+            store = true;
+         }
+         else
+         {
+            cerr << " Hostname " << rest(0,i) << " does not match.\n";
+         }
+         break;
+
+      case ACCESSLOG:
+         datestring = line(re_bracketed);
+         datestring <<= 1;      //  Strip the first and
+         datestring >>= 1;      //  the last character (the brackets).
+
+         //  Replace the first ':' by a space.
+         //  Leave the string alone if there is no ':' to replace.
+         i = datestring.index(':');
+         if (i >= 0 && i < ~datestring)
+         {
+            datestring[i] = ' ';
+         }
+         log_date = datestring;
+         log_time = datestring;
+         if (verbose)
+         {
+            cout << " Log timestamp = " << log_date << " " << log_time << "\n";
+         }
+         store = true;
+         break;
+
+      case ERRORLOG:
+         datestring = line(re_bracketed);
+         datestring <<= 1;      //  Strip the first and
+         datestring >>= 1;      //  the last character (the brackets).
+         log_date = datestring;
+         log_time = datestring;
+         if (verbose)
+         {
+            cout << " Log timestamp = " << log_date << " " << log_time << "\n";
+         }
+         store = true;
+         break;
+
+      default:
+         break;
+      }
+
+      if (store)
+      {
+         /*  Insert a new record into the log table */
+
+         String insertion("insert into log (objectid, servicecode,"
+                          " object_timestamp, timestamp, rawdata) values (");
+
+         insertion += "'" + objectid + "',";
+         if (classification == SYSLOG)
+         {
+            //  The service name was found at the start of the syslog line.
+            insertion += "'" + rest(0,i) + "',";
+         }
+         else
+         {
+            insertion += "'" + service + "',";
+         }
+         insertion += "'" + log_date.format() + " " + log_time.format() + "',";
+         insertion += "'" + arrival.format() + "',";
+         insertion += "'" + SQL_Escape(line) + "'";
+         insertion += ")";
+
+         if (testmode)
+         {
+            cout << insertion << "\n";
+         }
+         else
+         {
+            database.Query(insertion);
+         }
+
+         if (verbose)
+         {
+            cout << "\n\n";
+         }
+
+         nr_lines++;
+      }
+   }
+
+   if (verbose)
+   {
+      cout << nr_lines << " lines parsed from the log file.\n";
+   }
+   return nr_lines;
+}
+
+/*=========================================================================
+**  NAME           : SQL_Escape
+**  SYNOPSIS       : String SQL_Escape(String)
+**  PARAMETERS     : s - The text to be put inside a quoted SQL string.
+**  RETURN VALUE   : A copy of s with the special characters escaped.
+**
+**  DESCRIPTION    : Insert backslashes before single quotes and before
+**                   backslashes. The backslash itself must be escaped as
+**                   well: otherwise a backslash in the input, followed by
+**                   a quote, would turn into an escaped backslash followed
+**                   by a bare quote that ends the SQL string.
+**
+**  VARS USED      :
+**  VARS CHANGED   :
+**  FUNCTIONS USED :
+**  SEE ALSO       :
+**  LAST MODIFIED  :
+**=========================================================================
+*/
+
+String SQL_Escape(String s)
+{
+   int i;
+
+   for (i = 0; i < ~s; i++)
+   {
+      if (s[i] == '\'' || s[i] == '\\')
+      {
+         s(i,0) = "\\";   //  Insert a backslash in front of position i
+         i++;             //  Step over the character that was just escaped
+      }
+   }
+
+   return s;
+}