2 /**************************************************************************
3 ** (c) Copyright 2002, Andromeda Technology & Automation
4 ** This is free software; you can redistribute it and/or modify it under the
5 ** terms of the GNU General Public License, see the file COPYING.
6 ***************************************************************************
7 ** MODULE INFORMATION *
8 ***********************
9 ** FILE NAME : message.cpp
10 ** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
11 ** VERSION NUMBER : $Revision: 1.3 $
13 ** DESCRIPTION : Implementation of the message handling classes
18 ***************************************************************************
19 ** ADMINISTRATIVE INFORMATION *
20 ********************************
21 ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
22 ** CREATION DATE : Sep 16, 2002
23 ** LAST UPDATE : Nov 04, 2002
25 **************************************************************************/
27 /*****************************
29 Revision 1.3 2002-11-09 08:04:27 arjen
30 Added a reference to the GPL
32 Revision 1.2 2002/11/04 10:13:36 arjen
33 Use proper namespace for iostream classes
35 Revision 1.1 2002/10/05 10:25:49 arjen
36 Creation of gcm_input and a first approach to a web interface
38 *****************************/
40 static const char *RCSID = "$Id: message.cpp,v 1.3 2002-11-09 08:04:27 arjen Exp $";
44 extern bool verbose; /* Defined in the main application */
47 /* Utility functions */
49 String SQL_Escape(String s);
51 /*=========================================================================
53 ** SYNOPSIS : bool operator >> (message_buffer &, String &)
55 ** RETURN VALUE : True if input was available.
57 ** DESCRIPTION : Input operator. Read the next line from the message.
63 ** LAST MODIFIED : Nov 04, 2002
64 **=========================================================================
// Input operator: hands the next line of the message to s. According to the
// banner comment above, the result is true if input was available.
67 bool operator >> (message_buffer &b, String &s)
// Start out pessimistic; presumably this is the value that gets returned.
69 bool input_ok = false;
// The cursor next_line has reached the end of the lines buffered so far.
71 if (b.next_line == b.buffer.end())
// Append the line l to the buffer (l is set up in statements that are not
// part of this excerpt).
77 b.buffer.push_back(l);
79 // next_line keeps pointing to the end.
// Constructor: a freshly created message is marked as not encrypted and its
// classification is UNKNOWN until classify() assigns one.
94 client_message::client_message(std::istream *in, gnucomo_database db)
101 gpg_encrypted = false;
102 classification = UNKNOWN;
// Date formats, kept as pattern strings so they can be embedded in the
// larger expressions below.
//   syslog_date_re : month name, day, time         e.g.  Nov  4 10:13:36
//   mail_date_re   : mail header date with zone    e.g.  Mon, 4 Nov 2002 10:13:36 +0100
//   unix_date_re   : weekday, month, day, time, year  e.g.  Mon Nov  4 10:13:36 2002
106 static const String syslog_date_re("[[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}");
107 static const String mail_date_re("[[:alpha:]]{3}, [ 123]?[0-9] [[:alpha:]]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [+-][0-9]{4}");
108 static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
// Patterns that recognise the kind of content a line belongs to.
// Syslog line: syslog date, a lower-case word, an alphabetic word, then any
// text containing a colon followed by at least one more character.
// (Presumably the two words are the host name and the service name.)
110 static const regex re_syslog(syslog_date_re + " [a-z]+ [[:alpha:]]+.*:.+");
// Armor header that opens a PGP encrypted message.
111 static const regex re_PGP("-----BEGIN PGP MESSAGE-----");
// Line that starts, after optional spaces, with the DUMP date banner.
112 static const regex re_dump("^ *DUMP: Date of this level");
// HTTP access log line: a GET or POST request followed later by HTTP.
113 static const regex re_accesslog("(GET|POST) .+ HTTP");
// HTTP error log line: bracketed unix date, bracketed severity (error or
// notice), then the message text.
114 static const regex re_errorlog("^\\[" + unix_date_re + "\\] \\[(error|notice)\\] .+");
// Patterns used to pick individual fields out of a line.
// NOTE(review): this literal repeats the pattern already stored in
// syslog_date_re.
116 static const regex re_syslog_date("[[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}");
// Line starting with From, a dash and a unix date.
117 static const regex re_uxmail_from("^From - " + unix_date_re);
// The From: and Date: mail header fields.
118 static const regex re_mail_From("^From:[[:blank:]]+");
119 static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
// A complete mail address, and its user part up to and including the @ sign.
120 static const regex re_email_address("[[:alnum:]_.-]+@[[:alnum:]_.-]+");
121 static const regex re_email_user("[[:alnum:]_.-]+@");
123 /*=========================================================================
125 ** SYNOPSIS : double classify(String host, UTC arriv, String serv)
127 ** RETURN VALUE : The certainty with which the message is classified.
135 ** LAST MODIFIED : Nov 04, 2002
136 **=========================================================================
// Work out what kind of message this is. The visible code sets the
// classification, the gpg_encrypted flag, the hostname and the arrival time
// from the message text. According to the banner comment above, the return
// value is the certainty of the classification.
139 double client_message::classify(String host, UTC arriv, String serv)
147 /* First, check if the message has a mail header. */
// The first line is read here; a match against re_uxmail_from is taken as
// the sign that a mail header is present.
149 if (input >> line && line == re_uxmail_from)
155 /* Scan ahead for the hostname and date of arrival. */
// Stop at the first empty line or when the input runs out.
157 while (input >> line && line != "")
159 if (line == re_mail_From)
// Take the mail address from the From: line and strip everything up to and
// including the @ sign; what remains is used as the hostname.
161 from_address = line(re_email_address);
162 from_address(re_email_user) = ""; // Remove the user part;
163 hostname = from_address;
165 if (line == re_mail_Date)
// The date part of the Date: line becomes the arrival time.
// NOTE(review): a new regex object is built from mail_date_re at this point.
167 arrival = UTC(line(regex(mail_date_re)));
173 // Push the first line back, we need to read it again.
178 * Now that we have the mail header out of the way, try to figure
179 * out what the content of the message is.
// Test line after line against the content patterns for as long as the
// certainty stays below 0.9.
// NOTE(review): the line is read before the certainty is tested, so one more
// line is consumed even when the certainty is already 0.9 or higher.
183 while (input >> line && certainty < 0.9)
185 std::cout << " testing: " << line << "\n";
186 if (line == re_syslog)
189 classification = SYSLOG;
192 std::cout << "Syslog detected.\n";
195 else if (line == re_PGP)
// An encrypted message is only flagged here.
198 gpg_encrypted = true;
199 std::cerr << "The message is PGP/GnuPG encrypted.\n";
201 else if (line == re_dump)
206 std::cout << "DUMP output detected.\n";
209 else if (line == re_accesslog)
212 classification = ACCESSLOG;
216 std::cout << "HTTP access log detected.\n";
219 else if (line == re_errorlog)
222 classification = ERRORLOG;
226 std::cout << "HTTP error log detected.\n";
// Diagnostics for a missing hostname or an improper arrival time.
234 std::cerr << "Can not determine the hostname where the message came from.\n";
237 else if (!arrival.proper())
// NOTE(review): the message below misspells known as knwon.
239 std::cerr << "Arrival time is not knwon.\n";
250 /*=========================================================================
252 ** SYNOPSIS : int enter()
254 ** RETURN VALUE : The number of lines successfully parsed from the input
262 ** LAST MODIFIED : Nov 04, 2002
263 **=========================================================================
// Enter the content of the message into the database, one log record per
// valid input line. According to the banner comment above, the return value
// is the number of lines successfully parsed from the input.
266 int client_message::enter()
271 /* Double-check the classification of the message */
// True when the message is unclassified, classified with a certainty below
// 0.9, or encrypted. What happens in that case is not part of this excerpt.
273 if (classification == UNKNOWN || certainty < 0.9 || gpg_encrypted)
280 // Skip the mail header.
// The loop body is empty on purpose: lines are consumed up to and including
// the first empty one.
282 while (input >> line && line != "");
285 /* Try to find the host in the database */
289 objectid = database.find_host(hostname);
292 std::cerr << "Please define the host " << hostname << " in the database.\n";
297 std::cout << "Object id for " << hostname << " is " << objectid << "\n";
300 /* Scan the input line by line, entering records into the database */
302 String rest; // Rest of the line to be parsed
304 while (input >> line)
308 std::cout << line << "\n";
312 /* Check each line if it contains valid information */
// Select the pattern a line has to match, depending on the classification.
316 switch (classification)
322 check = &re_accesslog;
325 check = &re_errorlog;
// Fixed first part of the SQL statement; the values are appended below.
335 String insertion("insert into log (objectid, servicecode,"
336 " object_timestamp, timestamp, rawdata) values (");
// Parse the line according to the kind of log it belongs to.
339 switch (classification)
// A year outside the range 0 to 2500 is treated as missing.
344 if (log_date.Year() < 0 || log_date.Year() > 2500)
346 // The year is not in the log file. Assume the year of arrival
348 log_date = date(log_date.Day(), log_date.Month(), date(arrival).Year());
353 std::cout << " Log timestamp = " << log_date << " " << log_time << "\n";
// Compare the first i characters of the rest of the line with the hostname
// (i is computed in statements that are not part of this excerpt).
357 if (rest(0,i) == hostname(0,i))
362 std::cout << " Hostname matches.\n";
363 std::cout << " rest = " << rest << "\n";
// Find the length of the leading run of letters; it is used as the service
// name below.
// NOTE(review): rest[i] is evaluated before the test i < ~rest, so the
// character is fetched before the index is checked (assuming ~rest yields
// the length of the string - TODO confirm).
365 for (i = 0; isalpha(rest[i]) && i < ~rest; i++);
368 std::cout << " Service name = " << rest(0,i) << "\n";
371 /* Insert a new record into the log table */
// NOTE(review): only the raw line goes through SQL_Escape; the service name
// taken from the input is concatenated into the statement as it is.
373 insertion += "'" + objectid + "',";
374 insertion += "'" + rest(0,i) + "',";
375 insertion += "'" + log_date.format() + " " + log_time.format() + "',";
376 insertion += "'" + arrival.format() + "',";
377 insertion += "'" + SQL_Escape(line) + "'";
382 std::cout << insertion << "\n";
386 database.Query(insertion);
398 std::cerr << " Hostname " << rest(0,i) << " does not match.\n";
// The timestamp is the bracketed part of the line.
403 datestring = line(regex("\\[.+\\]"));
// Replace a colon in the timestamp by a space; presumably this is the access
// log branch and the colon is the one between date and time - TODO confirm.
406 datestring[datestring.index(':')] = ' ';
// The same string is assigned to both the date and the time object.
407 log_date = datestring;
408 log_time = datestring;
411 std::cout << " Log timestamp = " << log_date << " " << log_time << "\n";
// Same record layout as above, with the service variable as service code.
413 insertion += "'" + objectid + "',";
414 insertion += "'" + service + "',";
415 insertion += "'" + log_date.format() + " " + log_time.format() + "',";
416 insertion += "'" + arrival.format() + "',";
417 insertion += "'" + SQL_Escape(line) + "'";
422 std::cout << insertion << "\n";
426 database.Query(insertion);
// Again the bracketed timestamp, this time assigned without any change.
438 datestring = line(regex("\\[.+\\]"));
441 log_date = datestring;
442 log_time = datestring;
445 std::cout << " Log timestamp = " << log_date << " " << log_time << "\n";
447 insertion += "'" + objectid + "',";
448 insertion += "'" + service + "',";
449 insertion += "'" + log_date.format() + " " + log_time.format() + "',";
450 insertion += "'" + arrival.format() + "',";
451 insertion += "'" + SQL_Escape(line) + "'";
456 std::cout << insertion << "\n";
460 database.Query(insertion);
// Warning for a line that is not valid.
474 std::cerr << "gcm_input WARNING: Not a valid line: " << line << "\n";
// Report the value of nr_lines.
480 std::cout << nr_lines << " lines parsed from the log file.\n";
485 /*=========================================================================
487 ** SYNOPSIS : String SQL_Escape(String)
491 ** DESCRIPTION : Insert backslashes before single quotes.
498 **=========================================================================
501 String SQL_Escape(String s)
505 for (i = 0; i < ~s; i++)