2 /**************************************************************************
3 ** (c) Copyright 2003, Andromeda Technology & Automation
4 ** This is free software; you can redistribute it and/or modify it under the
5 ** terms of the GNU General Public License, see the file COPYING.
6 ***************************************************************************
7 ** MODULE INFORMATION *
8 ***********************
9 ** FILE NAME : message_filter.cpp
11 ** VERSION NUMBER : $Revision: 1.1 $
18 ***************************************************************************
19 ** ADMINISTRATIVE INFORMATION *
20 ********************************
21 ** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
22 ** CREATION DATE : Nov 26, 2003
23 ** LAST UPDATE : Nov 26, 2003
25 **************************************************************************/
27 /*****************************
28 $Log: message_filter.cpp,v $
29 Revision 1.1 2003-12-04 10:38:09 arjen
30 Major redesign. All input is handled through XML. Raw input data is first
31 transformed into an XML document for further processing.
32 A collection of polymorphic classes handle the transformation of various
33 input formats into XML.
34 Classifying input data is done with a finite improbability calculation.
36 *****************************/
38 /* static const char *RCSID = "$Id: message_filter.cpp,v 1.1 2003-12-04 10:38:09 arjen Exp $"; */
40 #include "message_filter.h"
42 extern std::ostream *Log;
44 /*=========================================================================
45 ** NAME : construct_XML
46 ** SYNOPSIS : void construct_XML(message_buffer &in, std::strstream &xml)
50 ** DESCRIPTION : Copy the input stream into the internal XML buffer
51 ** The input is already in XML format, so no real transformation
58 ** LAST MODIFIED : Nov 26, 2003
59 **=========================================================================
62 void message_filter::construct_XML(message_buffer &in, std::strstream &xml)
72 static const String mail_date_re("[[:alpha:]]{3}, [ 123]?[0-9] [[:alpha:]]{3} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [+-][0-9]{4}");
73 static const String unix_date_re("[[:alpha:]]{3} [[:alpha:]]{3} [ 123][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}");
74 static const regex re_uxmail_from("^From [^ \t]+[ ]+" + unix_date_re);
75 static const regex re_mail_From("^From:[[:blank:]]+");
76 static const regex re_mail_Date("^Date:[[:blank:]]+" + mail_date_re);
77 static const regex re_mail_MsId("^Message-Id:[[:blank:]]+");
78 static const regex re_email_address("[[:alnum:]_.-]+@[[:alnum:]_.-]+");
79 static const regex re_email_user("[[:alnum:]_.-]+@");
81 /*=========================================================================
82 ** NAME : scan_email_header
83 ** SYNOPSIS : void scan_email_header(message_buffer &in)
93 ** LAST MODIFIED : Nov 26, 2003
94 **=========================================================================
// Inspect the first lines of 'in' for an email header. When the first line
// is a Unix mailbox "From " line, the header fields that follow are searched
// for a hostname candidate (from "From:" and "Message-Id:") and for the
// timestamp (from "Date:").
97 void message_filter::scan_email_header(message_buffer &in)
101 /* First, check if the message has a mail header. */
// The message is treated as mail only if its first line matches re_uxmail_from.
103 if (in >> line && line == re_uxmail_from)
107 /* Scan ahead for the hostname and date of arrival. */
// Keep reading until 'in >> line' fails or an empty line is read.
109 while (in >> line && line != "")
// "From:" field: take the email address found on the line.
112 if (line == re_mail_From)
114 from_address = line(re_email_address);
115 from_address(re_email_user) = ""; // Remove the user part, i.e. the text matching re_email_user ("user@").
// NOTE(review): unary ~ presumably yields the string length, so a longer
// name would win over the current hn -- confirm against the String class.
116 if (from_address != "" && ~hn < ~from_address)
118 *Log << "Detected hostname " << from_address << "\n";
// "Message-Id:" field: same extraction and comparison as for "From:".
122 if (line == re_mail_MsId)
124 from_address = line(re_email_address);
125 from_address(re_email_user) = ""; // Remove the user part, i.e. the text matching re_email_user ("user@").
126 if (from_address != "" && ~hn < ~from_address)
128 *Log << "Detected hostname " << from_address << "\n";
// "Date:" field: the part of the line that matches mail_date_re is turned
// into a UTC object and stored in ts.
132 if (line == re_mail_Date)
134 ts = UTC(line(regex(mail_date_re)));
140 // Push the first line back, we need to read it again.
147 /*=========================================================================
148 ** NAME : construct_header
149 ** SYNOPSIS : void construct_header(std::strstream &xml)
153 ** DESCRIPTION : Create the header for a Gnucomo XML document.
159 ** LAST MODIFIED : Nov 26, 2003
160 **=========================================================================
// Write the start of a Gnucomo transport document to 'xml': the XML
// declaration, the opening gcmt:message element and a complete gcmt:header
// element filled from mt, hn, srv and ts.
163 void message_filter::construct_header(std::strstream &xml)
// The UTF-8 declaration is disabled; the document is declared as ISO-8859-1.
165 //xml << "<?xml version='1.0' encoding='utf-8'?>\n";
166 xml << "<?xml version='1.0' encoding='ISO-8859-1'?>\n";
// The gcmt:message element opened here is not closed by the statements below.
167 xml << "<gcmt:message xmlns:gcmt='http://gnucomo.org/transport/'>\n";
168 xml << " <gcmt:header>\n";
// The values are streamed as they are; no XML escaping is applied here.
169 xml << " <gcmt:messagetype>" << mt << "</gcmt:messagetype>\n";
170 xml << " <gcmt:hostname>" << hn << "</gcmt:hostname>\n";
171 xml << " <gcmt:service>" << srv << "</gcmt:service>\n";
172 xml << " <gcmt:time>" << ts << "</gcmt:time>\n";
173 xml << " </gcmt:header>\n";