--- /dev/null
+#include "object.h"
+
+// Compute the Levenshtein (edit) distance between two strings: the minimum
+// number of single-character insertions, deletions and substitutions that
+// turns one string into the other.  The distance is symmetric, so the
+// arguments may be given in either order.
+//
+// Only a single row of the dynamic-programming table is kept.  The row is
+// sized by the shorter of the two strings, so the working storage is
+// min(length) + 1 integers.
+int LevenshteinDistance(const String &source, const String &target)
+{
+   // Pick the shorter string for the row; on equal lengths keep the given order.
+   const bool source_is_longer = ~source > ~target;
+   const String &shorter = source_is_longer ? target : source;
+   const String &longer  = source_is_longer ? source : target;
+
+   const int row_length = ~shorter;
+   const int row_count  = ~longer;
+
+   // row[k] holds the distance between the first k characters of 'shorter'
+   // and the prefix of 'longer' processed so far (initially the empty prefix).
+   std::vector<int> row(row_length + 1);
+   for (int k = 0; k < row_length + 1; ++k)
+   {
+      row[k] = k;
+   }
+
+   for (int consumed = 0; consumed < row_count; ++consumed)
+   {
+      // 'diagonal' is the table cell up and to the left of the one being
+      // computed.  The first cell of the row becomes the distance from the
+      // empty string, which grows by one with each character of 'longer'.
+      int diagonal = row[0]++;
+
+      for (int k = 0; k < row_length; ++k)
+      {
+         const int above = row[k + 1];     // value from the previous row
+
+         if (shorter[k] == longer[consumed])
+         {
+            // Matching characters cost nothing.
+            row[k + 1] = diagonal;
+         }
+         else
+         {
+            // Cheapest of insertion, deletion and substitution, plus one edit.
+            row[k + 1] = std::min(std::min(row[k], above), diagonal) + 1;
+         }
+         diagonal = above;
+      }
+   }
+
+   return row.back();
+}
+
+// Tell whether the string 's' starts with one of the strings in 'filter'.
+//
+// A filter entry only counts when 's' is strictly longer than that entry;
+// a string that is exactly equal to an entry is therefore not reported.
+// Returns true as soon as a matching entry is found, false if there is none.
+bool begin_filtered(String s, SuperString &filter)
+{
+   for (int entry = 0; entry < ~filter; ++entry)
+   {
+      if (~s > ~filter[entry])
+      {
+         // Compare the leading part of 's', cut to the length of the entry.
+         String leading_part = s(0, ~filter[entry]);
+
+         if (leading_part == filter[entry])
+         {
+            return true;
+         }
+      }
+   }
+
+   return false;
+}
+
+std::ostream *Log = &std::cerr; // Global log stream pointer, initially std::cerr; not used in the visible code - presumably read by the Gnucomo library (TODO confirm)
+
+// Scan the 'gnucomo' logs of one fixed host over the last seven days, drop
+// the entries that start with an irrelevant mail header and print, for each
+// remaining entry, the later entries that are textually close to it
+// (Levenshtein distance less than a third of the entry's length).
+//
+// Exits with status 1 when the Gnucomo configuration can not be read.
+int main(int argc, char *argv[])
+{
+   String hostname("skiathos.andromeda.nl");
+   gnucomo_config cfg;
+   String config_name("gnucomo");
+
+   /* Read the configuration; without it there is no database to query. */
+
+   if (!cfg.read(config_name))
+   {
+      std::cerr << "Can not read Gnucomo configuration file for " << config_name << ".\n";
+      exit(1);
+   }
+
+   gnucomo_database db(&cfg);
+   Object host(db, hostname);
+
+   // Log entries that start with one of these strings are not interesting.
+
+   SuperString pre_filter;
+
+   pre_filter += "MIME-Version:";
+   pre_filter += "Content-Type: text";
+   pre_filter += "To:";
+   pre_filter += "X-Original-To:";
+   pre_filter += "Delivered-To:";
+   pre_filter += "Date:";
+   pre_filter += "X-Greylist:";
+   pre_filter += "Importance:";
+   pre_filter += "X-Priority:";
+   pre_filter += "X-AntiAbuse:";
+
+   // The scan starts seven days before today, at the current time of day.
+
+   date last_week = today() - 7;
+   UTC start_date = UTC(last_week, now());
+
+   std::cout << "Scanning spam from " << start_date << "\n";
+
+   std::list<ObjectLog> spam_logs = host.select_logs(start_date, Now(), "gnucomo");
+
+   // Collect the entries that pass the filter.
+
+   SuperString spam_headers;
+
+   for (std::list<ObjectLog>::iterator entry = spam_logs.begin(); entry != spam_logs.end(); ++entry)
+   {
+      // The raw log text is shifted left by 34 characters before use.
+      String log_string = entry->raw() << 34;
+
+      if (begin_filtered(log_string, pre_filter))
+      {
+         continue;
+      }
+      spam_headers += log_string;
+   }
+
+   // Compare every entry with all entries that follow it.
+
+   for (int current = 0; current < ~spam_headers; current++)
+   {
+      std::cout << "\n====================================================================\n";
+      std::cout << "[" << ~spam_headers[current] << "] " << spam_headers[current] << "\n";
+      std::cout << "====================================================================\n";
+
+      for (int other = current + 1; other < ~spam_headers; other++)
+      {
+         int distance = LevenshteinDistance(spam_headers[current], spam_headers[other]);
+
+         // Report only the entries that differ by less than a third of the length.
+         if (distance * 3 < ~spam_headers[current])
+         {
+            std::cout << " " << distance << " - " << spam_headers[other] << "\n";
+         }
+      }
+   }
+
+   std::cout << "FINISH.\n";
+}
--- /dev/null
+/**************************************************************************
+** (c) Copyright 2007, Andromeda Technology & Automation
+** This is free software; you can redistribute it and/or modify it under the
+** terms of the GNU General Public License, see the file COPYING.
+***************************************************************************
+** MODULE INFORMATION *
+***********************
+** FILE NAME : spamdetect.cpp
+** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
+** VERSION NUMBER : $Revision: 1.2 $
+**
+** DESCRIPTION :
+**
+** EXPORTED OBJECTS :
+** LOCAL OBJECTS :
+** MODULES USED :
+***************************************************************************
+** ADMINISTRATIVE INFORMATION *
+********************************
+** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
+** CREATION DATE : Nov 14, 2007
+** LAST UPDATE : Nov 14, 2007
+** MODIFICATIONS :
+**************************************************************************/
+
+/*****************************
+ $Log: spamdetect.cpp,v $
+ Revision 1.2 2007-11-21 15:14:26 arjen
+ Removed debug output.
+
+ Revision 1.1 2007/11/14 16:20:05 arjen
+ New program: spamdetect.
+ Experimental utility to log manually reported spam and have
+ Gnucomo detect the spammer's IP address.
+
+*****************************/
+
+static const char *RCSID = "$Id: spamdetect.cpp,v 1.2 2007-11-21 15:14:26 arjen Exp $"; // RCS identification keyword string; not referenced in the visible code
+
+#include <fstream>
+#include <String.h>
+
+#include <syslog.h>
+#include <getopt.h>
+
+// Write one complete (unfolded) mail header to syslog with priority LOG_WARNING.
+static void log_header(String &header)
+{
+   syslog(LOG_WARNING, "%s", (char *)header);
+}
+
+// Read a forwarded spam report from standard input and log the headers of
+// the original (forwarded) message to syslog under the ident "gnucomo",
+// facility LOG_MAIL.
+//
+// The input is processed line by line with a small state machine:
+//   - skip everything up to the "Forwarded/Original Message" marker,
+//   - skip up to the first Received:, From:, Return-Path: or Subject: header,
+//   - from there on, log every header, with continuation lines joined to
+//     their header by a single space,
+//   - stop logging at the empty line that ends the header section.
+//
+// Every collected header is logged, including the last one before the empty
+// line or before end of input.
+int main(int argc, char *argv[])
+{
+   enum
+   {
+      BEFORE_FORWARD = 0,   // Looking for the forwarded message marker
+      IN_FORWARD     = 1,   // Looking for the first interesting header
+      IN_HEADERS     = 2,   // Collecting and logging headers
+      IN_BODY        = 3    // Header section has ended, nothing more to log
+   };
+
+   String line;
+   String header;
+   int state = BEFORE_FORWARD;
+
+   // From here, the original spam starts. Something like
+   // -------- Forwarded Message -------- or -------- Original Message --------
+
+   regex fwd_header("---- .+ Message -----");
+   regex received("^Received:");
+   regex from("^From:");
+   regex subject("^Subject:");
+   regex returnpath("^Return-Path:");
+
+   // Any line of the form "Name: value" starts a new header.
+   // Built once here instead of once per input line.
+   regex header_start("^[^ ]+: ");
+
+   openlog("gnucomo", 0, LOG_MAIL);
+
+   while (std::cin >> line)
+   {
+      switch (state)
+      {
+      case BEFORE_FORWARD:
+         if (line == fwd_header)
+         {
+            state = IN_FORWARD;
+         }
+         break;
+
+      case IN_FORWARD:
+         // Inside the forwarded header
+         if (line == received || line == from || line == returnpath || line == subject)
+         {
+            header = line;
+            state = IN_HEADERS;
+         }
+         break;
+
+      case IN_HEADERS:
+         if (line == header_start)
+         {
+            // A new header begins, so the previous one is complete.
+            log_header(header);
+
+            header = line;
+         }
+         else if (line == String(""))
+         {
+            // The empty line ends the header section.  Log the header that
+            // is still pending, otherwise the last header would be lost.
+            log_header(header);
+            state = IN_BODY;
+         }
+         else
+         {
+            // Continuation line: join it to the current header.
+            header += " ";
+            header += line;
+         }
+         break;
+
+      default:
+         // Message body: keep reading until end of input, nothing to log.
+         break;
+      }
+   }
+
+   if (state == IN_HEADERS)
+   {
+      // Input ended inside the header section; do not lose the pending header.
+      log_header(header);
+   }
+
+   closelog();
+
+   return 0;
+}
+