Spam scanning investigation master
authorArjen Baart <arjen@andromeda.nl>
Sat, 13 Mar 2021 12:27:48 +0000 (13:27 +0100)
committerArjen Baart <arjen@andromeda.nl>
Sat, 13 Mar 2021 12:27:48 +0000 (13:27 +0100)
src/spam/Makefile.am [new file with mode: 0644]
src/spam/scanspam.cpp [new file with mode: 0644]
src/spam/spamdetect.cpp [new file with mode: 0644]

diff --git a/src/spam/Makefile.am b/src/spam/Makefile.am
new file mode 100644 (file)
index 0000000..c14cd48
--- /dev/null
@@ -0,0 +1,11 @@
+# Automake rules for the two spam investigation programs in this directory.
+bin_PROGRAMS = spamdetect scanspam
+# Defaults for programs without per-program flags of their own (spamdetect).
+AM_CPPFLAGS = -I../include
+LDADD = ../lib/libgnucomo.a -lACL
+# Per-program flags replace the defaults. NOTE(review): no -lACL here; confirm.
+scanspam_CPPFLAGS = -I../include @LIBPQXX_CFLAGS@ 
+scanspam_LDADD = ../lib/libgnucomo.a @LIBPQXX_LIBS@ 
+
+spamdetect_SOURCES = spamdetect.cpp
+scanspam_SOURCES = scanspam.cpp
diff --git a/src/spam/scanspam.cpp b/src/spam/scanspam.cpp
new file mode 100644 (file)
index 0000000..c551091
--- /dev/null
@@ -0,0 +1,134 @@
+#include "object.h"
+
+int LevenshteinDistance(const String &source, const String &target)
+{
+    if (~source > ~target)
+    {
+        return LevenshteinDistance(target, source);
+    }
+
+    const int min_size = ~source, max_size = ~target;
+    std::vector<int> lev_dist(min_size + 1);
+
+    for (int i = 0; i <= min_size; ++i)
+    {
+        lev_dist[i] = i;
+    }
+
+    for (int j = 1; j <= max_size; ++j)
+    {
+        int previous_diagonal = lev_dist[0], previous_diagonal_save;
+        ++lev_dist[0];
+
+        for (int i = 1; i <= min_size; ++i)
+        {
+            previous_diagonal_save = lev_dist[i];
+            if (source[i - 1] == target[j - 1])
+            {
+                lev_dist[i] = previous_diagonal;
+            }
+            else
+            {
+                lev_dist[i] = std::min(std::min(lev_dist[i - 1], lev_dist[i]), previous_diagonal) + 1;
+            }
+            previous_diagonal = previous_diagonal_save;
+        }
+    }
+
+    return lev_dist[min_size];
+}
+
+bool begin_filtered(String s, SuperString &filter)
+{
+   int i;
+   bool found;
+
+   found = false;
+   for (i = 0; i < ~filter; i++)
+   {
+      if (!found && ~s > ~filter[i])
+      {
+         String part_to_check = s(0, ~filter[i]);
+
+         found = part_to_check == filter[i];
+      }
+   }
+
+   return found;
+}
+
+std::ostream *Log = &std::cerr;   // Global log stream, pointed at stderr. NOTE(review): not referenced in this file; presumably expected by libgnucomo - confirm.
+
+int main(int argc, char *argv[])
+{
+   String hostname("skiathos.andromeda.nl");
+   gnucomo_config    cfg;
+   String           config_name("gnucomo");
+
+   /*  Get the configuration file */
+
+   if (!cfg.read(config_name))
+   {
+      std::cerr << "Can not read Gnucomo configuration file for " << config_name << ".\n";
+      exit(1);
+   }
+   gnucomo_database db(&cfg);
+   Object host(db, hostname);
+
+   std::list<ObjectLog> spam_logs;
+   SuperString          spam_headers;
+   SuperString          pre_filter;
+
+   // Logs starting with these strings are irrelevant
+
+   pre_filter += "MIME-Version:";
+   pre_filter += "Content-Type: text";
+   pre_filter += "To:";
+   pre_filter += "X-Original-To:";
+   pre_filter += "Delivered-To:";
+   pre_filter += "Date:";
+   pre_filter += "X-Greylist:";
+   pre_filter += "Importance:";
+   pre_filter += "X-Priority:";
+   pre_filter += "X-AntiAbuse:";
+
+   UTC start_date;
+   date last_week;
+
+   last_week = today() - 7;
+   start_date = UTC(last_week, now());
+
+   std::cout << "Scanning spam from " << start_date << "\n";
+
+   spam_logs = host.select_logs(start_date, Now(), "gnucomo");
+
+   std::list<ObjectLog>::iterator spam_i;
+   for (spam_i = spam_logs.begin(); spam_i != spam_logs.end(); spam_i++)
+   {
+      String log_string = spam_i->raw() << 34;
+
+      if (!begin_filtered(log_string, pre_filter))
+      {
+         spam_headers += log_string;
+      }
+   }
+
+   int i, j;
+
+   for (i = 0; i < ~spam_headers; i++)
+   {
+      std::cout << "\n====================================================================\n";
+      std::cout << "[" << ~spam_headers[i] << "] " << spam_headers[i] << "\n";
+      std::cout << "====================================================================\n";
+      for (j = i + 1; j < ~spam_headers; j++)
+      {
+         int d = LevenshteinDistance(spam_headers[i], spam_headers[j]);
+         if (d * 3 < ~spam_headers[i])
+         {
+            std::cout << "    " << d << " - " << spam_headers[j] << "\n";
+         }
+      }
+   }
+
+   std::cout << "FINISH.\n";
+}
diff --git a/src/spam/spamdetect.cpp b/src/spam/spamdetect.cpp
new file mode 100644 (file)
index 0000000..38487fe
--- /dev/null
@@ -0,0 +1,108 @@
+/**************************************************************************
+**  (c) Copyright 2007, Andromeda Technology & Automation
+** This is free software; you can redistribute it and/or modify it under the
+** terms of the GNU General Public License, see the file COPYING.
+***************************************************************************
+** MODULE INFORMATION *
+***********************
+**      FILE NAME      : spamdetect.cpp
+**      SYSTEM NAME    : Gnucomo - Gnu Computer Monitoring
+**      VERSION NUMBER : $Revision: 1.2 $
+**
+**  DESCRIPTION      :  
+**
+**  EXPORTED OBJECTS : 
+**  LOCAL    OBJECTS : 
+**  MODULES  USED    :
+***************************************************************************
+**  ADMINISTRATIVE INFORMATION *
+********************************
+**      ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
+**      CREATION DATE   : Nov 14, 2007
+**      LAST UPDATE     : Nov 14, 2007
+**      MODIFICATIONS   : 
+**************************************************************************/
+
+/*****************************
+   $Log: spamdetect.cpp,v $
+   Revision 1.2  2007-11-21 15:14:26  arjen
+   Removed debug output.
+
+   Revision 1.1  2007/11/14 16:20:05  arjen
+   New program: spamdetect.
+   Experimental utility to log manually reported spam and have
+   Gnucomo detect the spammer's IP address.
+
+*****************************/
+
+static const char *RCSID = "$Id: spamdetect.cpp,v 1.2 2007-11-21 15:14:26 arjen Exp $";
+
+#include <fstream>
+#include <String.h>
+
+#include <syslog.h>
+#include <getopt.h>
+
+int main(int argc, char *argv[])
+{
+   const char *usage = "Usage: spamdetect\n";
+
+   String line;
+   String header;
+   int    state = 0;
+
+   //  From here, the original spam starts. Something like 
+   //  -------- Forwarded Message --------  or  -------- Original Message -------- 
+
+   regex fwd_header("---- .+ Message -----");
+   regex received("^Received:");
+   regex from("^From:");
+   regex subject("^Subject:");
+   regex returnpath("^Return-Path:");
+
+   openlog("gnucomo", 0, LOG_MAIL);
+
+
+   while (std::cin >> line)
+   {
+      //std::cout <<   "[" << state << "]   checking " << line << "\n";  // DEBUG
+      switch (state)
+      {
+      case 0:
+         if (line == fwd_header)
+         {
+            state = 1;
+         }
+         break;
+
+      case 1:
+         //  Inside the forwarded header
+         if (line == received || line == from || line == returnpath || line == subject)
+         {
+            header = line;
+            state = 2;
+         }
+         break;
+      case 2:
+         if (line == regex("^[^ ]+: "))
+         {
+            //std::cout << "Header: " << header << "\n";   // DEBUG
+            syslog(LOG_WARNING, "%s", (char *)header);
+
+            header = line;
+         }
+         else if (line == String(""))
+         {
+            state = 3;
+         }
+         else
+         {
+            header += " ";
+            header += line;
+         }
+      }
+   }
+
+   closelog();
+}
+