From cf41d5ed3352e12fd592aa84820d0b3ae85be00f Mon Sep 17 00:00:00 2001
From: Arjen Baart
Date: Sat, 13 Mar 2021 13:27:48 +0100
Subject: [PATCH] Spam scanning investigation

---
 src/spam/Makefile.am    |  11 ++++
 src/spam/scanspam.cpp   | 154 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/spam/spamdetect.cpp | 140 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 305 insertions(+)
 create mode 100644 src/spam/Makefile.am
 create mode 100644 src/spam/scanspam.cpp
 create mode 100644 src/spam/spamdetect.cpp

diff --git a/src/spam/Makefile.am b/src/spam/Makefile.am
new file mode 100644
index 0000000..c14cd48
--- /dev/null
+++ b/src/spam/Makefile.am
@@ -0,0 +1,11 @@
+
+bin_PROGRAMS = spamdetect scanspam
+
+AM_CPPFLAGS = -I../include
+LDADD = ../lib/libgnucomo.a -lACL
+
+scanspam_CPPFLAGS = -I../include @LIBPQXX_CFLAGS@
+scanspam_LDADD = ../lib/libgnucomo.a @LIBPQXX_LIBS@
+
+spamdetect_SOURCES = spamdetect.cpp
+scanspam_SOURCES = scanspam.cpp
diff --git a/src/spam/scanspam.cpp b/src/spam/scanspam.cpp
new file mode 100644
index 0000000..c551091
--- /dev/null
+++ b/src/spam/scanspam.cpp
@@ -0,0 +1,154 @@
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include "object.h"
+
+// Return the Levenshtein (edit) distance between source and target: the
+// minimum number of single-character insertions, deletions and
+// substitutions that turn one string into the other.
+// Only one row of the distance matrix is kept, sized by the shorter string.
+
+int LevenshteinDistance(const String &source, const String &target)
+{
+   if (~source > ~target)
+   {
+      // The distance is symmetric; make source the shorter string.
+      return LevenshteinDistance(target, source);
+   }
+
+   const int min_size = ~source, max_size = ~target;
+   std::vector<int> lev_dist(min_size + 1);
+
+   // Distance from the empty string to the first i characters of source
+   for (int i = 0; i <= min_size; ++i)
+   {
+      lev_dist[i] = i;
+   }
+
+   for (int j = 1; j <= max_size; ++j)
+   {
+      // previous_diagonal is the value lev_dist[i - 1] held before this j updated it
+      int previous_diagonal = lev_dist[0], previous_diagonal_save;
+      ++lev_dist[0];
+
+      for (int i = 1; i <= min_size; ++i)
+      {
+         previous_diagonal_save = lev_dist[i];
+         if (source[i - 1] == target[j - 1])
+         {
+            lev_dist[i] = previous_diagonal;
+         }
+         else
+         {
+            lev_dist[i] = std::min(std::min(lev_dist[i - 1], lev_dist[i]), previous_diagonal) + 1;
+         }
+         previous_diagonal = previous_diagonal_save;
+      }
+   }
+
+   return lev_dist[min_size];
+}
+
+// Return true if s begins with one of the strings in filter.
+
+bool begin_filtered(String s, SuperString &filter)
+{
+   for (int i = 0; i < ~filter; i++)
+   {
+      // >= rather than >: a string equal to a filter entry also begins with it.
+      if (~s >= ~filter[i])
+      {
+         String part_to_check = s(0, ~filter[i]);
+
+         if (part_to_check == filter[i])
+         {
+            return true;
+         }
+      }
+   }
+
+   return false;
+}
+
+std::ostream *Log = &std::cerr;
+
+// Usage: scanspam [hostname]
+//
+// Fetch the logs host.select_logs() returns for "gnucomo" since a week ago,
+// drop the ones that begin with a pre_filter string and, for each remaining
+// log, print the later logs whose Levenshtein distance to it is less than a
+// third of its length.
+
+int main(int argc, char *argv[])
+{
+   // Without an argument, fall back to the previously hard-coded host.
+   String hostname(argc > 1 ? argv[1] : "skiathos.andromeda.nl");
+   gnucomo_config cfg;
+   String config_name("gnucomo");
+
+   /* Get the configuration file */
+
+   if (!cfg.read(config_name))
+   {
+      std::cerr << "Can not read Gnucomo configuration file for " << config_name << ".\n";
+      return 1;
+   }
+   gnucomo_database db(&cfg);
+   Object host(db, hostname);
+
+   SuperString spam_headers;
+   SuperString pre_filter;
+
+   // Logs starting with these strings are irrelevant
+
+   pre_filter += "MIME-Version:";
+   pre_filter += "Content-Type: text";
+   pre_filter += "To:";
+   pre_filter += "X-Original-To:";
+   pre_filter += "Delivered-To:";
+   pre_filter += "Date:";
+   pre_filter += "X-Greylist:";
+   pre_filter += "Importance:";
+   pre_filter += "X-Priority:";
+   pre_filter += "X-AntiAbuse:";
+
+   UTC start_date;
+   date last_week;
+
+   last_week = today() - 7;
+   start_date = UTC(last_week, now());
+
+   std::cout << "Scanning spam from " << start_date << "\n";
+
+   // auto: the element type is whatever Object::select_logs() returns.
+   auto spam_logs = host.select_logs(start_date, Now(), "gnucomo");
+
+   for (auto &entry : spam_logs)
+   {
+      // NOTE(review): "<< 34" presumably drops a fixed 34-character prefix of the raw log -- confirm.
+      String log_string = entry.raw() << 34;
+
+      if (!begin_filtered(log_string, pre_filter))
+      {
+         spam_headers += log_string;
+      }
+   }
+
+   for (int i = 0; i < ~spam_headers; i++)
+   {
+      std::cout << "\n====================================================================\n";
+      std::cout << "[" << ~spam_headers[i] << "] " << spam_headers[i] << "\n";
+      std::cout << "====================================================================\n";
+      for (int j = i + 1; j < ~spam_headers; j++)
+      {
+         int d = LevenshteinDistance(spam_headers[i], spam_headers[j]);
+         if (d * 3 < ~spam_headers[i])
+         {
+            std::cout << " " << d << " - " << spam_headers[j] << "\n";
+         }
+      }
+   }
+
+   std::cout << "FINISH.\n";
+}
diff --git a/src/spam/spamdetect.cpp b/src/spam/spamdetect.cpp
new file mode 100644
index 0000000..38487fe
--- /dev/null
+++ b/src/spam/spamdetect.cpp
@@ -0,0 +1,140 @@
+/**************************************************************************
+** (c) Copyright 2007, Andromeda Technology & Automation
+** This is free software; you can redistribute it and/or modify it under the
+** terms of the GNU General Public License, see the file COPYING.
+***************************************************************************
+** MODULE INFORMATION *
+***********************
+** FILE NAME : spamdetect.cpp
+** SYSTEM NAME : Gnucomo - Gnu Computer Monitoring
+** VERSION NUMBER : $Revision: 1.2 $
+**
+** DESCRIPTION :
+**
+** EXPORTED OBJECTS :
+** LOCAL OBJECTS :
+** MODULES USED :
+***************************************************************************
+** ADMINISTRATIVE INFORMATION *
+********************************
+** ORIGINAL AUTHOR : Arjen Baart - arjen@andromeda.nl
+** CREATION DATE : Nov 14, 2007
+** LAST UPDATE : Nov Nov 14, 2007
+** MODIFICATIONS :
+**************************************************************************/
+
+/*****************************
+ $Log: spamdetect.cpp,v $
+ Revision 1.2 2007-11-21 15:14:26 arjen
+ Removed debug output.
+
+ Revision 1.1 2007/11/14 16:20:05 arjen
+ New program: spamdetect.
+ Expirimental utility to log manually reported spam and have
+ Gnucomo detect the spammer's IP address.
+
+*****************************/
+
+static const char *RCSID = "$Id: spamdetect.cpp,v 1.2 2007-11-21 15:14:26 arjen Exp $";
+
+#include <iostream>
+#include <syslog.h>
+
+// NOTE(review): the targets of the two includes below are missing from the
+// patch as received. They must name the headers that declare String and
+// regex -- restore them before applying.
+#include
+#include
+
+// Write one collected header to syslog.
+
+static void log_header(String &header)
+{
+   syslog(LOG_WARNING, "%s", (char *)header);
+}
+
+// Read a message from standard input and write headers of the forwarded
+// (original) message in it to syslog, ident "gnucomo", facility LOG_MAIL.
+// Logging starts at the first Received:, From:, Return-Path: or Subject:
+// header after the forwarded-message separator and stops at the first
+// empty input after that.
+
+int main(int argc, char *argv[])
+{
+   String line;
+   String header;
+
+   // 0: looking for the forwarded-message separator
+   // 1: looking for the first header to log
+   // 2: collecting headers; 'header' holds one that is not logged yet
+   // 3: past the forwarded header; further input is ignored
+   int    state = 0;
+
+   // From here, the original spam starts. Something like
+   // -------- Forwarded Message -------- or -------- Original Message --------
+
+   regex fwd_header("---- .+ Message -----");
+   regex received("^Received:");
+   regex from("^From:");
+   regex subject("^Subject:");
+   regex returnpath("^Return-Path:");
+   regex any_header("^[^ ]+: ");
+
+   openlog("gnucomo", 0, LOG_MAIL);
+
+   while (std::cin >> line)
+   {
+      switch (state)
+      {
+      case 0:
+         if (line == fwd_header)
+         {
+            state = 1;
+         }
+         break;
+
+      case 1:
+         // Inside the forwarded header
+         if (line == received || line == from || line == returnpath || line == subject)
+         {
+            header = line;
+            state = 2;
+         }
+         break;
+
+      case 2:
+         if (line == any_header)
+         {
+            // A new header starts, so the collected one is complete.
+            log_header(header);
+
+            header = line;
+         }
+         else if (line == String(""))
+         {
+            // End of the headers: log the last one instead of dropping it.
+            log_header(header);
+            state = 3;
+         }
+         else
+         {
+            // Anything else is appended to the collected header.
+            header += " ";
+            header += line;
+         }
+         break;
+
+      default:
+         break;
+      }
+   }
+
+   if (state == 2)
+   {
+      // Input ended while collecting: the last header is still pending.
+      log_header(header);
+   }
+
+   closelog();
+}
+
-- 
2.11.0