// Computes an edit distance between `source` and `target` with a single
// rolling row (`lev_dist`, min_size + 1 entries) instead of a full matrix:
// a matching character copies the diagonal value, otherwise the result is
// one more than the smallest of the three neighbouring cells.
// NOTE(review): the original line numbers embedded in this listing skip
// values (3, 7, 10, ...), so braces and some statements of this function are
// not visible here -- confirm details against the complete file.
3 int LevenshteinDistance(const String &source, const String &target)
// Recursive call with the arguments swapped. Presumably guarded by a length
// comparison on a line that is not visible, so that `source` ends up being
// the shorter string -- TODO confirm.
7 return LevenshteinDistance(target, source);
// Unary `~` on a String is assumed to yield its length (the results are used
// as vector size and loop bounds below) -- TODO confirm against String.
10 const int min_size = ~source, max_size = ~target;
11 std::vector<int> lev_dist(min_size + 1);
// Walks every index of lev_dist; the loop body is not visible here
// (presumably it seeds the row with its initial values -- TODO confirm).
13 for (int i = 0; i <= min_size; ++i)
// Outer loop: one pass per character of `target` (index j - 1).
18 for (int j = 1; j <= max_size; ++j)
// previous_diagonal starts as the current lev_dist[0];
// previous_diagonal_save is assigned inside the inner loop before it is read.
20 int previous_diagonal = lev_dist[0], previous_diagonal_save;
// Inner loop: one pass per character of `source` (index i - 1).
23 for (int i = 1; i <= min_size; ++i)
// Remember lev_dist[i] before it is overwritten; it becomes the diagonal
// value for the next i.
25 previous_diagonal_save = lev_dist[i];
26 if (source[i - 1] == target[j - 1])
// Characters match: take the diagonal value unchanged.
28 lev_dist[i] = previous_diagonal;
// Presumably the `else` branch (the keyword itself is not visible here):
// one more than the minimum of lev_dist[i - 1], the not-yet-overwritten
// lev_dist[i], and the diagonal.
32 lev_dist[i] = std::min(std::min(lev_dist[i - 1], lev_dist[i]), previous_diagonal) + 1;
34 previous_diagonal = previous_diagonal_save;
// The last entry of the row is the result.
38 return lev_dist[min_size];
// Tests whether `s` starts with one of the entries of `filter`.
// NOTE(review): the declarations of `found` and `i` and the return statement
// are not visible in this listing; presumably `found` starts as false and is
// returned -- TODO confirm.
41 bool begin_filtered(String s, SuperString &filter)
// Visits every entry of `filter`; no early exit is visible, but the `!found`
// test below skips further comparisons once a match was seen.
47 for (i = 0; i < ~filter; i++)
// Only compare when nothing matched yet and `s` is strictly longer than the
// entry (`~` assumed to be length). NOTE(review): with `>` a string exactly
// as long as the entry is never compared -- confirm this is intended.
49 if (!found && ~s > ~filter[i])
// Assumes s(start, length) extracts a substring, here the leading
// ~filter[i] characters of `s` -- TODO confirm against String.
51 String part_to_check = s(0, ~filter[i]);
53 found = part_to_check == filter[i];
// Global log stream pointer, initialised to point at std::cerr. It is not
// referenced by the code visible in this listing.
60 std::ostream *Log = &std::cerr;
// Program entry point: reads the Gnucomo configuration, selects logs for one
// host starting a week ago, drops those that begin with a known prefix, and
// prints for each remaining log the later logs whose edit distance to it is
// small.
// NOTE(review): the original line numbers embedded in this listing skip
// values, so braces and several statements (e.g. the declarations of `cfg`,
// `last_week`, `start_date`, `i`, `j`, and the error-path return) are not
// visible here -- confirm against the complete file.
62 int main(int argc, char *argv[])
// Host name and configuration name are hard-coded; argc/argv are not used in
// the visible lines.
64 String hostname("skiathos.andromeda.nl");
66 String config_name("gnucomo");
68 /* Get the configuration file */
70 if (!cfg.read(config_name))
72 std::cerr << "Can not read Gnucomo configuration file for " << config_name << ".\n";
// Database handle built from the configuration, and the object for the host.
75 gnucomo_database db(&cfg);
76 Object host(db, hostname);
// spam_logs receives the selected logs, spam_headers the ones that survive
// the prefix filter, pre_filter the list of prefixes to discard.
78 std::list<ObjectLog> spam_logs;
79 SuperString spam_headers;
80 SuperString pre_filter;
82 // Logs starting with these strings are irrelevant
84 pre_filter += "MIME-Version:";
85 pre_filter += "Content-Type: text";
87 pre_filter += "X-Original-To:";
88 pre_filter += "Delivered-To:";
89 pre_filter += "Date:";
90 pre_filter += "X-Greylist:";
91 pre_filter += "Importance:";
92 pre_filter += "X-Priority:";
93 pre_filter += "X-AntiAbuse:";
// Start of the scan window: today() minus 7, combined with now() into
// start_date (presumably seven days before the current moment -- TODO
// confirm the units of the date arithmetic).
98 last_week = today() - 7;
99 start_date = UTC(last_week, now());
101 std::cout << "Scanning spam from " << start_date << "\n";
// Select the host's logs between start_date and Now(); the meaning of the
// "gnucomo" argument is not visible here -- TODO confirm.
103 spam_logs = host.select_logs(start_date, Now(), "gnucomo");
105 std::list<ObjectLog>::iterator spam_i;
106 for (spam_i = spam_logs.begin(); spam_i != spam_logs.end(); spam_i++)
// NOTE(review): `<< 34` on the raw log text presumably removes a fixed
// 34-character leading part of each line -- confirm against String's
// operator<< before changing the constant.
108 String log_string = spam_i->raw() << 34;
// Keep only logs that do not start with one of the pre_filter prefixes.
110 if (!begin_filtered(log_string, pre_filter))
112 spam_headers += log_string;
// Pairwise comparison: each collected log is printed with its length
// (assuming `~` is length) between separator lines, followed by the similar
// logs that come after it.
118 for (i = 0; i < ~spam_headers; i++)
120 std::cout << "\n====================================================================\n";
121 std::cout << "[" << ~spam_headers[i] << "] " << spam_headers[i] << "\n";
122 std::cout << "====================================================================\n";
// Only later entries (j > i) are compared, so each pair is examined once.
123 for (j = i + 1; j < ~spam_headers; j++)
125 int d = LevenshteinDistance(spam_headers[i], spam_headers[j]);
// Report entry j when the distance is less than a third of the value
// ~spam_headers[i] (written as d * 3 < ... to stay in integer arithmetic).
126 if (d * 3 < ~spam_headers[i])
128 std::cout << " " << d << " - " << spam_headers[j] << "\n";
133 std::cout << "FINISH.\n";