#!/usr/bin/perl
#
# SpamAssassin Public Corpus Trainer
#
# Looks in the current directory for the corpus directories listed in
# @archives and pipes every message file found in them to the `dspam`
# command on behalf of the user named on the command line.
#
# Usage: <script> username

use strict;
use warnings;
use POSIX ();

# Corpus directory names searched for, relative to the current directory.
my @archives = ( "easy_ham", "hard_ham", "spam", "spam_2" );

print "SpamAssassin Public Corpus Trainer v0.1.0\n\n";

my $user = shift;
if (!defined $user || $user eq "") {
    print "Syntax: $0 [username]\n";
    exit(-1);
}

foreach my $archive (@archives) {
    print "Searching for corpus $archive ...\n";
    if (-d $archive) {
        print "...found it!\n";
        print "Training with corpus $archive ...";
        Train($user, $archive);
        print "...done!\n";
    }
    else {
        print "...not found.\n";
    }
}

print "Training complete.\n";
print "Now run \"dspam_clean -p0 $user\" to purge uninteresting data\n";

# Train($user, $corpus)
#
# Feeds every plain file in directory $corpus to dspam for $user.
# The class passed to dspam is derived from the directory name:
# a name containing "ham" is trained as "innocent", otherwise a name
# containing "spam" is trained as "spam"; any other name is skipped with
# a message.  The entry named "cmds" is never trained.
#
# A failure of an individual dspam run is reported on STDOUT and training
# continues with the next file.  Returns nothing.
sub Train {
    my ($user, $corpus) = @_;

    # Classify first so an unrecognised corpus is skipped without
    # reading its directory.
    my $class;
    if ($corpus =~ /ham/) {
        $class = "innocent";
    }
    elsif ($corpus =~ /spam/) {
        $class = "spam";
    }
    else {
        print "Unable to determine whether $corpus is ham or spam. Skipping.\n";
        return;
    }

    my $dir;
    if (!opendir($dir, $corpus)) {
        print "Unable to open corpus directory $corpus: $!. Skipping.\n";
        return;
    }
    my @files = grep { !/^\.\.?$/ } readdir($dir);
    closedir($dir);

    foreach my $file (@files) {
        next if ($file eq "cmds");

        my $path = "$corpus/$file";

        # Only plain files can be fed to dspam on stdin.
        next unless -f $path;

        # Human-readable form of the invocation, used only for reporting.
        my $cmd = "dspam --user $user --class=$class --source=corpus < $path";
        my $ret = _run_dspam($user, $class, $path);
        print "Command returned error $ret: $cmd\n" if ($ret);
    }

    return;
}

# _run_dspam($user, $class, $path)
#
# Runs dspam with $path connected to its standard input.  The program is
# started with list-form exec, so neither $user nor $path is ever
# interpreted by a shell.  Returns the raw wait status ($?) of the child,
# or -1 when the message file cannot be opened or the fork fails.
sub _run_dspam {
    my ($user, $class, $path) = @_;

    my $in;
    if (!open($in, '<', $path)) {
        print STDERR "Cannot open $path: $!\n";
        return -1;
    }

    my $pid = fork();
    if (!defined $pid) {
        print STDERR "Cannot fork: $!\n";
        close($in);
        return -1;
    }

    if ($pid == 0) {
        # Child: make the message file our stdin, then become dspam.
        # POSIX::_exit is used on failure so the child neither runs END
        # blocks nor flushes output buffers inherited from the parent.
        open(STDIN, '<&', $in) or POSIX::_exit(126);
        exec('dspam', '--user', $user, "--class=$class", '--source=corpus')
            or do {
                print STDERR "Cannot exec dspam: $!\n";
                POSIX::_exit(127);
            };
    }

    close($in);
    waitpid($pid, 0);
    return $?;
}