Sophie

Sophie

distrib > Mandriva > 9.1 > ppc > by-pkgid > 24acc2d557ec1b024bc0a7d80512eb29 > files > 19

spamassassin-tools-2.44-1.1.91mdk.ppc.rpm

#!/usr/bin/perl -w

# options
#
# -c=file       set $cffile
# --fork        use fork-based model to save RAM (used for mailbox files)
# --all         don't skip big messages
# --mh          use mh-style folder (directory of messages named with numbers)
# --mn          find_missed('nonspam.log')
# --ms          find_missed('spam.log')
# --single      folder is a single mail message
# --loghits     log the text hit for patterns (useful for debugging)
# --nokillfile  don't use the "QUICK HACK"
#
# options for maildir and mh-style folders
# --head=N      only check first N messages
# --sort        sort contents of folders
# --tail=N      only check last N messages
#
# all following arguments are used as the name of mail folders

###########################################################################
# QUICK HACK: some of our spamtraps are getting nonspam newsletters. ignore
# those mails here...

my $killfile_message_ids = qr{(?:
    <\d\d\d\d\S+\@dev\.lifetimetv\.com>
    |<\d+\.\S+\@fmail..\.real-net\.net>
    |<3D53A59C000A4029\@mta3n\.bluewin\.ch>
)}x;

my $killfile_receiveds = qr{(?:
    from\s\S+\s\S+\sby\s(?:anclsmtp\d\d|myfamlsmtp\d\d)\.myfamily\.com
    |for :include:\S+/MapQuest/
)}x;

###########################################################################

use vars qw($opt_c $opt_fork $opt_all $opt_head $opt_mh $opt_ms $opt_mn
	    $opt_single $opt_sort $opt_tail $opt_nokillfile);

use FindBin;
use lib "$FindBin::Bin/lib";
use lib "$FindBin::Bin/../lib";
use Mail::ArchiveIterator;
use Mail::SpamAssassin;
use Mail::SpamAssassin::NoMailAudit;
use Getopt::Long;

# flush buffer immediately so if mass-check fails or stops we don't get a corrupt line
$|++;

GetOptions("c=s", "fork", "all", "head=i", "mh", "ms", "mn", "single", "sort",
	   "tail=i", "loghits", "nokillfile");

#use Devel::Peek;

if ($opt_c) {
  $cffile = $opt_c;
}
else {
  $cffile = "$FindBin::Bin/../rules";
}

if ($opt_ms) {
  find_missed('spam.log');
}
elsif ($opt_mn) {
  find_missed('nonspam.log');
}

$spamtest = new Mail::SpamAssassin ({
  'rules_filename'      => $cffile,
  'userprefs_filename'  => "$FindBin::Bin/spamassassin.prefs",
  'save_pattern_hits'   => $opt_loghits,
  'dont_copy_prefs'     => 1,
  'local_tests_only'    => 1
});

$spamtest->compile_now(1);
$spamtest->read_scoreonly_config("$FindBin::Bin/mass-check.cf");

my $iter = new Mail::ArchiveIterator ({
        'opt_fork' => $opt_fork,
        'opt_mh' => $opt_mh,
        'opt_single' => $opt_single,
        'opt_sort' => $opt_sort,
        'opt_head' => $opt_head,
        'opt_tail' => $opt_tail,
        'opt_all' => $opt_all
  });

my $who = `id -un 2>/dev/null`; chop $who;
my $where = `uname -n 2>/dev/null`; chop $where;
my $when = `date`; chop $when;

print "# mass-check results from $who\@$where, on $when\n";
print "# M:SA version ".$spamtest->Version()."\n";
print '# CVS tag: $Name:  $',"\n";
$iter->set_function (\&wanted);
$iter->run (@ARGV);
exit;

###########################################################################

sub wanted {
  my ($id, $dataref) = @_;

  # my $ma = Mail::Audit->new('data' => \@msg);
  my $ma = Mail::SpamAssassin::NoMailAudit->new ('data' => $dataref);

  unless ($opt_nokillfile) {
    # killfile for mass-checks
    $_ = $ma->get_header("Message-Id");
    if (defined($_) && /${killfile_message_ids}/os)
                { logkilled($ma,$id, "unwanted Message-Id"); return; }

    $_ = join ('', $ma->get_header("Received"));
    if (defined($_) && /${killfile_receiveds}/os)
                { logkilled($ma,$id, "unwanted Received chain"); return; }
  }

  $ma->{noexit} = 1;
  $spamtest->remove_spamassassin_markup($ma);
  my $status = $spamtest->check ($ma);
  $status->rewrite_mail ();

  $_ = $ma->get ("X-Spam-Status");
  /^(\S+), hits=(\S+) required=\S+ tests=(.+)\s*$/s;

  my $yorn = $status->is_spam();
  my $hits = $status->get_hits();
  my $tests = $status->get_names_of_tests_hit();

  $tests = join(',', sort(split(/,/, $tests)));

  $id =~ s/\s/_/g;

  printf "%s %2d %s %s\n",
		    ($yorn ? 'Y' : '.'),
		    $hits, $id, $tests;

  if ($tests =~ /MICROSOFT_EXECUTABLE|MIME_SUSPECT_NAME/) {
    logkilled($ma,$id, "possible virus");
  }

  if ($opt_loghits) {
    my $out = '';
    foreach my $t (sort keys %{$status->{pattern_hits}}) {
      $_ = $status->{pattern_hits}->{$t};
      $_ ||= '';
      s/\r/\\r/gs;      # fix unprintables
      s/\n/\\n/gs;
      $out .= "$t=\"$_\" ";
    }
    if ($out ne '') {
      chop $out; print "# ".$out."\n";
    }
  }

  $status->finish();
  undef $ma;		# clean 'em up
  undef $status;

  #system ("ps alxww | grep mass-check | grep perl | grep -v grep");#JMD
}

sub logkilled {
  my ($ma, $id, $reason) = @_;

  my $from = $ma->get_header ("From");
  my $to = $ma->get_header ("To");
  my $subj = $ma->get_header ("Subject");
  my $msgid = $ma->get_header ("Message-Id");
  $msgid ||= $ma->get_header ("Message-ID");
  $msgid ||= '(??)';
  chop ($from);
  chop ($to);
  chop ($subj);
  chop ($msgid);
  warn "# skipped killfiled message ($reason): from=$from to=$to subj=$subj msgid=$msgid id=$id\n";
}

###########################################################################

sub find_missed {
  my $file = shift;

  my $threshold = 5;

  my $shouldbespam = 1;
  if ($file =~ /nonspam/) { $shouldbespam = 0; }

  my $scores = readscores();

  open (IN, "<$file");
  while (<IN>) {
    next if /^#/;
    /^.\s+\d+\s+(\S+)\s*/ or next;
    my $id = $1;
    my $score = 0.0;

    $_ = $'; s/,,+/,/g; s/^\s+//; s/\s+$//;
    foreach my $tst (split (/,/, $_)) {
      next if ($tst eq '');
      if (!defined $scores->{$tst}) {
	warn "unknown test in $file, ignored: $tst\n";
	next;
      }
      $score += $scores->{$tst};
    }

    if ($shouldbespam && $score < $threshold) {
      found_missed ($score, $id, $_);
    } elsif (!$shouldbespam && $score > $threshold) {
      found_missed ($score, $id, $_);
    }
  }
  close IN;
}

sub readscores {
  print "Reading scores from \"$cffile\"...\n";
  system ("./parse-rules-for-masses -d \"$cffile\"") and die;
  my %scores;
  require "./tmp/rules.pl";
  return \%scores;
}

sub found_missed {
  my $score = shift;
  my $id = shift;
  my $tests = shift;

  print "$score $id $tests\n";
}