# Sophie
#
# Sophie
#
# distrib > Mageia > 6 > armv5tl > media > core-release > by-pkgid > 04bfc96c18dd7f68f1b48bd2b31c568f > files > 5
#
# cleanfeed-20020501-13.mga6.noarch.rpm

# vim: set syntax=perl ts=4 ai si:

######################### IMPORTANT! - READ CAREFULLY ########################
# This file contains some parts of my own cleanfeed.local.
# Many of these checks are more content-based than I feel
# comfortable putting in the official source, and others are just
# experimental or need site-specific tweaks.
# Before using any of this code please *think*, and be sure you really
# understand what it does.
######################### IMPORTANT! - READ CAREFULLY ########################

# Names that appear in the parenthesised part of an "AspNNTP" X-Newsreader
# header and are treated as bot signatures by local_filter_bot().
# The trailing comments record what each sender was seen posting.
my @badaspnntps = (
	'PostIT Now',
	'Jobsearch Limited',
	'AudioWeb',                 # audioweb.com
	'Alex',                     # sex spam
	'Paul Simmons',             # OperationIT.com
	'Alan',                     # equest.com
	'Digital Media Works',      # html sex spam
	'Captive Technology',       # ccsscorp.com jobs flood
	'Computer Horzions ISG',    # isgjobs.com jobs flood
	'Mike Powers',              # ResumeGateway.com jobs flood
);

# Lookup table keyed by name; every listed name maps to 1.
my %badaspnntp;
@badaspnntp{@badaspnntps} = (1) x @badaspnntps;

# The list itself is no longer needed once the hash is built.
undef @badaspnntps;

# First local filter hook.
#
# Reads the article from the globals %hdr (headers plus __BODY__/__LINES__),
# @groups and @followups, and counts it.* groups in $gr{it}.
# Returns the result of reject() when the article is refused, '' otherwise.
# Some articles are also copied aside with saveart() for later inspection.
# NOTE(review): presumably called by cleanfeed before its own checks - confirm.
sub local_filter_first {
	my $localpost = 0;

	# An article is considered local when its X-Trace names an inwind.it host.
	$localpost = 1 if $hdr{'X-Trace'} and $hdr{'X-Trace'} =~ /\.inwind\.it /;

	study $hdr{__BODY__} if $hdr{__LINES__} <= 250;

	# local posts ############################################################
	if ($localpost) {
		# Local users may only send Approved articles to alt.*, wind.* and
		# inwind.*; anywhere else the approval is treated as forged.
		if ($hdr{Approved}) {
			foreach (@groups) {
				if (not /^(?:alt|wind|inwind)\./) {
					saveart('CF.approved');
					return reject("Forged approval in $_ ($hdr{Approved})");
				}
			}
		}

#		if ($hdr{Subject} =~ /^R: /) { saveart('L.r'); }

		return reject('Non usare HTML in Usenet!')
			if $hdr{'Content-Type'} and ($hdr{'Content-Type'} =~ m#text/html#
				or $hdr{'Content-Type'} =~ m#multipart/alternative#);
	}
	##########################################################################

	# save articles coming from broken sites so I can LART them
	foreach (@groups) {
		# Test the group being visited, not the whole Newsgroups header:
		# matching the header made the result depend only on whichever group
		# happened to be listed first, and /^it/ also matched non-it.* names.
		next unless /^it\./;
		$gr{it}++;
		saveart('W.nomod') if exists $Moderated{$_} and not $hdr{Approved};
	}

	# enforce it.* hierarchy restrictions
	if ($gr{it} and (@groups > 10 or @followups > 3)) {
		saveart('CF.ECP');
		return reject('Excessive crosspost');
	}

	# specific sites or companies ############################################
	# $1 is the domain captured by whichever of the two patterns matched.
	return reject("Job spam ($1)")
		if $hdr{From} =~ /@(ajilon\.ca|ntes\.com|trai\.com|lesliecorp\.com|topechelon\.net|ERecruitingWorld\.com|(?:data\.)?JobBankUSA\.com|resumes\.gojobs\.com|chemjobs\.net|eurosoft-inc\.com|newlonservices\.com|medzilla\.com|gisajob\.com|geologics\.com|brainhunter\.com|dsijobs\.com|offsitetechies\.com)\b/
			or $hdr{'Message-ID'} =~ /\@((?:webhire|hrsites|jobcircle|sans)\.com)>$/;

	return reject('NNTP Monitor', 'Bot Signature')
		if $hdr{From} =~ /^NNTP-Monitor\@/;

	# Nothing matched: return the same explicit "accept" value as the other
	# filters in this file.
	return '';
}

# Bot-signature hook: rejects articles whose X-Newsreader header is
# "AspNNTP <version> (<name>)" with <name> listed in %badaspnntp.
# Returns the result of reject() on a match, '' otherwise.
sub local_filter_bot {
	my $newsreader = $hdr{'X-Newsreader'};
	return '' if not $newsreader;

	if ($newsreader =~ /^AspNNTP \S+ \((.*)\)/) {
		# Copy the capture right away so later matches cannot clobber it.
		my $owner = $1;
		return reject('AspNNTP', 'Bot signature')
			if exists $badaspnntp{$owner};
		#saveart('W.aspnntp', $hdr{'X-Newsreader'});
	}

	# Explicit "accept" value, consistent with the other filters in this
	# file, instead of whatever the last evaluated condition happened to be.
	return '';
}

# Hashbuster detection: looks for long runs of letters (or letters and
# digits) on a line of their own, optionally combined with a numeric tail in
# the Subject.
# Most articles with hashbusters are caught by the MD5 filter anyway; the
# ones that reach this point still need investigating.
#
# Returns the result of reject() for a bot signature, '' otherwise.
sub local_filter_after_emp {
	# Only articles shorter than 250 lines outside "reports" groups are checked.
	return '' if $hdr{__LINES__} >= 250 or $gr{reports};

	my $subject = $hdr{Subject};

	# Checks reserved for very short articles (fewer than 25 lines).
	if ($hdr{__LINES__} < 25) {
		if ($hdr{__BODY__} =~ /\n[a-z]{60,}\n+$/) {
			return reject('lcbot 60+end+short', 'Bot signature');
		}
		if ($subject =~ /  \d{4,5}/
				and $hdr{__BODY__} =~ /^\n[a-z]{7,}\n+$/) {
			return reject('lcbot 7+only+num', 'Bot signature');
		}
		if ($subject =~ / \d{2,5}$/
				and $hdr{__BODY__} =~ /\n[a-z]{12,}\n+$/) {
			return reject('lcbot 12+end+short+num', 'Bot signature');
		}
	}

	# Lowercase runs anywhere in the body or at its very end.
	if ($hdr{__BODY__} =~ /^[a-z]{100,}$/m) {
		return reject('lcbot 100', 'Bot signature');
	}
	if ($hdr{__BODY__} =~ /\n[a-z]{80,}\n+$/) {
		return reject('lcbot 80+end', 'Bot signature');
	}
	if ($subject =~ /  \d{2,5}$/ and $hdr{__BODY__} =~ /^[a-z]{30,}$/m) {
		return reject('lcbot 30+num', 'Bot signature');
	}

	# Mixed-case/digit run closing the body of an article with no References.
	my $ends_with_hash = !$hdr{References}
		&& $hdr{__BODY__} =~ /\n{2,}[a-zA-Z0-9]{27,}\n+$/;
	return '' if not $ends_with_hash;

	if ($subject !~ / [a-zA-Z0-9]{1,}$/) {
		saveart('W.mchash2'); # all f.p.
		return '';
	}

	saveart('W.mchash');
	return reject('mcbot 30+end', 'Bot signature');
}

# Last local filter hook: two body-based spam checks, bookkeeping of local
# Message-IDs for cancel watching, and a debugging save of some reposts.
# Returns the result of reject() when the article is refused, '' otherwise.
sub local_filter_last {
	my $is_thread_start = !$hdr{References};

	# body checks ############################################################
	# Only articles shorter than 250 lines outside "reports" groups.
	if ($hdr{__LINES__} < 250 && !$gr{reports}) {

		# Warning: this check has some false positives.
		# (Disabled refinements: skipping bodies with a "begin NNN " line,
		# and skipping binaries via is_binary().)
		if ($is_thread_start
				&& $hdr{Subject} =~ m#\[[^0]/[^1]\]$#
				&& $hdr{__BODY__} =~ /\n[a-z]{12,}\n*$/) {
			saveart('CF.sette0');
			return reject('7 bot', 'Bot signature');
		}

		# I suppose I can't add new domains forever
		my $names_client = $hdr{'X-Mailer'} || $hdr{'X-Newsreader'};
		if ($is_thread_start && !$names_client
				&& $hdr{__BODY__} =~ /www\.(?:pure-instinct\.com|get-some-mojo\.com|magnetizewomen\.com|makeherscream\.net|wantmoresex\.com|lovesenses\.com|sexfit\.net|enhancelibido\.net|lovesenses\.com|bettersexlife\.com|erect4life\.com|androsfit\.com|smokefreelungs\.com|evidencegone\.com|biggertool\.com|forthepuss\.com|moreladies\.com|improve-libido\.com|openthathole\.com|at7x\.com|fuas\.net|dheafit\.com|sexboxoffice\.com|increasemanhood\.com|getsomeass\.com|nicotineaddict\.net|perkupsexdrive\.com|dateseverynight\.com|hot-products\.net|greatproducts\.net|landinbed\.com|getfemales\.net|sexattention\.com|allurefem\.com|smokerusa\.com|improve-libido\.com|youngeryears\.com|compelthem\.net|fightimpotency\.com|drawherin\.com|invitelust\.com|youlivelonger\.com)/) {
			saveart('CF.repsisdom');
			return reject('Repsis');
		}
	}

	# Remember when each local article (X-Trace naming an inwind.it host)
	# was seen, but only while cancel watching is enabled.
	my $localpost =
		($hdr{'X-Trace'} && $hdr{'X-Trace'} =~ /\.inwind\.it /) ? 1 : 0;
	$LocalPosts{$hdr{'Message-ID'}} = $now
		if $config{watch_cancels} and $localpost;

#	saveart('W.longsubj') if length $hdr{Subject} > 160;
#	saveart('W.space') if $hdr{Subject} =~ / {15,}[^ ]/;
	if ($hdr{Subject} =~ /^REPOST: / and $hdr{Path} =~ /!resurrector!/) {
		saveart('W.repostnotrej');
	}

	return '';
}

# Cancel hook: inspects a cancel message described by %hdr.
#
# Suspicious cancels from local posters are only saved for inspection
# (the corresponding rejects are commented out); cancels approved by
# deputydawg@altavista.com, and cancels with a long body that contains no
# "Path: " line, are rejected.
# Returns the result of reject() when the cancel is refused, '' otherwise.
# NOTE(review): assumes the caller stores the cancel target in $hdr{Cancel} -
# confirm against cleanfeed itself.
sub local_filter_cancel {
	my $localpost = 0;
	$localpost = 1 if $hdr{'X-Trace'} and $hdr{'X-Trace'} =~ /\.inwind\.it /;

	my $id = $hdr{Cancel};
	return '' if not defined $id;
	# Keep only what follows the last space (drops a leading "cancel " word).
	$id =~ s/.* //;
	return '' if not $id;

	# Look up the Message-ID being cancelled. This used to pass $1, which no
	# capture group in this sub ever sets, so the history lookup received an
	# undefined or stale value.
	if ($config{reject_suspect_cancels} and $localpost
			and not INN::havehist($id)) {
		# return reject('Cancel for a missing article', 'Rogue cancel');
		saveart('W.localcancelunknown');
	}

	if ($config{watch_cancels} and $localpost and not $LocalPosts{$id}) {
		# return reject('Cancel for a non local article', 'Rogue cancel');
		saveart('W.nonlocalcancel');
	}

	return reject('Rogue cancel (mindspring)')
		if $hdr{Approved}
			and $hdr{Approved} =~ /deputydawg\@altavista\.com/;

	if ($hdr{__LINES__} > 20 and $hdr{__BODY__} !~ /^Path: /m) {
		saveart('R.long');
		return reject('Rogue cancel (long body)', 'Rogue cancel');
	}

	return '';
}

# Reject hook: keeps copies of some rejected articles for later review.
#
# The first argument is the reject reason; all arguments are handed back
# unchanged. Which copy is saved depends on the reason text, on %hdr and on
# the global $lines.
sub local_filter_reject {
	my ($vr) = @_;

	# Each rule is: save name, whether the reason is passed to saveart(),
	# and a predicate. Rules are tried in order and every matching rule fires.
	my @rules = (
		[ 'CF.local', 1,
			sub { $hdr{'X-Trace'} and $hdr{'X-Trace'} =~ /\.inwind\.it / } ],
		[ 'WARN.it', 1,
			sub { $vr =~ /^NewsAgent/ and $hdr{Newsgroups} =~ /\bit\./ } ],
		[ 'W.supersedes', 0, sub { $vr =~ /^Excessive Supersedes/ } ],
#		[ 'CF.scoring', 1, sub { $vr =~ /^Scoring filter/ } ],
		[ 'CF.NewsAgent', 1, sub { $vr =~ /^NewsAgent/ } ],
		[ 'CF.SEX', 1, sub { $vr =~ /^Sex spam/ and $lines < 300 } ],
		[ 'Z.EMP', 1, sub { $vr =~ /^EMP/ } ],
		[ 'R.nanacancel', 0, sub { $vr eq 'Cancel in forbidden group' } ],
	);

	foreach my $rule (@rules) {
		my ($name, $with_reason, $applies) = @$rule;
		next unless $applies->();
		if ($with_reason) {
			saveart($name, $vr);
		}
		else {
			saveart($name);
		}
	}

	return @_;
}

# Site configuration hook.
#
# Fills %config_local and %config_append, adds a "netscape" entry to
# %Restricted_Groups, and switches the two cancel-related options off.
# When watch_cancels is enabled (by editing the assignment below), %LocalPosts
# is tied to a DBM file "$config_dir/posts".
sub local_config {
	%config_local = (
		block_late_cancels => 1,
		active_file => '/news/db/active',
		statfile => '/news/log/cleanfeed.stats',
#		html_statfile => '/news/log/cleanfeed.stats.html',
		stats_interval => 300,
		do_emp_dump => 1,
		emp_dump_file => '/news/tmp/empdump',
		debug_batch_directory => '/news/spam',
	);

	%config_append = (
		bin_allowed => '^alt\.mag\.',
	);

	$Restricted_Groups{netscape} = '^netscape\.';

	$config{reject_suspect_cancels} = 0;
	$config{watch_cancels} = 0;
	if ($config{watch_cancels}) {
		# Load the DBM support lazily, only when the feature is wanted.
		eval {
			require AnyDBM_File;
			AnyDBM_File->import;
			require Fcntl;
			Fcntl->import;
		};
		if ($@) {
			$config{watch_cancels} = undef;
			slog('E', 'Cannot load AnyDBM_File: ' . $@);
		}
		else {
			# Only tie once both modules are loaded: the Fcntl constants
			# below do not exist otherwise.
			tie %LocalPosts, 'AnyDBM_File', "$config_dir/posts",
					Fcntl::O_CREAT() | Fcntl::O_RDWR(), 0666
				or slog('E', "Cannot tie $config_dir/posts: $!");
		}
	}
}

# Never executed (guarded by "if 0"): it only mentions these globals one more
# time - presumably so that perl's "used only once" check stays quiet; confirm
# before removing any name from the list.
print $now.$config_dir.$lines.%Restricted_Groups.%Moderated.%config_local.%config_append.@followups if 0; # lint food

1;