#!/usr/bin/perl
# sa-addon-stats.pl
# written by Bowie Bailey
#
# Ver: 1.1 9/27/2006
# Changed rule detection code to pick up rules from fuzzyocr plugin
#
# Ver: 1.0 8/17/2006
# First release
use strict;
use Getopt::Long;
use Cwd 'abs_path';
# Command-line option values.  Each one stays undefined unless the user
# supplies it; fallback values are applied after parsing.
my ($logdir, $fileregex, $configdir, $toprules, $help);

# Every option has a long name and a single-letter alias.
GetOptions(
    'help|h'        => \$help,
    'num|n=i'       => \$toprules,
    'filename|f=s'  => \$fileregex,
    'logdir|l=s'    => \$logdir,
    'configdir|c=s' => \$configdir,
);
# --help/-h: print a usage summary and exit without doing any work.
# The short option shown for --configdir is -c, matching the option spec
# handed to GetOptions (-l belongs to --logdir).
if ($help) {
    print "\nusage: $0 [-c <dir>] [-l <dir>] [-f <regex>] [-n <num>]\n\n";
    print "\t--configdir|-c <dir>\tDirectory containing the addon cf files\n";
    print "\t\t\t\t(Default: /etc/mail/spamassassin)\n";
    print "\t--logdir|-l <dir>\tDirectory containing spamd logs\n";
    print "\t\t\t\t(Default: /var/log)\n";
    print "\t--filename|-f <regex>\tFile names or regex to look for in the logdir\n";
    print "\t\t\t\t(Default: ^maillog\$)\n";
    print "\t--num|-n <num>\tNumber of top rules to display (Default: 20)\n";
    print "\t--help|-h\tPrints this help\n";
    exit;
}
# Apply defaults for anything not given on the command line.  Because the
# test is for truth rather than definedness, "-n 0" also falls back to 20.
$logdir    ||= '/var/log';
$fileregex ||= '^maillog$';
$configdir ||= '/etc/mail/spamassassin';
$toprules  ||= 20;

# Resolve both directories to absolute paths before changing directory, so
# that a relative log directory is still found after the chdir below.
my $abs_logdir = abs_path($logdir);
defined $abs_logdir
    or die "$0: cannot resolve log directory '$logdir'\n";
my $abs_configdir = abs_path($configdir);
defined $abs_configdir
    or die "$0: cannot resolve config directory '$configdir'\n";
$logdir    = $abs_logdir;
$configdir = $abs_configdir;

# The cf files are globbed relative to the current directory, so a failed
# chdir must stop the run instead of scanning some unrelated directory.
chdir($configdir)
    or die "$0: cannot chdir to config directory '$configdir': $!\n";
# Bookkeeping filled in while scanning the addon cf files:
#   %rulesets  - cf file name => list of rule names first seen in that file
#   %rules     - rule name    => hash of per-rule data (score is set here)
#   %filenames - rule name    => cf file in which the rule was first seen
#   %files     - per-cf-file data (declared here, not filled in this section)
my %rulesets;
my %rules;
my %filenames;
my %files;

# Read all of the rule files and track the rules in each
for my $file (<*.cf>) {
    my $cf;
    unless (open $cf, '<', $file) {
        # Skip unreadable files but say so, rather than silently
        # producing a report with rules missing.
        warn "$0: cannot read $file: $!\n";
        next;
    }

    while (my $line = <$cf>) {
        # Rule definition line.  The (?!__) lookahead excludes rule names
        # that begin with a double underscore.
        if ($line =~ /^\s*(?:header|body|uri|rawbody|full|meta)\s+(?!__)(\S+)/) {
            my $rule = $1;
            if (not $filenames{$rule}) {
                push @{$rulesets{$file}}, $rule;
                $filenames{$rule} = $file;
                $rules{$rule}{score} = 0;
            }
        }

        # Score line.  The first value after the rule name is recorded; a
        # rule that is only ever scored (never defined in a cf file read so
        # far) is attributed to the file containing the score line.
        if ($line =~ /^\s*score\s+(?!__)(\S+)\s+(\S+)/i) {
            my ($rule, $score) = ($1, $2);
            $rules{$rule}{score} = $score;
            if (not $filenames{$rule}) {
                push @{$rulesets{$file}}, $rule;
                $filenames{$rule} = $file;
            }
        }
    }
    close $cf;
}
# Total count variables.  They start at zero so the totals stay numeric
# even when no spamd result line is found.
my $hamcnt  = 0;
my $spamcnt = 0;

# Log files are globbed relative to the current directory, so a failed
# chdir must not be ignored.
chdir($logdir)
    or die "$0: cannot chdir to log directory '$logdir': $!\n";

for my $logfile (<*>) {
    next unless (-f $logfile and $logfile =~ /$fileregex/i);

    # Compressed logs are read through a decompressor.  The list form of
    # the pipe open runs the program directly, without a shell, so unusual
    # characters in a file name cannot be interpreted as shell syntax.  The
    # "./" prefix keeps a name starting with "-" from looking like an option.
    my $log;
    my $opened;
    if ($logfile =~ /\.gz$/) {
        $opened = open $log, '-|', 'gunzip', '-c', "./$logfile";
    }
    elsif ($logfile =~ /\.Z$/) {
        $opened = open $log, '-|', 'uncompress', '-c', "./$logfile";
    }
    else {
        $opened = open $log, '<', $logfile;
    }
    unless ($opened) {
        warn "$0: cannot open $logfile: $!\n";
        next;
    }

    # Read each spamd result line and count rule hits
    while (my $line = <$log>) {
        next unless $line =~ /spamd: result: (.) +\S+ - (\S+)/;
        my $res  = $1;    # Spam Y/N
        my $hits = $2;    # Rule hits (comma-separated rule names)

        my $type;
        if ($res eq 'Y') {
            $spamcnt++;
            $type = 'spam';
        }
        else {
            $hamcnt++;
            $type = 'ham';
        }

        my %hitfile;      # cf files already credited for this message
        for my $hit (split(',', $hits)) {
            $rules{$hit}{$type}++;

            # Rules that were not found in any scanned cf file have no
            # entry in %filenames and take no part in file-level counts.
            my $filename = $filenames{$hit};
            next unless defined $filename;

            # Only one hit per cf file per message for file-level counts
            if (not $hitfile{$filename}) {
                $files{$filename}{$type}++;
                $hitfile{$filename} = 1;
            }
        }
    }
    close $log;
}
# Overall message totals.  A counter that was never incremented may still
# be undefined at this point; normalise it to 0 so the summary prints
# "Ham: 0" / "Spam: 0" rather than an empty value.
$hamcnt  ||= 0;
$spamcnt ||= 0;
my $total = $hamcnt + $spamcnt;
print "Total: $total\n";
print "Ham: $hamcnt\n";
print "Spam: $spamcnt\n\n";
# Print full info for each ruleset: one line per rule (score, ham/spam hit
# counts and those counts as a percentage of all ham/spam messages),
# followed by an OVERALL line for the cf file as a whole.
for my $ruleset (sort keys %rulesets) {
    print "$ruleset:\n";
    print " Rule Name Score Ham Spam \%of Ham \%of Spam\n";
    print " -----------------------------------------------------------------------\n";
    for my $rule (@{$rulesets{$ruleset}}) {
        # Tag the rule with its ruleset; the top-N reports later in the
        # script use this tag to tell addon rules from other rules.
        $rules{$rule}{ruleset} = $ruleset;

        my $ham  = $rules{$rule}{ham}  || 0;
        my $spam = $rules{$rule}{spam} || 0;
        # Guard the divisions: with no ham (or no spam) in the logs the
        # unguarded expression dies with "Illegal division by zero".
        my $pham  = $hamcnt  ? $ham  / $hamcnt  * 100 : 0;
        my $pspam = $spamcnt ? $spam / $spamcnt * 100 : 0;
        printf " %-25s %6.2f %5d %5d %5.2f%% %5.2f%%\n", $rule, $rules{$rule}{score}, $ham, $spam, $pham, $pspam;
    }
    # Print overall ruleset info. Note that this is number of spams hit by the file, not just number of rule hits.
    my $fham  = $files{$ruleset}{ham}  || 0;
    my $fspam = $files{$ruleset}{spam} || 0;
    my $pham  = $hamcnt  ? $fham  / $hamcnt  * 100 : 0;
    my $pspam = $spamcnt ? $fspam / $spamcnt * 100 : 0;
    print " -----------------------------------------------------------------------\n";
    printf " %-25s %5d %5d %5.2f%% %5.2f%%\n", "OVERALL", $fham, $fspam, $pham, $pspam;
    print "\n";
}
print "\n";
# Print an overview of the performance of each rule set, ordered by the
# number of spam messages each cf file hit (highest first).
print "Ruleset overview\n";
print " Ruleset Ham Spam \%of Ham \%of Spam\n";
print " --------------------------------------------------------------------\n";
for my $ruleset (sort { ($files{$b}{spam} || 0) <=> ($files{$a}{spam} || 0) } keys %files) {
    # Skip an entry with an empty name
    next unless ($ruleset);

    my $ham  = $files{$ruleset}{ham}  || 0;
    my $spam = $files{$ruleset}{spam} || 0;
    # Guard the divisions so an all-ham or all-spam log does not abort the
    # report with "Illegal division by zero".
    my $pham  = $hamcnt  ? $ham  / $hamcnt  * 100 : 0;
    my $pspam = $spamcnt ? $spam / $spamcnt * 100 : 0;
    printf " %-30s %5d %5d %5.2f%% %5.2f%%\n", $ruleset, $ham, $spam, $pham, $pspam;
}
print "\n\n";
# Rule names ordered by hit count, highest first.  A rule with no hits of a
# given kind counts as 0.
my @topspam = sort { ($rules{$b}{spam} || 0) <=> ($rules{$a}{spam} || 0) } keys %rules;
my @topham  = sort { ($rules{$b}{ham}  || 0) <=> ($rules{$a}{ham}  || 0) } keys %rules;

print "Addon Rules hitting the most spam (top $toprules)\n";
print " Ruleset Rule Name \% of Spam\n";
print " -----------------------------------------------------------\n";
my $cnt = 0;
for my $rule (@topspam) {
    # Only rules that were tagged with a ruleset are reported
    next unless ($rules{$rule}{ruleset});

    # Guard the division: with no spam in the logs the unguarded
    # expression dies with "Illegal division by zero".
    my $pspam = $spamcnt ? ($rules{$rule}{spam} || 0) / $spamcnt * 100 : 0;
    printf " %-25s %-25s %5.2f%%\n", $rules{$rule}{ruleset}, $rule, $pspam;
    $cnt++;
    last if ($cnt >= $toprules);
}
print "\n\n";
print "Addon Rules hitting the most ham (top $toprules)\n";
print " Ruleset Rule Name \% of Ham\n";
print " -----------------------------------------------------------\n";
# Reset the counter declared for the spam list; declaring it a second time
# with "my" in the same scope would mask the earlier variable.
$cnt = 0;
for my $rule (@topham) {
    # Only rules that were tagged with a ruleset are reported
    next unless ($rules{$rule}{ruleset});

    # Guard the division: with no ham in the logs the unguarded expression
    # dies with "Illegal division by zero".
    my $pham = $hamcnt ? ($rules{$rule}{ham} || 0) / $hamcnt * 100 : 0;
    printf " %-25s %-25s %5.2f%%\n", $rules{$rule}{ruleset}, $rule, $pham;
    $cnt++;
    last if ($cnt >= $toprules);
}