#!/usr/bin/perl

# sa-addon-stats.pl
# written by Bowie Bailey
#
# Ver: 1.1 9/27/2006
#   Changed rule detection code to pick up rules from fuzzyocr plugin
#
# Ver: 1.0 8/17/2006
#   First release
#
# Reads every *.cf file in the config directory to learn which rule belongs
# to which file, then scans the "spamd: result:" lines of the selected log
# files and reports, per cf file and per rule, how many ham and spam
# messages were hit.

use strict;
use warnings;
use Getopt::Long;
use Cwd 'abs_path';

# Return $count as a percentage of $total.  Yields 0 when $total is zero or
# undefined so that logs without any ham (or spam) do not abort the report
# with a division by zero.  An undefined $count is treated as 0.
sub percent {
    my ($count, $total) = @_;
    return 0 unless $total;
    return ($count || 0) / $total * 100;
}

# Open a log file for reading and return the filehandle, or nothing on
# failure ($! is left set by the failed open).  Files ending in .gz or .Z
# are piped through gunzip / uncompress.  The list form of open is used so
# the file name never passes through a shell, and the "./" prefix keeps a
# name starting with "-" from being taken as a command-line option.
sub open_log {
    my ($logfile) = @_;
    my $fh;
    my $ok;
    if ($logfile =~ /\.gz$/) {
        $ok = open $fh, '-|', 'gunzip', '-c', "./$logfile";
    }
    elsif ($logfile =~ /\.Z$/) {
        $ok = open $fh, '-|', 'uncompress', '-c', "./$logfile";
    }
    else {
        $ok = open $fh, '<', $logfile;
    }
    return unless $ok;
    return $fh;
}

# Print the "top N" table for one message type.
#   $rules - reference to the rule hash (rule name => { ruleset, ham, spam })
#   $type  - hash key to rank by: 'spam' or 'ham'
#   $label - capitalised form of $type used in the column header
#   $total - total number of messages of that type
#   $limit - maximum number of rules to print
# Only rules that belong to an addon cf file (those with a {ruleset} entry)
# are listed.
sub print_top_rules {
    my ($rules, $type, $label, $total, $limit) = @_;

    print "Addon Rules hitting the most $type (top $limit)\n";
    print " Ruleset Rule Name \% of $label\n";
    print " -----------------------------------------------------------\n";

    my @ranked =
        sort { ($rules->{$b}{$type} || 0) <=> ($rules->{$a}{$type} || 0) }
        grep { $rules->{$_}{ruleset} }
        keys %{$rules};

    my $shown = 0;
    for my $rule (@ranked) {
        printf " %-25s %-25s %5.2f%%\n",
            $rules->{$rule}{ruleset}, $rule,
            percent($rules->{$rule}{$type}, $total);
        $shown++;
        last if ($shown >= $limit);
    }
    return;
}

my $logdir;
my $fileregex;
my $configdir;
my $toprules;
my $help;

GetOptions (
    'configdir|c=s' => \$configdir,
    'logdir|l=s'    => \$logdir,
    'filename|f=s'  => \$fileregex,
    'num|n=i'       => \$toprules,
    'help|h'        => \$help,
);

if ($help) {
    print "\nusage: $0 [-c <dir>] [-l <dir>] [-f <regex>] [-n <num>]\n\n";
    print "\t--configdir|-c <dir>\tDirectory containing the addon cf files\n";
    print "\t\t\t\t(Default: /etc/mail/spamassassin)\n";
    print "\t--logdir|-l <dir>\tDirectory containing spamd logs\n";
    print "\t\t\t\t(Default: /var/log)\n";
    print "\t--filename|-f <regex>\tFile names or regex to look for in the logdir\n";
    print "\t\t\t\t(Default: ^maillog\$)\n";
    print "\t--num|-n <num>\tNumber of top rules to display (Default: 20)\n";
    print "\t--help|-h\tPrints this help\n";
    exit;
}

$logdir    = ( $logdir    || '/var/log' );
$fileregex = ( $fileregex || '^maillog$' );
$configdir = ( $configdir || '/etc/mail/spamassassin' );
$toprules  = ( $toprules  || 20 );

# Resolve both directories to absolute paths up front: the script chdirs
# into the config directory first, which would otherwise break a relative
# log directory.
my $abs_logdir = abs_path($logdir);
die "Cannot resolve log directory '$logdir': $!\n"
    unless defined $abs_logdir;
my $abs_configdir = abs_path($configdir);
die "Cannot resolve config directory '$configdir': $!\n"
    unless defined $abs_configdir;
$logdir    = $abs_logdir;
$configdir = $abs_configdir;

chdir($configdir)
    or die "Cannot change to config directory '$configdir': $!\n";

my %rulesets;   # cf file name => list of rule names first seen in that file
my %rules;      # rule name    => { score, ham, spam, ruleset }
my %filenames;  # rule name    => cf file name
my %files;      # cf file name => { ham, spam } message counts

# Read all of the rule files and track the rules in each
for my $file (glob '*.cf') {
    my $fh;
    if (not open $fh, '<', $file) {
        warn "Cannot open rule file '$file': $!\n";
        next;
    }

    while (my $line = <$fh>) {
        # Rule definition lines.  Names starting with "__" are skipped.
        if ($line =~ /^\s*(?:header|body|uri|rawbody|full|meta)\s+(?!__)(\S+)/) {
            my $rule = $1;
            if (not $filenames{$rule}) {
                push @{$rulesets{$file}}, $rule;
                $filenames{$rule} = $file;
                $rules{$rule}{score} = 0;
            }
        }

        # Score lines.  These also register the rule if no definition line
        # has been seen for it (e.g. rules supplied by a plugin).
        if ($line =~ /^\s*score\s+(?!__)(\S+)\s+(\S+)/i) {
            my ($rule, $score) = ($1, $2);
            $rules{$rule}{score} = $score;
            if (not $filenames{$rule}) {
                push @{$rulesets{$file}}, $rule;
                $filenames{$rule} = $file;
            }
        }
    }
    close $fh;
}

# Total count variables
my $hamcnt  = 0;
my $spamcnt = 0;

chdir($logdir)
    or die "Cannot change to log directory '$logdir': $!\n";

for my $logfile (glob '*') {
    next unless (-f $logfile and $logfile =~ /$fileregex/i);

    my $fh = open_log($logfile);
    if (not $fh) {
        warn "Cannot open log file '$logfile': $!\n";
        next;
    }

    # Read each spamd result line and count rule hits
    while (my $line = <$fh>) {
        next unless ($line =~ /spamd: result: (.) +\S+ - (\S+)/);
        my $res  = $1;  # Spam Y/N
        my $hits = $2;  # Rule hits

        my $type;
        if ($res eq 'Y') {
            $spamcnt++;
            $type = 'spam';
        }
        else {
            $hamcnt++;
            $type = 'ham';
        }

        my %hitfile;
        for my $hit (split(',', $hits)) {
            $rules{$hit}{$type}++;

            # Rules that are not in any addon cf file have no file to
            # credit.
            my $filename = $filenames{$hit};
            next unless ($filename);

            # Only one hit per cf file per message for file-level counts
            if (not $hitfile{$filename}) {
                $files{$filename}{$type}++;
                $hitfile{$filename} = 1;
            }
        }
    }
    close $fh;
}

my $total = $hamcnt + $spamcnt;

print "Total: $total\n";
print "Ham: $hamcnt\n";
print "Spam: $spamcnt\n\n";

# Print full info for each ruleset
for my $ruleset (sort keys %rulesets) {
    print "$ruleset:\n";
    print " Rule Name Score Ham Spam \%of Ham \%of Spam\n";
    print " -----------------------------------------------------------------------\n";

    for my $rule (@{$rulesets{$ruleset}}) {
        # Mark the rule as an addon rule; the "top N" tables rely on this.
        $rules{$rule}{ruleset} = $ruleset;

        my $ham  = $rules{$rule}{ham}  || 0;
        my $spam = $rules{$rule}{spam} || 0;
        printf " %-25s %6.2f %5d %5d %5.2f%% %5.2f%%\n",
            $rule, $rules{$rule}{score}, $ham, $spam,
            percent($ham, $hamcnt), percent($spam, $spamcnt);
    }

    # Print overall ruleset info. Note that this is number of spams hit by
    # the file, not just number of rule hits.
    my $fileham  = $files{$ruleset}{ham}  || 0;
    my $filespam = $files{$ruleset}{spam} || 0;
    print " -----------------------------------------------------------------------\n";
    printf " %-25s %5d %5d %5.2f%% %5.2f%%\n",
        "OVERALL", $fileham, $filespam,
        percent($fileham, $hamcnt), percent($filespam, $spamcnt);
    print "\n";
}

print "\n";

# Print an overview of the performance of each rule set, most spam hits
# first (ties broken by name so the output is stable).  Every cf file that
# defined a rule is listed, including those with no hits.
print "Ruleset overview\n";
print " Ruleset Ham Spam \%of Ham \%of Spam\n";
print " --------------------------------------------------------------------\n";

my @by_spam = sort {
    ($files{$b}{spam} || 0) <=> ($files{$a}{spam} || 0)
        or $a cmp $b
} keys %rulesets;

for my $ruleset (@by_spam) {
    my $fileham  = $files{$ruleset}{ham}  || 0;
    my $filespam = $files{$ruleset}{spam} || 0;
    printf " %-30s %5d %5d %5.2f%% %5.2f%%\n",
        $ruleset, $fileham, $filespam,
        percent($fileham, $hamcnt), percent($filespam, $spamcnt);
}

print "\n\n";

print_top_rules(\%rules, 'spam', 'Spam', $spamcnt, $toprules);

print "\n\n";

print_top_rules(\%rules, 'ham', 'Ham', $hamcnt, $toprules);