Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: SpamAssassin: commits

svn commit: rev 7043 - incubator/spamassassin/trunk/tools

 

 

SpamAssassin commits RSS feed   Index | Next | Previous | View Threaded


duncf at apache

Mar 7, 2004, 3:28 PM

Post #1 of 1 (33 views)
Permalink
svn commit: rev 7043 - incubator/spamassassin/trunk/tools

Author: duncf
Date: Sun Mar 7 14:28:21 2004
New Revision: 7043

Modified:
incubator/spamassassin/trunk/tools/sa-stats.pl
Log:
Bug 2988: Patch from Bob Apthorpe for sa-stats.pl

Modified: incubator/spamassassin/trunk/tools/sa-stats.pl
==============================================================================
--- incubator/spamassassin/trunk/tools/sa-stats.pl (original)
+++ incubator/spamassassin/trunk/tools/sa-stats.pl Sun Mar 7 14:28:21 2004
@@ -22,19 +22,24 @@

use strict;

-#Configuration section
+# Configuration section
my %opt = ();
-$opt{'logfile'} = '/var/log/maillog'; # Log file
+$opt{'logfile'} = '/var/log/maillog'; # Log file
$opt{'sendmail'} = '/usr/sbin/sendmail'; # Path to sendmail stub
$opt{'from'} = 'SpamAssassin System Admin'; # Who is the mail from
$opt{'end'} = "";
$opt{'start'} = "today";

+my $diag = '';
+$diag .= "Default options:\n" . join('', map { "opt{$_} => " . ($opt{$_} || '<undefined>') . "\n" } (sort keys %opt));
+# &vdbg($diag); # Deferred: vdbg() checks $opt{debug} and $opt{verbose}, which are not set until GetOptions() has run.
+
##########################################################
############# Nothing to edit below here #################
##########################################################

my $VERSION = '$Id$';
+my ($VER_NUM) = '$Revision: 6256 $' =~ m#\$Revision:\s+(\S+)#o;

# internal modules (part of core perl distribution)
use Getopt::Long;
@@ -44,7 +49,11 @@
use Date::Manip;
use Parse::Syslog;

-my $tstart = time;
+my %timing = ();
+$timing{'start'} = 0;
+$timing{'end'} = 0;
+$timing{'hrsinperiod'} = 0;
+$timing{'telapsed'} = time;

Getopt::Long::Configure("bundling");
GetOptions('logfile|l=s' => \$opt{'logfile'},
@@ -54,6 +63,8 @@
'debug|D' => \$opt{'debug'},
'userstats|u' => \$opt{'userstats'},
'verbose|v' => \$opt{'verbose'},
+ 'html|H' => \$opt{'html'},
+ 'top|T:25' => \$opt{'topusers'},
'help|h' => \$opt{'help'},
'version|V' => \$opt{'version'},
'start|s=s' => \$opt{'start'},
@@ -69,31 +80,45 @@
exit 0;
}

-#Local variables
-my $mean_spam_score = 0;
-my $mean_spam_time = 0;
-my $mean_spam_bytes = 0;
-
-my $mean_ham_score = 0;
-my $mean_ham_time = 0;
-my $mean_ham_bytes = 0;
+# No point in specifying topusers w/o specifying userstats; coerce -u if -T
+if ($opt{'topusers'}) {
+ $opt{'userstats'} = 1;
+}
+
+$diag .= "\nUser options:\n" . join('', map { "opt{$_} => " . ($opt{$_} || '<undefined>') . "\n" } (sort keys %opt));
+&vdbg($diag . "\n");
+
+# Local variables

-my %stats = ();
# %stats is a multidimensional hash with the following structure:
-# $stats{'total'}{'bytes'}
-# $stats{'total'}{'count'}
-# $stats{'total'}{'time'}
-# $stats{'total'}{'threshold'}
-# $stats{'ham'}{'bytes'}
-# $stats{'ham'}{'count'}
-# $stats{'ham'}{'time'}
-# $stats{'ham'}{'score'}
-# $stats{'ham'}{'byhour'}
-# $stats{'spam'}{'bytes'}
-# $stats{'spam'}{'count'}
-# $stats{'spam'}{'time'}
-# $stats{'spam'}{'score'}
-# $stats{'spam'}{'byhour'}
+my %stats = ();
+
+$stats{'spam'}{'mean_score'} = 0;
+$stats{'spam'}{'mean_time'} = 0;
+$stats{'spam'}{'mean_bytes'} = 0;
+
+$stats{'ham'}{'mean_score'} = 0;
+$stats{'ham'}{'mean_time'} = 0;
+$stats{'ham'}{'mean_bytes'} = 0;
+
+$stats{'total'}{'bytes'} = 0;
+$stats{'total'}{'count'} = 0;
+$stats{'total'}{'time'} = 0;
+$stats{'total'}{'score'} = 0;
+# $stats{'total'}{'byhour'}{$hr}
+$stats{'total'}{'threshold'} = 0;
+
+$stats{'ham'}{'bytes'} = 0;
+$stats{'ham'}{'count'} = 0;
+$stats{'ham'}{'time'} = 0;
+$stats{'ham'}{'score'} = 0;
+# $stats{'ham'}{'byhour'}{$hr}
+
+$stats{'spam'}{'bytes'} = 0;
+$stats{'spam'}{'count'} = 0;
+$stats{'spam'}{'time'} = 0;
+$stats{'spam'}{'score'} = 0;
+# $stats{'spam'}{'byhour'}{$hr}

my %userstats = ();
# $userstats{$recipient}{'total'}{'bytes'}
@@ -108,32 +133,47 @@
# $userstats{$recipient}{'spam'}{'time'}
# $userstats{$recipient}{'spam'}{'score'}

-my ($start, $end) = parse_arg($opt{'start'}, $opt{'end'});
+# my ($start, $end) = parse_arg($opt{'start'}, $opt{'end'});
+($timing{'start'}, $timing{'end'}) = parse_arg($opt{'start'}, $opt{'end'});
+
+&vdbg("Timing:\nstart = $timing{'start'} " . UnixDate("epoch " . $timing{'start'}, '%C')
+ . "\n end = $timing{'end'} " . UnixDate("epoch " . $timing{'end'}, '%C') . "\n\n");

-die "Can't find " . $opt{'logfile'} . " $!\n" unless (-e $opt{'logfile'});
+die "Can't find " . $opt{'logfile'} . " $!\n" unless (-e $opt{'logfile'} || ($opt{'logfile'} eq '-'));
+
+my $logyear = UnixDate("epoch " . $timing{'start'}, "%Y");
+my $logmonth = UnixDate("epoch " . $timing{'start'}, "%m") - 1;
+
+&vdbg("Creating log parser: Parse::Syslog->new(" . $opt{'logfile'}
+ . ", year => $logyear, _last_mon => $logmonth,)\n"
+ . "Note that _last_mon = month - 1\n\n");

my $parser = Parse::Syslog->new( $opt{'logfile'} ,
- year => UnixDate("epoch $start", "%Y"),
- _last_mon => UnixDate("epoch $start", "%m") - 1);
-# Hack for end-of-year support -- sets _last_mon to current month (0 based, not 1 based)
+ year => $logyear,
+ _last_mon => $logmonth,);
+# Hack for end-of-year support -- sets _last_mon to current month (0 based, not
+# 1 based)
+
+&vdbg("##### Entering parseloop:\n\n");

parseloop:
while (my $sl = $parser->next) {
+ &vdbg('Found log entry at ' . $sl->{'timestamp'} . ' for ' . $sl->{'program'} . ' containing ' . $sl->{'text'} . "\n");
next parseloop unless ($sl->{'program'} eq 'spamd');
if ($sl->{'text'} =~ m/
(clean\smessage|identified\sspam)\s # Status
\(([-0-9.]+)\/([-0-9.]+)\)\s # Score, Threshold
for\s
- ([^:]+):\d+\s # for daf:1000
+ ([^:]+):\d+\s # for daf:1000
in\s
([0-9.]+)\sseconds,\s+
([0-9]+)\sbytes\.
/x) {

# discard records outside defined analysis interval
- next parseloop if ($sl->{'timestamp'} < $start);
+ next parseloop if ($sl->{'timestamp'} < $timing{'start'});
# We can assume that logs are chronological
- last parseloop if ($sl->{'timestamp'} > $end);
+ last parseloop if ($sl->{'timestamp'} > $timing{'end'});

my $status = $1;
my $score = $2;
@@ -142,14 +182,14 @@
my $time_processed = $5;
my $bytes_processed = $6;

- dbg("Found: " . $sl->{'text'} . "\n");
- dbg(" tstamp : " . $sl->{'timestamp'} . "\n");
- dbg(" status: $status\n");
- dbg(" score : $score\n");
- dbg(" thresh: $threshold\n");
- dbg(" recip : $recipient\n");
- dbg(" time : $time_processed\n");
- dbg(" bytes : $bytes_processed\n\n");
+ dbg("Found: " . $sl->{'text'} . "\n");
+ dbg(" tstamp : " . $sl->{'timestamp'} . "\n");
+ dbg(" status: $status\n");
+ dbg(" score : $score\n");
+ dbg(" thresh: $threshold\n");
+ dbg(" recip : $recipient\n");
+ dbg(" time : $time_processed\n");
+ dbg(" bytes : $bytes_processed\n\n");

my $clean_recipient = lc($recipient);
$clean_recipient =~ s#\+[^@]*(@?)#$1#;
@@ -178,8 +218,10 @@

# Total score
$stats{'total'}{'count'}++;
+ $stats{'total'}{'score'} += $score;
$stats{'total'}{'bytes'} += $bytes_processed;
$stats{'total'}{'time'} += $time_processed;
+ $stats{'total'}{'byhour'}{$abshour}++;
$stats{'total'}{'threshold'} += $threshold;

if ($opt{'userstats'}) {
@@ -200,147 +242,88 @@
}
}

+&vdbg("##### Exiting parseloop:\n\n");
+
#Calculate some numbers
-my $threshavg = 0;
-my $spampercent = 0;
-my $hampercent = 0;
-my $bytesperhour = 0;
-my $emailperhour = 0;
-my $secperhour = 0;
-
-my $spamcount = $stats{'spam'}{'count'} || 0;
-my $hamcount = $stats{'ham'}{'count'} || 0;
-my $totalcount = $stats{'total'}{'count'} || 0;
-
-my $hrsinperiod = (($end-$start) / 3600);
-
-if ($totalcount > 0) {
- if ($spamcount > 0) {
- $mean_spam_score = $stats{'spam'}{'score'} / $spamcount;
- $mean_spam_time = $stats{'spam'}{'time'} / $spamcount;
- $mean_spam_bytes = $stats{'spam'}{'bytes'} / $spamcount;
- $spampercent = (($spamcount/$totalcount) * 100);
- }
-
- if ($hamcount > 0) {
- $mean_ham_score = $stats{'ham'}{'score'} / $hamcount;
- $mean_ham_time = $stats{'ham'}{'time'} / $hamcount;
- $mean_ham_bytes = $stats{'ham'}{'bytes'} / $hamcount;
- $hampercent = (($hamcount/$totalcount) * 100);
- }
-
- $threshavg = $stats{'total'}{'threshold'} / $totalcount;
- $emailperhour = ($totalcount/$hrsinperiod);
- $bytesperhour = ($stats{'total'}{'bytes'} / $hrsinperiod);
- $secperhour = ($stats{'total'}{'time'} / $hrsinperiod);
-}
+my %aggregate_stats = ();

-my $oldfh;
-#Open Sendmail if we are mailing it
-if ($opt{'mail'}) {
- open (SENDMAIL, "|$opt{'sendmail'} -oi -t -odq") or die "Can't open sendmail: $!\n";
- print SENDMAIL "From: $opt{'from'}\n";
- print SENDMAIL "To: $opt{'mail'}\n";
- print SENDMAIL "Subject: SpamAssassin statistics\n\n";
- $oldfh = select SENDMAIL;
-}
+$aggregate_stats{'threshavg'} = 0;
+$aggregate_stats{'spampercent'} = 0;
+$aggregate_stats{'hampercent'} = 0;
+$aggregate_stats{'bytesperhour'} = 0;
+$aggregate_stats{'emailperhour'} = 0;
+$aggregate_stats{'secperhour'} = 0;
+
+$timing{'hrsinperiod'} = (($timing{'end'} - $timing{'start'}) / 3600);
+
+if ($stats{'total'}{'count'} > 0) {
+ $stats{'total'}{'mean_score'} = ($stats{'total'}{'score'} || 0) / $stats{'total'}{'count'};
+
+ if ($stats{'spam'}{'count'} > 0) {
+ $stats{'spam'}{'mean_score'} = $stats{'spam'}{'score'} / $stats{'spam'}{'count'};
+ $stats{'spam'}{'mean_time'} = $stats{'spam'}{'time'} / $stats{'spam'}{'count'};
+ $stats{'spam'}{'mean_bytes'} = $stats{'spam'}{'bytes'} / $stats{'spam'}{'count'};
+ $aggregate_stats{'spampercent'} = (($stats{'spam'}{'count'}/$stats{'total'}{'count'}) * 100);
+ }

-my $telapsed = time - $tstart;
+ if ($stats{'ham'}{'count'} > 0) {
+ $stats{'ham'}{'mean_score'} = $stats{'ham'}{'score'} / $stats{'ham'}{'count'};
+ $stats{'ham'}{'mean_time'} = $stats{'ham'}{'time'} / $stats{'ham'}{'count'};
+ $stats{'ham'}{'mean_bytes'} = $stats{'ham'}{'bytes'} / $stats{'ham'}{'count'};
+ $aggregate_stats{'hampercent'} = (($stats{'ham'}{'count'}/$stats{'total'}{'count'}) * 100);
+ }

-#Output results
-my $rpt = '';
-$rpt .= "Report Title : SpamAssassin - Spam Statistics\n";
-$rpt .= "Report Date : " . strftime("%F", localtime) . "\n";
-$rpt .= "Period Beginning : " . strftime("%c", localtime($start)) . "\n";
-$rpt .= "Period Ending : " . strftime("%c", localtime($end)) . "\n";
-$rpt .= "\n";
-$rpt .= sprintf("Reporting Period : %.2f hrs\n", $hrsinperiod);
-$rpt .= "--------------------------------------------------\n";
-$rpt .= "\n";
-$rpt .= "Note: 'ham' = 'nonspam'\n";
-$rpt .= "\n";
-$rpt .= sprintf("Total spam detected : %8d (%7.2f%%)\n", $spamcount, $spampercent || 0);
-$rpt .= sprintf("Total ham accepted : %8d (%7.2f%%)\n", $hamcount, $hampercent || 0);
-$rpt .= " -------------------\n";
-$rpt .= sprintf("Total emails processed : %8d (%5.f/hr)\n", $totalcount, $emailperhour || 0);
-$rpt .= "\n";
-$rpt .= sprintf("Average spam threshold : %11.2f\n", $threshavg || 0);
-$rpt .= sprintf("Average spam score : %11.2f\n", $mean_spam_score || 0);
-$rpt .= sprintf("Average ham score : %11.2f\n", $mean_ham_score || 0);
-$rpt .= "\n";
-$rpt .= sprintf("Spam kbytes processed : %8d (%5.f kb/hr)\n",
- $stats{'spam'}{'bytes'}/1024,
- $stats{'spam'}{'bytes'}/(1024 * $hrsinperiod));
-$rpt .= sprintf("Ham kbytes processed : %8d (%5.f kb/hr)\n",
- $stats{'ham'}{'bytes'}/1024,
- $stats{'ham'}{'bytes'}/(1024 * $hrsinperiod));
-$rpt .= sprintf("Total kbytes processed : %8d (%5.f kb/hr)\n",
- $stats{'total'}{'bytes'}/1024, $bytesperhour/1024);
-$rpt .= "\n";
-$rpt .= sprintf("Spam analysis time : %8d s (%5.f s/hr)\n",
- $stats{'spam'}{'time'},
- $stats{'spam'}{'time'}/$hrsinperiod);
-$rpt .= sprintf("Ham analysis time : %8d s (%5.f s/hr)\n",
- $stats{'ham'}{'time'},
- $stats{'ham'}{'time'}/$hrsinperiod);
-$rpt .= sprintf("Total analysis time : %8d s (%5.f s/hr)\n",
- $stats{'total'}{'time'}, $secperhour);
-$rpt .= "\n\n";
-$rpt .= "Statistics by Hour\n";
-$rpt .= "-------------------------------------\n";
-$rpt .= "Hour Spam Ham\n";
-$rpt .= "-------------- -------- --------\n";
-
-my $hour = floor($start/3600);
-while ($hour < $end/3600) {
- $rpt .= sprintf("%s %8d %8d\n",
- strftime("%F, %H", localtime($hour*3600)),
- $stats{'spam'}{'byhour'}{$hour} || 0,
- $stats{'ham'}{'byhour'}{$hour} || 0);
- $hour++;
-}
-$rpt .= "\n\n";
-
-if ($opt{'userstats'}) {
- my $usercount = scalar(keys(%userstats));
- if ($usercount > 0) {
- my $upper_userlimit = ($usercount > 25) ? 25 : $usercount;
-
- $rpt .= "Top $upper_userlimit spam victims:\n";
- $rpt .= "User S AvScr H AvScr Count % Count Bytes % Bytes Time % Time\n";
- $rpt .= "-------------------------------- ------- ------- -------- ---------- -------- ---------- -------- ----------\n";
- foreach my $user (sort {
- $userstats{$b}{'total'}{'count'} <=> $userstats{$a}{'total'}{'count'}
- } keys %userstats) {
- $rpt .= sprintf("%-32s %7.2f %7.2f %8d (%7.2f%%) %8d (%7.2f%%) %8d (%7.2f%%)\n",
- $user,
- ($userstats{$user}{'spam'}{'count'} > 0) ?
- $userstats{$user}{'spam'}{'score'} / $userstats{$user}{'spam'}{'count'} : 0,
- ($userstats{$user}{'ham'}{'count'} > 0) ?
- $userstats{$user}{'ham'}{'score'} / $userstats{$user}{'ham'}{'count'} : 0,
- $userstats{$user}{'spam'}{'count'},
- ($userstats{$user}{'total'}{'count'} > 0) ?
- $userstats{$user}{'spam'}{'count'} / $userstats{$user}{'total'}{'count'} * 100 : 0,
- $userstats{$user}{'spam'}{'bytes'},
- ($userstats{$user}{'total'}{'bytes'} > 0) ?
- $userstats{$user}{'spam'}{'bytes'} / $userstats{$user}{'total'}{'bytes'} * 100 : 0,
- $userstats{$user}{'spam'}{'time'},
- ($userstats{$user}{'total'}{'time'} > 0) ?
- $userstats{$user}{'spam'}{'time'} / $userstats{$user}{'total'}{'time'} * 100 : 0,
- );
+ $aggregate_stats{'threshavg'} = $stats{'total'}{'threshold'} / $stats{'total'}{'count'};
+ $aggregate_stats{'emailperhour'} = ($stats{'total'}{'count'}/$timing{'hrsinperiod'});
+ $aggregate_stats{'bytesperhour'} = ($stats{'total'}{'bytes'} / $timing{'hrsinperiod'});
+ $aggregate_stats{'secperhour'} = ($stats{'total'}{'time'} / $timing{'hrsinperiod'});
+
+ foreach my $partition (qw(ham spam total)) {
+ $stats{$partition}{'d_mean_score'} = sprintf("%.2f", $stats{$partition}{'mean_score'});
+ $stats{$partition}{'d_kbytes'} = sprintf("%.2f", $stats{$partition}{'bytes'} / 1024);
+ $stats{$partition}{'d_mbytes'} = sprintf("%.2f", $stats{$partition}{'bytes'} / (1024 * 1024));
+ foreach my $metric (qw(count bytes time)) {
+ if ($partition eq 'total') {
+ $stats{'total'}{'percent'}{$metric} = '100%';
+ } else {
+ if (defined($stats{'total'}{$metric}) && ($stats{'total'}{$metric} > 0)) {
+ $stats{$partition}{'percent'}{$metric} =
+ sprintf("%.2f%%", 100 * ($stats{$partition}{$metric} || 0) / $stats{'total'}{$metric});
+ } else {
+ $stats{$partition}{'percent'}{$metric} = '-- %';
+ }
+ }
+
+ $stats{$partition}{'perhour'}{$metric} =
+ sprintf("%.2f", ($stats{$partition}{$metric} || 0) / $timing{'hrsinperiod'});
}
+ $stats{$partition}{'d_time'} = sprintf("%.1f", $stats{$partition}{'time'});
+ $stats{$partition}{'perhour'}{'d_kbytes'} = sprintf("%.2f", $stats{$partition}{'perhour'}{'bytes'} / 1024);
+ $stats{$partition}{'perhour'}{'d_mbytes'} = sprintf("%.2f", $stats{$partition}{'perhour'}{'bytes'} / (1024 * 1024));
}
- $rpt .= "\n";
+
}

-$rpt .= "Done. Report generated in $telapsed sec.\n";
+$timing{'telapsed'} = time - $timing{'telapsed'};

-print $rpt;
+# build report
+my $rpt = '';
+if ($opt{'html'}) {
+ $rpt = &build_html_report(\%timing, \%stats, \%userstats, \%aggregate_stats, );
+} else {
+ $rpt = &build_text_report(\%timing, \%stats, \%userstats, \%aggregate_stats, );
+}

-#Close Senmdmail if it was opened
+# send report via mail or just print it out
if ($opt{'mail'}) {
- select $oldfh;
+ open (SENDMAIL, "|$opt{'sendmail'} -oi -t -odq") or die "Can't open sendmail: $!\n";
+ print SENDMAIL "From: $opt{'from'}\n";
+ print SENDMAIL "To: $opt{'mail'}\n";
+ print SENDMAIL "Subject: SpamAssassin statistics\n\n";
+ print SENDMAIL $rpt;
close (SENDMAIL);
+} else {
+ print $rpt;
}

#All done
@@ -350,6 +333,438 @@
# Subroutines ###############################################################
#############################################################################

+
+########################################
+# Build text report #
+########################################
+sub build_text_report {
+ my $Rh_timing = shift;
+ my $Rh_stats = shift;
+ my $Rh_userstats = shift;
+ my $Rh_aggregate_stats = shift;
+
+ my $rpt = '';
+ $rpt .= "Report Title : SpamAssassin - Spam Statistics\n";
+ $rpt .= "Report Date : " . strftime("%Y-%m-%d", localtime) . "\n";
+ $rpt .= "Period Beginning : " . strftime("%c", localtime($Rh_timing->{'start'})) . "\n";
+ $rpt .= "Period Ending : " . strftime("%c", localtime($Rh_timing->{'end'})) . "\n";
+ $rpt .= "\n";
+ $rpt .= sprintf("Reporting Period : %.2f hrs\n", $Rh_timing->{'hrsinperiod'});
+ $rpt .= "--------------------------------------------------\n";
+ $rpt .= "\n";
+ $rpt .= "Note: 'ham' = 'nonspam'\n";
+ $rpt .= "\n";
+ $rpt .= sprintf("Total spam detected : %8d (%7.2f%%)\n", $Rh_stats->{'spam'}{'count'}, $Rh_aggregate_stats->{'spampercent'} || 0);
+ $rpt .= sprintf("Total ham accepted : %8d (%7.2f%%)\n", $Rh_stats->{'ham'}{'count'}, $Rh_aggregate_stats->{'hampercent'} || 0);
+ $rpt .= " -------------------\n";
+ $rpt .= sprintf("Total emails processed : %8d (%5.f/hr)\n", $Rh_stats->{'total'}{'count'}, $Rh_aggregate_stats->{'emailperhour'} || 0);
+ $rpt .= "\n";
+ $rpt .= sprintf("Average spam threshold : %11.2f\n", $Rh_aggregate_stats->{'threshavg'} || 0);
+ $rpt .= sprintf("Average spam score : %11.2f\n", $Rh_stats->{'spam'}{'mean_score'} || 0);
+ $rpt .= sprintf("Average ham score : %11.2f\n", $Rh_stats->{'ham'}{'mean_score'} || 0);
+ $rpt .= "\n";
+ $rpt .= sprintf("Spam kbytes processed : %8d (%5.f kb/hr)\n",
+ $Rh_stats->{'spam'}{'bytes'}/1024,
+ $Rh_stats->{'spam'}{'bytes'}/(1024 * $Rh_timing->{'hrsinperiod'}));
+ $rpt .= sprintf("Ham kbytes processed : %8d (%5.f kb/hr)\n",
+ $Rh_stats->{'ham'}{'bytes'}/1024,
+ $Rh_stats->{'ham'}{'bytes'}/(1024 * $Rh_timing->{'hrsinperiod'}));
+ $rpt .= sprintf("Total kbytes processed : %8d (%5.f kb/hr)\n",
+ $Rh_stats->{'total'}{'bytes'}/1024, $Rh_aggregate_stats->{'bytesperhour'}/1024);
+ $rpt .= "\n";
+ $rpt .= sprintf("Spam analysis time : %8d s (%5.f s/hr)\n",
+ $Rh_stats->{'spam'}{'time'},
+ $Rh_stats->{'spam'}{'time'}/$Rh_timing->{'hrsinperiod'});
+ $rpt .= sprintf("Ham analysis time : %8d s (%5.f s/hr)\n",
+ $Rh_stats->{'ham'}{'time'},
+ $Rh_stats->{'ham'}{'time'}/$Rh_timing->{'hrsinperiod'});
+ $rpt .= sprintf("Total analysis time : %8d s (%5.f s/hr)\n",
+ $Rh_stats->{'total'}{'time'}, $Rh_aggregate_stats->{'secperhour'});
+ $rpt .= "\n\n";
+ $rpt .= "Statistics by Hour\n";
+ $rpt .= "----------------------------------------------------\n";
+ $rpt .= "Hour Spam Ham\n";
+ $rpt .= "------------- ----------------- --------------\n";
+
+ my $hour = floor($Rh_timing->{'start'}/3600);
+
+ while ($hour < $Rh_timing->{'end'}/3600) {
+
+ my $hourly_spam_percent = 0;
+ my $hourly_ham_percent = 0;
+
+ if (defined($Rh_stats->{'total'}{'byhour'}{$hour}) && ($Rh_stats->{'total'}{'byhour'}{$hour} > 0)) {
+ if (!defined($Rh_stats->{'spam'}{'byhour'}{$hour}) || $Rh_stats->{'spam'}{'byhour'}{$hour} == 0) {
+ $Rh_stats->{'spam'}{'byhour'}{$hour} = 0;
+ $hourly_ham_percent = 100;
+ } elsif (!defined($Rh_stats->{'ham'}{'byhour'}{$hour}) || $Rh_stats->{'ham'}{'byhour'}{$hour} == 0) {
+ $Rh_stats->{'ham'}{'byhour'}{$hour} = 0;
+ $hourly_spam_percent = 100;
+ } else {
+ $hourly_spam_percent = 100 * $Rh_stats->{'spam'}{'byhour'}{$hour} / $Rh_stats->{'total'}{'byhour'}{$hour};
+ $hourly_ham_percent = 100 * $Rh_stats->{'ham'}{'byhour'}{$hour} / $Rh_stats->{'total'}{'byhour'}{$hour};
+ }
+ }
+
+ $rpt .= sprintf("%-16s %8d (%3d%%) %8d (%3d%%)\n",
+ strftime("%Y-%m-%d %H", localtime($hour*3600)),
+ $Rh_stats->{'spam'}{'byhour'}{$hour} || 0,
+ $hourly_spam_percent,
+ $Rh_stats->{'ham'}{'byhour'}{$hour} || 0,
+ $hourly_ham_percent);
+ $hour++;
+ }
+ $rpt .= "\n\n";
+
+ if ($opt{'userstats'}) {
+ my $topusers = 25;
+ if (defined($opt{'topusers'}) && ($opt{'topusers'} > 0)) {
+ $topusers = $opt{'topusers'};
+ }
+ my $usercount = scalar(keys(%{$Rh_userstats}));
+ if ($usercount > 0) {
+ my $upper_userlimit = ($usercount > $topusers) ? $topusers : $usercount;
+
+ $rpt .= "Top $upper_userlimit spam victims:\n";
+ $rpt .= "User S AvScr H AvScr Count % Count Bytes % Bytes Time % Time\n";
+ $rpt .= "-------------------------------- ------- ------- -------- ---------- -------- ---------- -------- ----------\n";
+ foreach my $user (sort {
+ $Rh_userstats->{$b}{'total'}{'count'} <=> $Rh_userstats->{$a}{'total'}{'count'}
+ } keys %{$Rh_userstats}) {
+
+ foreach my $partition (qw(spam ham total)) {
+ foreach my $metric (qw(score bytes count time)) {
+ $Rh_userstats->{$user}{$partition}{$metric} ||= 0;
+ }
+ }
+
+ $rpt .= sprintf("%-32s %7.2f %7.2f %8d (%7.2f%%) %8d (%7.2f%%) %8d (%7.2f%%)\n",
+ $user,
+ ($Rh_userstats->{$user}{'spam'}{'count'} > 0) ?
+ $Rh_userstats->{$user}{'spam'}{'score'} / $Rh_userstats->{$user}{'spam'}{'count'} : 0,
+ ($Rh_userstats->{$user}{'ham'}{'count'} > 0) ?
+ $Rh_userstats->{$user}{'ham'}{'score'} / $Rh_userstats->{$user}{'ham'}{'count'} : 0,
+ $Rh_userstats->{$user}{'spam'}{'count'},
+ ($Rh_userstats->{$user}{'total'}{'count'} > 0) ?
+ 100 * $Rh_userstats->{$user}{'spam'}{'count'} / $Rh_userstats->{$user}{'total'}{'count'} : 0,
+ $Rh_userstats->{$user}{'spam'}{'bytes'},
+ ($Rh_userstats->{$user}{'total'}{'bytes'} > 0) ?
+ 100 * $Rh_userstats->{$user}{'spam'}{'bytes'} / $Rh_userstats->{$user}{'total'}{'bytes'} : 0,
+ $Rh_userstats->{$user}{'spam'}{'time'},
+ ($Rh_userstats->{$user}{'total'}{'time'} > 0) ?
+ 100 * $Rh_userstats->{$user}{'spam'}{'time'} / $Rh_userstats->{$user}{'total'}{'time'} : 0,
+ );
+ }
+ }
+ $rpt .= "\n";
+ }
+
+ my $codename = $0;
+ $codename =~ s#^.*/##o;
+ $rpt .= "Done. Report generated in " . $Rh_timing->{'telapsed'} . " sec by $codename, version $VER_NUM.\n";
+
+ return $rpt;
+}
+
+########################################
+# Build HTML report #
+########################################
+sub build_html_report {
+ my $Rh_timing = shift;
+ my $Rh_stats = shift;
+ my $Rh_userstats = shift;
+ my $Rh_aggregate_stats = shift;
+
+ my $rpt = '';
+
+ my $d_now = strftime("%c", localtime(time));
+ my $d_start = strftime("%A, %B %e %Y %T %Z", localtime($Rh_timing->{'start'}));
+ my $d_end = strftime("%A, %B %e %Y %T %Z", localtime($Rh_timing->{'end'}));
+ my $d_telapsed = $Rh_timing->{'telapsed'};
+ my $d_period = sprintf("%.2f", $Rh_timing->{'hrsinperiod'});
+
+ my $d_mean_thresh = sprintf("%.2f", $Rh_aggregate_stats->{'threshavg'});
+
+ my $t_spam_overview =<<"T_OVERVIEW";
+<table border="1" summary="Aggregate mail statistics">
+<tr>
+<th rowspan="2"></th>
+<th colspan="3">Messages</th>
+<th colspan="3">Size</th>
+<th colspan="3">Time</th>
+<th>Mean Score</th>
+</tr>
+
+<tr>
+<th>[#]</th>
+<th>[#/hr]</th>
+<th>[%]</th>
+<th>[Kb]</th>
+<th>[Kb/hr]</th>
+<th>[%]</th>
+<th>[s]</th>
+<th>[s/hr]</th>
+<th>[%]</th>
+<th>[#]</th>
+</tr>
+
+<tr>
+<th bgcolor="#CCFFCC">Ham</th>
+<td align="right">$Rh_stats->{'ham'}{'count'}</td>
+<td align="right">$Rh_stats->{'ham'}{'perhour'}{'count'}</td>
+<td align="right">$Rh_stats->{'ham'}{'percent'}{'count'}</td>
+<td align="right">$Rh_stats->{'ham'}{'d_kbytes'}</td>
+<td align="right">$Rh_stats->{'ham'}{'perhour'}{'d_kbytes'}</td>
+<td align="right">$Rh_stats->{'ham'}{'percent'}{'bytes'}</td>
+<td align="right">$Rh_stats->{'ham'}{'d_time'}</td>
+<td align="right">$Rh_stats->{'ham'}{'perhour'}{'time'}</td>
+<td align="right">$Rh_stats->{'ham'}{'percent'}{'time'}</td>
+<td align="right">$Rh_stats->{'ham'}{'d_mean_score'}</td>
+</tr>
+
+<tr>
+<th bgcolor="#FFCCCC">Spam</th>
+<td align="right">$Rh_stats->{'spam'}{'count'}</td>
+<td align="right">$Rh_stats->{'spam'}{'perhour'}{'count'}</td>
+<td align="right">$Rh_stats->{'spam'}{'percent'}{'count'}</td>
+<td align="right">$Rh_stats->{'spam'}{'d_kbytes'}</td>
+<td align="right">$Rh_stats->{'spam'}{'perhour'}{'d_kbytes'}</td>
+<td align="right">$Rh_stats->{'spam'}{'percent'}{'bytes'}</td>
+<td align="right">$Rh_stats->{'spam'}{'d_time'}</td>
+<td align="right">$Rh_stats->{'spam'}{'perhour'}{'time'}</td>
+<td align="right">$Rh_stats->{'spam'}{'percent'}{'time'}</td>
+<td align="right">$Rh_stats->{'spam'}{'d_mean_score'}</td>
+</tr>
+
+<tr>
+<th bgcolor="#CCCCFF">Total</th>
+<td align="right">$Rh_stats->{'total'}{'count'}</td>
+<td align="right">$Rh_stats->{'total'}{'perhour'}{'count'}</td>
+<td align="right">$Rh_stats->{'total'}{'percent'}{'count'}</td>
+<td align="right">$Rh_stats->{'total'}{'d_kbytes'}</td>
+<td align="right">$Rh_stats->{'total'}{'perhour'}{'d_kbytes'}</td>
+<td align="right">$Rh_stats->{'total'}{'percent'}{'bytes'}</td>
+<td align="right">$Rh_stats->{'total'}{'d_time'}</td>
+<td align="right">$Rh_stats->{'total'}{'perhour'}{'time'}</td>
+<td align="right">$Rh_stats->{'total'}{'percent'}{'time'}</td>
+<td align="right">$Rh_stats->{'total'}{'d_mean_score'}</td>
+</tr>
+</table>
+T_OVERVIEW
+
+ my $t_hourly =<<"T_HOURLY";
+<table border="0" summary="Hourly ham/spam trends">
+<tr>
+<th colspan="3">Statistics by hour</th>
+<td>&nbsp;</td>
+<td>&nbsp;</td>
+<td>&nbsp;</td>
+<td>&nbsp;</td>
+<td>&nbsp;</td>
+<td>&nbsp;</td>
+<td>&nbsp;</td>
+<td>&nbsp;</td>
+<td>&nbsp;</td>
+<td>&nbsp;</td>
+</tr>
+
+<tr>
+<th>Hour</th>
+<th bgcolor="#FFCCCC">Spam</th>
+<th bgcolor="#CCFFCC">Ham</th>
+<td colspan="10"></td>
+</tr>
+T_HOURLY
+
+ my $hour = floor($Rh_timing->{'start'}/3600);
+ my $prev_day = '';
+
+ my $null_color = 'bgcolor="#CCCCFF"';
+ my $spam_color = 'bgcolor="#FFCCCC"';
+ my $ham_color = 'bgcolor="#CCFFCC"';
+# my $spam_mark = qq{<td $spam_color>&nbsp;</td>};
+# my $ham_mark = qq{<td $ham_color>&nbsp;</td>};
+ while ($hour < $Rh_timing->{'end'}/3600) {
+ foreach my $partition (qw(spam ham total)) {
+ $Rh_stats->{$partition}{'byhour'}{$hour} = 0 unless
+ (defined($Rh_stats->{$partition}{'byhour'}{$hour}) &&
+ ($Rh_stats->{$partition}{'byhour'}{$hour} > 0));
+ }
+
+ my $curr_hour = strftime("%H:00", localtime($hour*3600));
+ my $curr_day = strftime("%Y-%m-%d", localtime($hour*3600));
+ if ($curr_day ne $prev_day) {
+ $curr_hour = "<b>$curr_day $curr_hour</b>";
+ }
+ $prev_day = $curr_day;
+
+ my $ham_fraction = 0;
+ my $spam_fraction = 0;
+ my $check_total = $Rh_stats->{'ham'}{'byhour'}{$hour} + $Rh_stats->{'spam'}{'byhour'}{$hour};
+ my $tab_graph = qq{<td colspan="10" $null_color>&nbsp;</td>};
+ if ($Rh_stats->{'total'}{'byhour'}{$hour} > 0) {
+ $spam_fraction = int(10 * ($Rh_stats->{'spam'}{'byhour'}{$hour} / $check_total));
+ $ham_fraction = 10 - $spam_fraction;
+ if ($spam_fraction == 10) {
+ $tab_graph = qq{<td colspan="10" $spam_color>&nbsp;</td>};
+ } elsif ($spam_fraction == 9) {
+ $tab_graph = qq{<td colspan="9" $spam_color>&nbsp;</td>};
+ $tab_graph .= "<td $ham_color>&nbsp;</td>";
+ } elsif ($spam_fraction == 1) {
+ $tab_graph = "<td $spam_color>&nbsp;</td>";
+ $tab_graph .= qq{<td colspan="9" $ham_color>&nbsp;</td>};
+ } elsif ($spam_fraction == 0) {
+ $tab_graph = qq{<td colspan="10" $ham_color>&nbsp;</td>};
+ } else {
+ $tab_graph = qq{<td colspan="$spam_fraction" $spam_color>&nbsp;</td>};
+ $tab_graph .= qq{<td colspan="$ham_fraction" $ham_color>&nbsp;</td>};
+ }
+# $tab_graph = ($spam_mark x $spam_fraction) . ($ham_mark x $ham_fraction);
+ }
+
+ $t_hourly .= sprintf(qq{<tr align="right"><td>%s</td><td>%d</td><td>%d</td>%s</tr>\n},
+ $curr_hour,
+ $Rh_stats->{'spam'}{'byhour'}{$hour},
+ $Rh_stats->{'ham'}{'byhour'}{$hour},
+ $tab_graph,);
+
+ $hour++;
+ }
+ $t_hourly .= "</table>\n";
+
+ my $t_userstats = '';
+ if ($opt{'userstats'} && defined($Rh_stats->{'total'}{'count'}) &&
+ ($Rh_stats->{'total'}{'count'} > 0)) {
+ my $topusers = 25;
+ if (defined($opt{'topusers'}) && ($opt{'topusers'} > 0)) {
+ $topusers = $opt{'topusers'};
+ }
+
+ my $usercount = scalar(keys(%{$Rh_userstats}));
+ if ($usercount > 0) {
+ my $upper_userlimit = ($usercount > $topusers) ? $topusers : $usercount;
+
+ $t_userstats =<<"T_USERSTATS";
+<table border="1" summary="Top $upper_userlimit spam victims">
+<tr>
+<th colspan="3">Top $upper_userlimit spam victims</th>
+<th colspan="6" bgcolor="#FFCCCC">Spam</th>
+</tr>
+<tr>
+<th rowspan="2">User</th>
+<th colspan="2">Avg. Score</th>
+<th colspan="2">Messages Received</th>
+<th colspan="2">Bytes Received</th>
+<th colspan="2">Processing Time</th>
+</tr>
+<tr>
+<th bgcolor="#FFCCCC">Spam</th>
+<th bgcolor="#CCFFCC">Ham</th>
+<th>[#]</th>
+<th>%</th>
+<th>[bytes]</th>
+<th>%</th>
+<th>[s]</th>
+<th>%</th>
+</tr>
+T_USERSTATS
+
+# $rpt .= "Top $upper_userlimit spam victims:\n";
+# $rpt .= "User S AvScr H AvScr Count % Count Bytes % Bytes Time % Time\n";
+# $rpt .= "-------------------------------- ------- ------- -------- ---------- -------- ---------- -------- ----------\n";
+ foreach my $user (sort {
+ $Rh_userstats->{$b}{'total'}{'count'} <=> $Rh_userstats->{$a}{'total'}{'count'}
+ } keys %{$Rh_userstats}) {
+
+ foreach my $partition (qw(spam ham total)) {
+ foreach my $metric (qw(score bytes count)) {
+ $Rh_userstats->{$user}{$partition}{$metric} ||= 0;
+ }
+ }
+
+ my %avg_score = ();
+ foreach my $partition (qw(ham spam total)) {
+
+ foreach my $metric (qw(count bytes time)) {
+ $Rh_userstats->{$user}{$partition}{$metric} = 0 unless
+ (defined($Rh_userstats->{$user}{$partition}{$metric}));
+ }
+
+ if ($partition ne 'total') {
+ if (defined($Rh_userstats->{$user}{$partition}{'count'})
+ && ($Rh_userstats->{$user}{$partition}{'count'} > 0)) {
+ $avg_score{$partition} = sprintf('%.2f',
+ $Rh_userstats->{$user}{$partition}{'score'}
+ / $Rh_userstats->{$user}{$partition}{'count'});
+ } else {
+ $avg_score{$partition} = 0;
+ }
+ }
+ }
+
+ $t_userstats .= sprintf(qq{<tr align="right"><td align="left">%s</td><td>%.2f</td><td>%.2f</td><td>%d</td><td>%.2f%%</td><td>%d</td><td>%.2f%%</td><td>%d</td><td>%.2f%%</td></tr>\n},
+ $user,
+ $avg_score{'spam'},
+ $avg_score{'ham'},
+ $Rh_userstats->{$user}{'spam'}{'count'},
+ (defined($Rh_userstats->{$user}{'total'}{'count'}) && ($Rh_userstats->{$user}{'total'}{'count'} > 0)) ?
+ 100 * $Rh_userstats->{$user}{'spam'}{'count'} / $Rh_userstats->{$user}{'total'}{'count'} : 0,
+ $Rh_userstats->{$user}{'spam'}{'bytes'},
+ (defined($Rh_userstats->{$user}{'total'}{'bytes'}) && ($Rh_userstats->{$user}{'total'}{'bytes'} > 0)) ?
+ 100 * $Rh_userstats->{$user}{'spam'}{'bytes'} / $Rh_userstats->{$user}{'total'}{'bytes'} : 0,
+ $Rh_userstats->{$user}{'spam'}{'time'},
+ (defined($Rh_userstats->{$user}{'total'}{'time'}) && ($Rh_userstats->{$user}{'total'}{'time'} > 0)) ?
+ 100 * $Rh_userstats->{$user}{'spam'}{'time'} / $Rh_userstats->{$user}{'total'}{'time'}: 0,
+ );
+ }
+
+ $t_userstats .= "</table>\n<hr>\n";
+
+ }
+ }
+
+ my $codename = $0;
+ $codename =~ s#^.*/##o;
+
+ $rpt .=<<"HTMLPAGE";
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<title>SpamAssassin Statistics: $d_start - $d_end</title>
+</head>
+<body>
+<h1>SpamAssassin Statistics:</h1>
+<p>
+Period of <b>$d_period</b> hour(s) extending from<br>
+<b>$d_start</b> to<br>
+<b>$d_end</b>
+</p>
+<p>
+Generated on <b>$d_now</b> in $d_telapsed second(s) by $codename, version $VER_NUM.
+</p>
+<hr>
+<p>
+Note: 'ham' = 'nonspam'
+</p>
+<p>
+The mean spam threshold score is $d_mean_thresh; mail scoring below the threshold is ham, mail scoring at or above the threshold is spam.
+</p>
+$t_spam_overview
+<hr>
+$t_hourly
+<hr>
+$t_userstats
+<p>
+Generated on <b>$d_now</b> in $d_telapsed second(s) by $codename, version $VER_NUM.
+</p>
+</body>
+</html>
+HTMLPAGE
+
+ return $rpt;
+}
+
+
########################################
# Process parms #
########################################
@@ -392,47 +807,96 @@
}

sub dbg {
- my $msg = shift;
+ print STDERR @_ if ($opt{debug});
+}

- if ($opt{debug}) {
- print STDERR $msg;
- }
+sub vdbg {
+ print STDERR @_ if ($opt{debug} && $opt{verbose});
}

__END__

=head1 NAME
-
+
sa-stats.pl - Builds received spam/ham report from mail log
-
+
=head1 VERSION
-
- $Revision: 1.5 $
-
+
+ $Revision: 1.17 $
+
=head1 SYNOPSIS
-
+
Usage: sa-stats.pl [options]

Options:
- -l, --logfile=filename logfile to read (default: /var/log/maillog)
+ -l, --logfile=filename logfile to read
+ (default: /var/log/maillog)
-s, --start Sets date/time for start of reporting period
-e, --end Sets date/time for end of reporting period
- -u, --userstats Generates stats for the top 25 spam victims
+ -u, --userstats Generates stats for the top spam victims
+ (default is 25; see -T)
+ -H, --html Generates HTML report
+ (default: plain text)
+ -T, --top=# Display top # spam victims
+ (# defaults to 25; -T implies -u)
-h, --help Displays this message
-V, --version Display version info
--mail=emailaddress Sends report to emailaddress
- --sendmail=/path/to/sendmail Location of sendmail binary (default: /usr/sbin/sendmail)
- --from=emailaddress Sets From: field of mail
- -v, --verbose Sets verbose mode
+ --sendmail=/path/to/sendmail Location of sendmail binary
+ (default: /usr/sbin/sendmail)
+ --from=emailaddress Sets From: field of mail
+ -v, --verbose Sets verbose mode (requires -D)
-D, --debug Sets debug mode

=head1 DESCRIPTION

Creates simple text report of spam/ham detected by SpamAssassin by
-parsing the mail log (generally /var/log/maillog)
+parsing spamd entries in the mail log (generally /var/log/maillog)
+
+=head1 EXAMPLES
+
+To generate a text report from midnight to present using /var/log/maillog:
+
+ ./sa-stats.pl -s 'midnight' -e 'now' > sa_stats.txt
+
+To generate an HTML report including the top 5 spam victims for the month of
+January 2004 from compressed mail logs:
+
+ gunzip -c /var/log/maillog-200401*.gz | ./sa-stats.pl -H -T 5 -l - \
+ -s '2004-01-01 00:00:00' -e '2004-01-31 23:59:59' > jan_2004_stats.html
+
+Note the use of '-' as a filename to represent STDIN.
+
+To generate a text report with per-user stats from yesterday, reading from
+/var/log/mail and turning on all debugging output:
+
+ ./sa-stats.pl -v -D -u -l /var/log/mail \
+ -s 'yesterday midnight' 1>stats.txt 2>stats.err
+
+=head1 TIPS
+
+=over 4
+
+=item *
+
+Are you running spamd? Currently sa-stats.pl only reads syslog entries from
+spamd; it doesn't work with MTA-level calls to Mail::SpamAssassin or with logs
+generated by the spamassassin perl script.
+
+=item *
+
+Are there spamd entries in your mail log? Use 'grep spamd /var/log/maillog' to find out.
+
+=item *
+
+Are there spamd entries in your mail log within the analysis interval? Run
+'sa-stats.pl -v -D ...' to see the entries that are found and discarded as well
+as to see the actual analysis interval.
+
+=back

=head1 DEPENDENCIES
-
+
=over 4

=item *
@@ -456,37 +920,46 @@
Parse::Syslog;

=back
-
+
=head1 BUGS
-
+
+=over 4
+
=item *

Because of poor year handling in Parse::Syslog, the script may not
work well when the log file dates back to the previous year.
-
+
+=back
+
=head1 TO DO

=over 4
-
+
=item *
+
Find bugs

=item *
+
Fix bugs

=item *
+
Don't call /usr/sbin/sendmail directly; use Mail::Internet or Net::SMTP or other standard module

=item *
-Add support for piped-in logs, compressed logs (see gzopen() from Compress::Zlib)
+
+Add support for compressed logs (see gzopen() from Compress::Zlib)

=item *
-Have --verbose and --debug actually do something.
+
+Have --verbose work without --debug

=back
-
+
=head1 AUTHORS
-
+
Brad Rathbun <brad [at] computechnv> http://www.computechnv.com/

Bob Apthorpe <apthorpe+sa [at] cynistar> http://www.cynistar.net/~apthorpe/
@@ -495,7 +968,6 @@

=head1 SEE ALSO

-Mail::SpamAssassin
-
-=cut
+Mail::SpamAssassin, Date::Manip, spamd(1)

+=cut

SpamAssassin commits RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.