#!/usr/bin/perl
#
# SALogAnalysis (Spamassassin Log Analysis)
# Provide basic usage statistics from Sendmail logs about spam that
# SpamAssassin is finding, and can limit it based on SpamAssassin
# config files, such as my custom recent.cf and antidrugs.cf configs
# Todd A. Lyons <tlyons@ivenue.com>
# Rev 0  --  10/19/04


use strict;
use Getopt::Long;

# ---------------------------------------------------------------------
# File-scoped state shared by the subroutines in this script.
# ---------------------------------------------------------------------
my %opts;           # Command-line options, filled in by GetOptions below
my %logentries;     # Total number of lines (debugging only)
my %spamreject;     # Detected and rejected as spam, keyed by date
my %spamrule;       # Count of individual rules: {rule}{date} => hits
my %configrule;     # Rules from reference config file: {rule} => score
my $date;           # Date stamp used as the hash index while scanning
my $forcedate;      # User passed specific day to be scanned for
my $host;           # Machine's hostname

# Parse the command line.  GetOptions returns false when it meets an
# unknown option or a badly typed value (e.g. --debug=abc); it has
# already written its own warning to STDERR at that point, so stop
# instead of silently analysing the log with half-parsed options.
# "help|h" makes -h/--h an alias of --help; both set $opts{'h'}.
GetOptions(
	"configfile=s"  => \$opts{'configfile'},
	"debug=i"       => \$opts{'debug'},
	"date=s"        => \$opts{'date'},
	"datedetail"    => \$opts{'datedetail'},
	"help|h"        => \$opts{'h'},
	"logfile=s"     => \$opts{'logfile'},
	"spamdetail"    => \$opts{'spamdetail'},
) or die "Invalid command line arguments; run with --help for usage.\n";

# debug($level) -- tell the caller whether one debug channel is active.
#
# Compares the value given to --debug (stored in $opts{'debug'}) with
# $level using string equality.  Returns 1 on a match and 0 otherwise,
# so it can be used as  debug(N) && print "...";
#
# Debug levels used by this script:
#   1  => Show each match as it cycles through array
#   2  => Show rules from reference file
#   3  => Comparison of reference file data to logfile data
#   8  => Show syslog date info
#   9  => Show rejected spams
sub debug {
	my $wanted = shift;

	# Guard clause: anything other than an exact match is "off".
	return 0 unless $opts{'debug'} eq $wanted;
	return 1;
}

# usage() -- print the command-line help text.
#
# Takes no arguments.  The text goes to the currently selected output
# handle via print, and the sub returns whatever print returns.
sub usage {
	# One element per output line; the leading and trailing empty
	# elements reproduce the blank line before the text and the final
	# tab-only line that the help screen ends with.
	my @help = (
		"",
		"\tsmlogsummary.pl [options]",
		"",
		"\t--help                    List options (this screen).",
		"\t--configfile={filename}   Config file for master list of rules.",
		"\t                          Masks datedetail and spamdetail output.",
		"\t--date={today|yesterday}  Show today's or yesterday's summary.",
		"\t                          Not setting does all days in logfile.",
		"\t--datedetail              Gives detail of rules matched per day.",
		"\t--debug={num}             Various debug output.",
		"\t--logfile={filename}      Which log file to analyze. Defaults",
		"                                  to reading from STDIN.",
		"\t--spamdetail              Gives summary per spam rule.",
		"\t",
		"",
	);
	print join( "\n", @help );
}

# syslog_date($description) -- build the "Mon DD" stamp used to match
# the start of a log line, for today or for yesterday.
#
# $description  free text; if it contains "yesterday" (any case) the
#               stamp is for the previous day, otherwise (including
#               "today") it is for the current day.
#
# Returns the first three letters of the month name, a space, and the
# day of the month right-aligned in two columns, e.g. "Oct  5" or
# "Oct 19".
sub syslog_date {
	my ($description) = @_;

	# "use" is processed at compile time and imports into the current
	# package, so these names are available even though the statement
	# is written inside the sub.
	use Date::Calc qw(Today Month_to_Text Add_Delta_Days);

	debug(8) && print "$description passed to syslog_date()\n";

	# Offset in days relative to today: -1 for yesterday, else 0.
	my $when = ( $description =~ m/yesterday/i ) ? -1 : 0;

	# The year is returned by Add_Delta_Days but not needed here.
	my ( $year, $month, $day ) = Add_Delta_Days( Today(), $when );

	# Take the abbreviation directly rather than through a regex
	# capture, which would leave a stale $1 behind if it ever failed.
	my $month_short = substr( Month_to_Text($month), 0, 3 );

	# %2d pads single-digit days with a space, giving the two-space
	# gap ("Oct  5") that the original separator logic produced.
	return sprintf( "%s %2d", $month_short, $day );
}

# printsummary() -- print the report built up in %spamrule/%spamreject.
#
# Takes no arguments; reads the file-level %opts, %spamrule,
# %configrule and %spamreject.  Output, in order:
#   * a "Mail summary for <host>" header;
#   * with --configfile: per-rule totals restricted to rules that are
#     also present in %configrule (--datedetail/--spamdetail are
#     ignored in this mode so the list does not scroll away);
#   * otherwise: per-day detail (--datedetail) and/or per-rule totals
#     (--spamdetail);
#   * the per-day "Detected spam totals", always.
sub printsummary {
	# Print out summary
	#print "\nPer Day Totals:\n";
	#foreach (sort keys %logentries) {
	#print "$_ -> $logentries{$_}\n";
	#}

	# HOSTNAME is not always exported into the environment (for
	# example when run from cron), which used to leave the header
	# blank.  Prefer the environment value when present, otherwise
	# ask the core Sys::Hostname module; never die over a header.
	my $hostname = $ENV{'HOSTNAME'};
	if ( !defined $hostname or $hostname eq '' ) {
		require Sys::Hostname;
		$hostname = eval { Sys::Hostname::hostname() };
		$hostname = '' unless defined $hostname;
	}
	print "\nMail summary for $hostname\n";

	if ( $opts{'configfile'} ) {
		my %configtemp;     # rule => total hits, for rules in the config
		foreach my $rule ( sort keys %spamrule ) {
			my $per_day = $spamrule{$rule};

			# Debug level 3 traces every rule/date/config-rule
			# comparison; only walk that cross product on request.
			if ( debug(3) ) {
				foreach my $day ( keys %{$per_day} ) {
					foreach my $configkey ( keys %configrule ) {
						print "$rule compared to $configkey\n";
					}
				}
			}

			# Direct hash lookup instead of scanning every config rule.
			next unless exists $configrule{$rule};

			my @counts = values %{$per_day};
			next unless @counts;

			my $total = 0;
			foreach my $count (@counts) {
				$total += $count;
			}
			$configtemp{$rule} = $total;
		}
		print "\nPer rule spam detection summary from config file:\n";
		print "$opts{'configfile'}\n";
		foreach my $rule ( sort keys %configtemp ) {
			print "$rule -> $configtemp{$rule}\n";
		}
	# Otherwise go and do the --datedetail and/or --spamdetail.
	} else {
		if ( $opts{'datedetail'} or $opts{'spamdetail'} ) {
			print "\nPer rule spam detection summary\n";
		}
		foreach my $rule ( sort keys %spamrule ) {
			my $total = 0;
			# Sorted so the per-day detail comes out in a stable order.
			foreach my $day ( sort keys %{ $spamrule{$rule} } ) {
				if ( $opts{'datedetail'} ) {
					print "$day -> $rule -> $spamrule{$rule}{$day}\n";
				}
				$total += $spamrule{$rule}{$day};
			}
			if ( $opts{'spamdetail'} ) {
				print "$rule -> $total\n";
			}
		}
	}

	# Print the generic totals regardless.
	print "\nDetected spam totals\n";
	foreach my $day ( sort keys %spamreject ) {
		print "$day -> $spamreject{$day}\n";
	}
	print "\n";
}

# populate_reference_config() -- record one reference-config line.
#
# Takes no arguments; works on the current line in $_.  When the line
# is a "score <rule> <value> ..." directive, stores the first value
# in $configrule{<rule>}.  Every other line (comments, describe,
# header, ...) is ignored.
sub populate_reference_config {
	# Find only lines that set the score.  Leading whitespace is
	# allowed because SpamAssassin's config syntax accepts (though
	# discourages) indented directives, and score lines nested in
	# ifplugin/if blocks are commonly indented.
	if ( m/^\s*score\s+(\S+)\s+(\S+)/ ) {
		my ( $rule, $score ) = ( $1, $2 );
		$configrule{$rule} = $score;
		debug(2) && print "Rule: $rule, Score: $score\n";
	}
}

# mainloop() -- account for one log line.
#
# Takes no arguments; works on (and modifies) the current line in $_.
#
#  1. If the line starts with a syslog prefix from sendmail/sm-mta
#     ("Mon DD HH:MM:SS host sendmail[pid]: ", optionally followed by
#     a "<token>: " field), the prefix is stripped and the line is
#     chomped.  With a forced date ($forcedate) lines from any other
#     day are dropped; without one, $date tracks the line's own stamp.
#  2. If the remaining text carries a "tests=<list>" field other than
#     "none", the line is counted in %spamreject under $date and every
#     comma-separated rule name is counted in %spamrule{rule}{$date}.
#
# Skipped lines are left with "return" rather than "next": jumping out
# of a subroutine with "next" depends on the caller's loop and is
# fatal when there is none.
sub mainloop {
	# Detect Date.  One pattern covers both layouts; the trailing
	# "<token>: " group is optional.
	if ( s/^(...\s..)\s..:..:..\s\S+\s(?:sendmail|sm-mta)\[\d+\]:\s(?:\S+:\s)?// ) {
		my $stamp = $1;
		if ($forcedate) {
			return if $forcedate ne $stamp;
		} else {
			$date = $stamp;
		}
		chomp;
	}

	# Detect lines that list the SpamAssassin tests that fired.
	if ( m/tests=(\S+)/ ) {
		my $tests = $1;
		return if $tests =~ /none/;

		# Drop literal backslash-n backslash-t sequences embedded in
		# the list (presumably a folded header as logged -- TODO
		# confirm against real log samples).
		$tests =~ s/\\n\\t//g;

		$spamreject{$date}++;
		debug(9) && print "Caught $tests\n";

		foreach my $rule ( split /,/, $tests ) {
			$spamrule{$rule}{$date}++;
			debug(1) && print "$rule\n";
		}
	}
	return;
}

# ---------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------

# --help / -h: show the option list and stop.
if ( $opts{'h'} ) {
	usage();
	exit 0;
}

# --date: restrict the scan to one day.  $date is preset as well so
# every counted line is filed under the forced day.
if ( $opts{'date'} ) {
	$forcedate = syslog_date( $opts{'date'} );
	$date      = $forcedate;
	debug(8) && print "$forcedate returned from syslog_date()\n";
}

# --configfile: load the reference rules.  populate_reference_config
# reads each line from $_, which the while loop sets.
if ( $opts{'configfile'} ) {
	open( my $config_fh, '<', $opts{'configfile'} )
		or die "Couldn't open $opts{'configfile'}: $!\n";
	while (<$config_fh>) { populate_reference_config(); }
	close $config_fh;
}

# Scan the log: the file named by --logfile, or else whatever <>
# supplies (remaining command-line arguments, or STDIN).  mainloop
# reads each line from $_.
if ( $opts{'logfile'} ) {
	open( my $log_fh, '<', $opts{'logfile'} )
		or die "Couldn't open $opts{'logfile'}: $!\n";
	while (<$log_fh>) { mainloop(); }
	close $log_fh;
} else {
	while (<>) { mainloop(); }
}

printsummary();

