#!/usr/bin/perl
#
# ExtractEmail (Extract emails from sendmail logs)
# Todd A. Lyons <tlyons@ivenue.com>
# Rev 2  --  11/09/04
# License: GPLv2


use strict;
use Getopt::Long;

# ---------------------------------------------------------------------
# File-scope state shared by all subroutines below.
# ---------------------------------------------------------------------

# Command line options as parsed by GetOptions().
my %opts = ();

# Intended for counting log lines while debugging; declared but not
# otherwise referenced by the code in this file.
my %logentries = ();

# Messages found for the requested address (a hash, two levels deep):
#   $user{$email}->{$id} = {
#       'recvdate' => Date sendmail accepted,
#       'recvtime' => Time sendmail accepted,
#       'from'     => From email address,
#       'relay'    => Received from,
#       'server'   => Host name taken from the log line,
#       'queueid'  => ID number sendmail uses internally,
#       'status'   => Last known status,
#       'hits'     => Score of email,
#       'tests'    => Tests that matched,
#   }
my %user = ();

# Cache of from= lines that have not been matched to a to= line yet,
# keyed by queue id.  Each value is a hash with the keys from, relay,
# queueid, recvdate, recvtime and server (plus hits and tests once an
# X-Spam-Status line has been seen for that id).
my %idcachefrom = ();

# Housecleaning index: $scrubber{n} holds the queue id of the n-th from=
# line, which lets mainloop() expire the oldest %idcachefrom entries.
my %scrubber = ();
my $counter  = 0;    # Number of from= lines seen so far (index into %scrubber)

# Fields of the most recent sendmail log line.  $date holds the --date
# value instead when that option was given.
my ( $id, $server, $date, $time );

# Address being searched for (from --email).
my $email;

# Syslog-style date ("Mon DD") that restricts the scan to a single day when
# --date was given; left undefined to accept every day in the log.
my $forcedate;

# Parse the command line into %opts.
#
# GetOptions() returns false when it meets an unknown option or a value of
# the wrong type (it has already written a diagnostic to STDERR by then).
# Stop with the usage text and exit status 1 in that case instead of
# carrying on with a partly parsed command line.
#
# -h and --help are aliases for the same flag.
GetOptions(
	"debug=i"       => \$opts{'debug'},
	"date=s"        => \$opts{'date'},
	"email=s"       => \$opts{'email'},
	"h|help"        => \$opts{'h'},
	"logfile=s"     => \$opts{'logfile'},
	"nice"          => \$opts{'nice'},
	"rejectsonly"   => \$opts{'rejectsonly'},
) or do {
	usage();
	exit 1;
};

# debug($level): report whether debug output for $level is switched on.
#
# Returns 1 when the value given with --debug equals $level and 0 otherwise.
# Levels are exact matches, not thresholds: --debug=7 enables the level 7
# messages only.
#
# Debug levels used in this script:
#   1  => Show queue id's being extracted
#   2  => Show inbound (from=) lines as they are cached
#   3  => Show the status of deliveries to the requested address
#   4  => (unused)
#   5  => Show the counter when an old cache entry is expired
#   6  => Show when sleeping (system load saver)
#   7  => Show hits and tests
#   8  => Show syslog date info
#   9  => (unused)
sub debug {
	my $wanted = shift;
	return ( $opts{'debug'} eq $wanted ) ? 1 : 0;
}

# usage: print the command line help text to standard output.
# Takes no arguments; callers do not use the return value.
sub usage {
	# One element per output line; they are joined with newlines below.
	# The leading and trailing empty elements produce the blank line at the
	# top and the final newline.
	my @help = (
		"",
		"\textractemail.pl [options]",
		"",
		"\t--help                    List options (this screen).",
		"\t--email={foo\@bar.com}     Email address to search for (required).",
		"\t--date={today|yesterday}  Show today's or yesterday's summary.",
		"\t                          Not setting does all days in logfile.",
		"\t--nice                    Sleep 1 second every 1000 lines.",
		"\t--rejectsonly             Only show mails not delivered.",
		"\t--debug={num}             Various debug output.",
		"\t--logfile={filename}      Which log file to analyze, defaults",
		"\t                          to reading from STDIN.",
		"\t",
		"",
	);
	print join( "\n", @help );
}

# syslog_date($description): build the date text found at the start of a
# syslog line (for example "Nov  9" or "Nov 10") for today or yesterday.
#
#   $description  The --date value.  Anything containing "yesterday"
#                 (case-insensitive) selects yesterday; every other value
#                 selects today.
#
# Returns the first three characters of the month name, a space, and the
# day of the month right-aligned in two columns, so single-digit days are
# preceded by two spaces.
sub syslog_date {
	my ($description) = @_;

	use Date::Calc qw(Today Month_to_Text Add_Delta_Days);

	debug(8) && print "$description passed to syslog_date()\n";

	# Days relative to today: 0 for today, -1 for yesterday.
	my $offset = ( $description =~ m/yesterday/i ) ? -1 : 0;
	my ( undef, $month, $day ) = Add_Delta_Days( Today(), $offset );

	my $month_short = substr( Month_to_Text($month), 0, 3 );

	return sprintf( "%s %2d", $month_short, $day );
}

# mainloop: digest the single log line currently held in $_.
#
# Takes no arguments; the read loops at the bottom of the file call it once
# per input line.  It works entirely through file-scope state:
#   reads    $forcedate, $email, %opts
#   updates  $date, $time, $server, $id       (fields of the current line)
#            %idcachefrom, %scrubber, $counter (cache of from= lines)
#            %user                             (results for printsummary)
# When the line carries a sendmail/sm-mta prefix, $_ itself is changed: the
# prefix is stripped and the line is chomped.
#
# The return value is not meaningful.
sub mainloop {
	# Number of later from= lines after which an unmatched cache entry is
	# dropped again.
	my $cache_limit = 5000;

	# Detect Date
	# Strip "Mon DD HH:MM:SS host sendmail[pid]: queueid: " from the front of
	# the line so the patterns further down can anchor on what follows it.
	# NOTE(review): lines without that prefix are not skipped; they fall
	# through to the checks below with $id, $date, $time and $server still
	# holding the values of the last sendmail line.  Confirm this is wanted.
	if ( s/^(...\s..)\s(..:..):..\s([^\s]+)\s(sendmail|sm-mta)\[[\d]+\]:\s([^\s]+):\s// ) {
		# Copy the captures right away; any later successful match
		# would replace them.
		my ( $line_date, $line_time, $line_host, $line_id ) = ( $1, $2, $3, $5 );

		# --date was given and this line belongs to another day.  Use
		# return rather than next: the loop lives in the caller.
		return if $forcedate and $forcedate ne $line_date;

		chomp;
		$date   = $line_date unless $forcedate;
		# Save message ID
		$id     = $line_id;
		$time   = $line_time;
		$server = $line_host;
		debug(1) and print "$id \n";
	}

	# Detect inbound mail
	if ( m/^from=<([^\s]+)>,.*relay=(.*)$/ ) {
		my ( $from, $relay ) = ( $1, $2 );

		$idcachefrom{$id} = {
			'from'     => $from,
			'relay'    => $relay,
			'queueid'  => $id,
			'recvdate' => $date,
			'server'   => $server,
			'recvtime' => $time,
		};
		debug(2) && print "Received: $from at $date $time from $relay\n";

		# Housecleaning: keep %idcachefrom from growing without bound.
		# %scrubber remembers which queue id arrived with which from=
		# line; once an entry is $cache_limit from= lines old we assume
		# its to= line will not turn up any more and drop it from both
		# hashes.
		$counter++;
		$scrubber{$counter} = $id;
		my $expired = $counter - $cache_limit;
		if ( $scrubber{$expired} ) {
			delete $idcachefrom{ $scrubber{$expired} };
			delete $scrubber{$expired};
			debug(5) && print "Counter -> $counter\n";
		}

		# Sleep one second to relieve load.  $counter only advances on
		# from= lines, so this happens every 1000 from= lines rather
		# than every 1000 input lines.
		if ( $opts{'nice'} and $counter % 1000 == 0 ) {
			debug(6) && print "Sleep 1 sec at $counter from matches.\n";
			sleep 1;
		}
	}

	# Detect SpamAssassin scores
	if ( m/X-Spam-Status:\s(Yes|No),\shits=([^\s]+).*tests=([^\s]+)/ ) {
		my ( $is_spam, $hits, $tests ) = ( $1, $2, $3 );

		$idcachefrom{$id}->{'hits'}  = $hits;
		$idcachefrom{$id}->{'tests'} = $tests;
		debug(7) && print "Spam? $is_spam  Hits: $hits\nTests: $tests\n";
	}

	# Detect delivered mail
	if ( m/^to=<([^\s]+)>,.*stat=([^\s]+)\s?([^\s]+)?(.*)/ ) {
		# stat= is split into its first word, an optional second word and
		# the remainder of the line.
		my ( $rcpt, $stat_word1, $stat_word2, $stat_rest ) = ( $1, $2, $3, $4 );
		$stat_word2 = '' unless defined $stat_word2;

		# Statuses that count as "not delivered" for --rejectsonly.
		my $not_delivered = (
			   $stat_word1 eq "Deferred:"
			or "$stat_word1$stat_word2" eq "Serviceunavailable"
			or "$stat_word1$stat_word2" eq "Blockedby"
		);

		if ( lc($rcpt) eq lc($email)
			and ( $not_delivered or ! $opts{'rejectsonly'} ) ) {
			my $record = ( $user{$email}->{$id} ||= {} );
			$record->{'status'} = "$stat_word1 $stat_word2$stat_rest";

			# First to= line for this queue id: move the details of
			# the matching from= line out of the cache.
			my $cached = $idcachefrom{$id};
			if ( ! $record->{'from'} and $cached ) {
				for my $field ( qw(from relay recvdate recvtime queueid server hits tests) ) {
					$record->{$field} = $cached->{$field};
				}
				# The logged test list can contain the literal
				# characters \n\t (presumably a folded header);
				# turn them into a real line break and tab.
				$record->{'tests'} =~ s/\\n\\t/\n\t/g
					if defined $record->{'tests'};
				delete $idcachefrom{$id};
			}
			# Printed from the saved copy: the substitution above
			# resets $2 whenever it matches.
			debug(3) && print "Status: $stat_word1\n";
		}
	}

	return;
}

# printsummary: print every message collected in %user for $email.
#
# Takes no arguments.  Reads the file-scope $email, %user and %opts and
# writes the report to standard output.
#
# Records are listed in order of receive date, receive time and queue id.
# Sorting the hash references themselves would order them by their
# stringified addresses, which is arbitrary.  Log lines carry no year, so
# a log that runs from December into January lists January first.  Records
# without a receive date come first.
sub printsummary {
	# Month abbreviation => month number, so dates sort chronologically
	# rather than alphabetically.
	my %month_number;
	@month_number{ qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec) } = ( 1 .. 12 );

	# Build a string for one record that compares in time order.
	my $sort_key = sub {
		my ($record) = @_;
		my $recvdate = defined $record->{'recvdate'} ? $record->{'recvdate'} : '';
		my $recvtime = defined $record->{'recvtime'} ? $record->{'recvtime'} : '';
		my $queueid  = defined $record->{'queueid'}  ? $record->{'queueid'}  : '';

		# recvdate looks like "Nov  9" or "Nov 10".
		my ( $month, $day ) = $recvdate =~ m/^(\S+)\s+(\d+)/;
		my $month_index = defined $month ? ( $month_number{$month} || 0 ) : 0;

		return sprintf( "%02d%02d %s %s",
			$month_index, $day || 0, $recvtime, $queueid );
	};

	# No entry for $email simply means there is nothing to list.
	my $records = $user{$email} || {};
	my @sorted =
		map  { $_->[1] }
		sort { $a->[0] cmp $b->[0] }
		map  { [ $sort_key->($_), $_ ] }
		values %$records;

	# Print out summary
	print "\n";
	print "Rejected " if $opts{'rejectsonly'};
	print "Email Summary for $email:\n\n";
	for my $record (@sorted) {
		print "\n$record->{'recvdate'} $record->{'recvtime'} from $record->{'from'}";
		print "\nLocal Queue ID: $record->{'queueid'}";
		print "\nStatus: $record->{'status'}";
		print "\nReceived from: $record->{'relay'} by $record->{'server'}";
		print "\nScore: $record->{'hits'}";
		print "\nMatched tests: $record->{'tests'}\n";
	}
	print "\n";
}

# ---------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------

# --help / -h: show the option list and stop.
if ( $opts{'h'} ) {
	usage();
	exit 0;
}

# An address to search for is mandatory.
if ( ! $opts{'email'} ) {
	print "\n\tError!  Must pass an email address to search for!\n\t";
	usage();
	exit 1;
}
$email = $opts{'email'};

# --date: restrict the scan to one day.  $date is preset as well because
# mainloop() leaves it alone while $forcedate is set.
if ( $opts{'date'} ) {
	$forcedate = syslog_date( $opts{'date'} );
	$date = $forcedate;
	debug(8) && print "$forcedate returned from syslog_date()\n";
}

# Feed every input line to mainloop() through $_.
if ( $opts{'logfile'} ) {
	# Lexical handle instead of a global bareword; include $! so the
	# reason for a failed open is reported.
	open( my $infile, '<', $opts{'logfile'} )
		or die "Couldn't open $opts{'logfile'}: $!\n";
	while (<$infile>) { mainloop(); }
	close $infile;
} else {
	# No --logfile: read the files named on the command line, or STDIN
	# when there are none.
	while (<>) { mainloop(); }
}

printsummary();
