#!/usr/bin/perl
# Script to expire spam for users with more than 15M in /home
# Automated version designed to scan for top 10 or so.
# Last update Dec 11/2003 kdeugau@deepnet.cx
# Copyright 2003-2006 Kris Deugau <kdeugau@deepnet.cx>

# Some filesystem modules:
use Fcntl qw(:DEFAULT :flock);

# Time
use Time::Local;

sub expirethespam($$);

# Expiry window, taken from the command line as "<period> [<units>]":
#   <period>  positive number of units (default 30)
#   <units>   "d" for days (the default) or "m" for nominal 30-day months
# The results are left in the script-wide $period, $units and $timediff
# (the window length in seconds), which the rest of the script reads.
($period, $units, $timediff) = _expiry_window(@ARGV);

# _expiry_window($period, $units)
#
# Turn the (possibly missing) command-line values into a validated
# (period, units, seconds) triple.  Missing or empty values fall back to
# 30 and "d".
#
# Strings are compared with eq/ne here: the numeric == operator treats
# every non-numeric string as 0, which made "m" indistinguishable from ""
# and from "d".
#
# Dies on a non-positive or non-numeric period and on unknown units.  A
# window of 0 seconds would expire every message, so bad input is refused
# rather than guessed at.
sub _expiry_window {
  my ($count, $unit) = @_;

  $count = 30  unless defined $count && $count ne '';
  $unit  = 'd' unless defined $unit  && $unit  ne '';

  # Length of each supported unit, in days ("m" is a nominal 30-day month).
  my %days_per_unit = (d => 1, m => 30);

  die "Bad expiry period '$count' (want a positive number)\n"
    unless $count =~ /\A\d+(?:\.\d+)?\z/ && $count > 0;
  die "Bad expiry units '$unit' (want 'd' or 'm')\n"
    unless exists $days_per_unit{$unit};

  return ($count, $unit, $count * $days_per_unit{$unit} * 24 * 60 * 60);
}
# Lookup table from the three-letter English month abbreviation to the
# zero-based month number that timegm() takes (Jan => 0 ... Dec => 11).
%months = do {
  my $index = 0;
  map { $_ => $index++ } qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
};

# Hokay.  Snag the quota info, and loop over it to make sure we trim spamboxes for anyone over
# 15.5M on /home.  Messy, and crude, but highly effective unless the (l)user is "advanced".  In
# which case they're on their own.  We could also auto-empty trash folders...  heheheheh.. ;)

# Empty the Razor2 agent log of every user whose usage on /home is within
# 4500 of their limit.
#
# Each repquota line is split on whitespace: field 0 is the user name, and
# fields 2 and 3 are compared as usage and limit (presumably "blocks used"
# and the soft limit, per repquota's usual column layout -- confirm on the
# target system).  The first five lines of output are skipped as headers.
# Entries with a limit of 0, and blank lines, fail the test below and are
# ignored.
print "Whacking Razor2 log for users over (quota-4500)M...\n";
@quotalist_mfolders = `repquota /home`;
for my $quota_line (@quotalist_mfolders[5 .. $#quotalist_mfolders]) {
  my @fields = split /\s+/, $quota_line;
  next unless $fields[3] != 0 && $fields[2] > $fields[3] - 4500;

  # Got a quotabreaker, but not root, and (woohoo!) not a null line.
  print "\t$fields[0]\n";

  # Opening for write truncates the log (or creates it empty), which is all
  # "cat /dev/null > file" did -- without handing a name parsed from command
  # output to a shell.
  my $razor_log = "/home/$fields[0]/.razor/razor-agent.log";
  if (open(my $log_fh, '>', $razor_log)) {
    close $log_fh;
  } else {
    warn "Can't truncate $razor_log: $!\n";
  }
}

print "Trimming spamboxes for users over 15.5M quota...\n";

@quotalist = `repquota /home`;

# For every user over the usage threshold, drop the leading run of messages
# in /home/<user>/mail/spam whose "From " line is older than $timediff
# seconds.  Scanning stops at the first message that is still inside the
# window; that message and everything after it is kept.
#
# Flow, per user:
# -> Open the spambox and take an exclusive flock so new mail can't be
#    appended while we work.
# -> Copy the messages to keep into the scratch file spam.lock.
# -> Only once the scratch copy is safely on disk: truncate the spambox
#    and stream the kept messages back into it.
#
# A failure on one mailbox is reported and that user is skipped; the rest
# of the run carries on.
#
# NOTE(review): "From " timestamps are interpreted as UTC by timegm().  If
# the delivering agent writes local time the cutoff is off by the UTC
# offset -- confirm.

USER:
for my $quota_line (@quotalist[5 .. $#quotalist]) {
  my @fields = split /\s+/, $quota_line;

  # 15500K changed to 2550K temporarily to get only "newer" spam.
  next USER unless $fields[3] != 0 && $fields[2] > 2550;

  my $user    = $fields[0];
  my $spambox = "/home/$user/mail/spam";
  my $scratch = "$spambox.lock";
  my $cutoff  = time() - $timediff;   # messages stamped at or before this are expired
  my $nmsgs   = 0;                    # messages dropped so far
  $unexpired  = 0;                    # package global, like the rest of the script's state

  my ($spam_fh, $scratch_fh, $kept_fh);
  my $made_scratch = 0;   # we created the scratch file, so it is ours to remove
  my $truncated    = 0;   # past this point the scratch file is the only full copy

  my $finished = eval {
    sysopen($spam_fh, $spambox, O_RDWR|O_CREAT)
      or die "Can't open $spambox: $!\n";
    flock($spam_fh, LOCK_EX)
      or die "Can't lock $spambox: $!\n";

    # We don't really need to lock this file, but we do need it to exist:
    # carrying on without it would truncate the spambox with nothing to
    # restore from.
    open($scratch_fh, '>', $scratch)
      or die "Can't create $scratch: $!\n";
    $made_scratch = 1;

    while (my $line = <$spam_fh>) {
      if (!$unexpired && $line =~ /^From /) {
        # The date is the tail of the line:
        #   From "Melisa Painter"@clerk.com  Mon Dec  8 21:25:35 2003
        # Anchoring at the end copes with quoted addresses that contain
        # spaces or commas.
        my ($mon, $mday, $hour, $min, $sec, $year) = $line =~
          /\s[A-Za-z]{3}\s+([A-Za-z]{3})\s+(\d{1,2})\s+(\d{1,2}):(\d\d):(\d\d)\s+(\d{4})\s*$/;

        my $stamp;
        if (defined $mon && exists $months{$mon}) {
          # timegm() croaks on out-of-range fields; treat that as unparseable.
          # The four-digit year is passed as-is, which Time::Local reads as
          # the actual year.
          $stamp = eval { timegm($sec, $min, $hour, $mday, $months{$mon}, $year) };
        }

        if (defined $stamp && $stamp <= $cutoff) {
          $nmsgs++;
        } else {
          # Either the message is still inside the window, or the line could
          # not be dated.  Undatable lines are kept: deleting mail we cannot
          # date is the wrong way to fail.
          warn "Unparseable From line in $spambox; keeping it and everything after it\n"
            unless defined $stamp;
          $unexpired = 1;
        }
      }
      print {$scratch_fh} $line if $unexpired;
    }

    # Buffered write errors only surface at close; don't touch the spambox
    # unless the scratch copy is complete.
    close($scratch_fh)
      or die "Can't finish writing $scratch: $!\n";
    open($kept_fh, '<', $scratch)
      or die "Can't reopen $scratch: $!\n";

    # ... rewind to the beginning of the original file and truncate...
    seek($spam_fh, 0, 0)
      or die "Can't rewind $spambox: $!\n";
    truncate($spam_fh, 0)
      or die "Can't truncate original spambox: $!\n";
    $truncated = 1;

    # ... and stream the kept messages back.
    while (my $line = <$kept_fh>) {
      print {$spam_fh} $line
        or die "Can't write $spambox: $!\n";
    }
    close($kept_fh);

    # Closing flushes the rewritten spambox and then releases the flock.
    close($spam_fh)
      or die "Can't finish writing $spambox: $!\n";
    1;
  };

  unless ($finished) {
    warn "Skipping $user: $@";
    for my $fh ($spam_fh, $scratch_fh, $kept_fh) {
      close $fh if defined $fh && defined fileno($fh);
    }
    if ($truncated) {
      warn "Kept messages for $user are still in $scratch\n";
    } elsif ($made_scratch) {
      unlink $scratch;
    }
    next USER;
  }

  # Remove the scratch file, as it's no longer needed.
  unlink $scratch;

  print "Expired $nmsgs messages from ${user}'s spambox.\n";

  # Nothing was old enough: hand the mailbox to expirethespam() with a
  # 21-day window.  Our own handle is already closed, so no lock of ours is
  # in its way.
  if ($nmsgs == 0) {
    expirethespam($user, 21);
  }
}

exit 0;

# expirethespam($user, $days)
#
# Drop the leading run of messages older than $days days from
# /home/$user/mail/spam.  Scanning stops at the first message that is still
# inside the window; that message and everything after it is kept.
#
# Returns the number of messages removed (0 if none).
# Dies if the spambox or its scratch file cannot be opened, locked or
# rewritten.
sub expirethespam ($$){
  my ($user, $days) = @_;

  # The caller may still hold an exclusive flock on this same mailbox
  # through the script-level SPAMFILE handle.  flock() locks belong to the
  # open file, so opening the mailbox again would block on that lock
  # forever; release it first.  Harmless if SPAMFILE isn't open.
  close(SPAMFILE);

  return _expire_mbox("/home/$user/mail/spam", $days * 24 * 60 * 60);
}

# _expire_mbox($spambox, $max_age)
#
# Does the work for expirethespam() on an arbitrary mbox path, with the
# window given in seconds.
#
# Flow:
# -> Open the spambox and take an exclusive flock so new mail can't be
#    appended while we work.
# -> Copy the messages to keep into the scratch file "$spambox.lock".
# -> Only once the scratch copy is safely on disk: truncate the spambox
#    and stream the kept messages back into it.
#
# Returns the number of messages removed.  On failure the scratch file is
# removed again -- unless the spambox has already been truncated, in which
# case it is the only full copy and is left in place -- and the error is
# re-thrown.
sub _expire_mbox {
  my ($spambox, $max_age) = @_;

  my $scratch = "$spambox.lock";
  my $cutoff  = time() - $max_age;   # messages stamped at or before this are expired
  my $expired = 0;
  my $keeping = 0;                   # set once the first message to keep is seen

  my ($spam_fh, $scratch_fh, $kept_fh);
  my $made_scratch = 0;
  my $truncated    = 0;

  my $finished = eval {
    sysopen($spam_fh, $spambox, O_RDWR|O_CREAT)
      or die "Can't open $spambox: $!";
    flock($spam_fh, LOCK_EX)
      or die "Can't lock $spambox: $!";

    # We don't really need to lock this file, but we do need it to exist:
    # carrying on without it would truncate the spambox with nothing to
    # restore from.
    open($scratch_fh, '>', $scratch)
      or die "Can't create $scratch: $!";
    $made_scratch = 1;

    while (my $line = <$spam_fh>) {
      if (!$keeping && $line =~ /^From /) {
        my $stamp = _from_line_epoch($line);
        if (defined $stamp && $stamp <= $cutoff) {
          $expired++;
        } else {
          # Still inside the window, or impossible to date.  Undatable lines
          # are kept: deleting mail we cannot date is the wrong way to fail.
          warn "Unparseable From line in $spambox; keeping it and everything after it\n"
            unless defined $stamp;
          $keeping = 1;
        }
      }
      print {$scratch_fh} $line if $keeping;
    }

    # Buffered write errors only surface at close; don't touch the spambox
    # unless the scratch copy is complete.
    close($scratch_fh)
      or die "Can't finish writing $scratch: $!";
    open($kept_fh, '<', $scratch)
      or die "Can't reopen $scratch: $!";

    # ... rewind to the beginning of the original file and truncate...
    seek($spam_fh, 0, 0)
      or die "Can't rewind $spambox: $!";
    truncate($spam_fh, 0)
      or die "Can't truncate original spambox: $!";
    $truncated = 1;

    # ... and stream the kept messages back.
    while (my $line = <$kept_fh>) {
      print {$spam_fh} $line
        or die "Can't write $spambox: $!";
    }
    close($kept_fh);

    # Closing flushes the rewritten spambox and then releases the flock.
    close($spam_fh)
      or die "Can't finish writing $spambox: $!";

    unlink $scratch;
    1;
  };
  return $expired if $finished;

  my $why = $@;
  for my $fh ($spam_fh, $scratch_fh, $kept_fh) {
    close $fh if defined $fh && defined fileno($fh);
  }
  unlink $scratch if $made_scratch && !$truncated;
  die $why;
}

# _from_line_epoch($from_line)
#
# Parse the timestamp at the end of an mbox "From " separator line, e.g.
#   From "Melisa Painter"@clerk.com  Mon Dec  8 21:25:35 2003
# and return it as epoch seconds, or undef if the line does not end in a
# well-formed "Www Mmm dd hh:mm:ss yyyy" date.  Anchoring at the end of the
# line copes with quoted addresses that contain spaces or commas.
#
# NOTE(review): the timestamp is interpreted as UTC by timegm().  If the
# delivering agent writes local time the result is off by the UTC offset
# -- confirm.
sub _from_line_epoch {
  my ($from_line) = @_;

  my %month_index = (
    Jan => 0, Feb => 1, Mar => 2, Apr => 3, May => 4,  Jun => 5,
    Jul => 6, Aug => 7, Sep => 8, Oct => 9, Nov => 10, Dec => 11,
  );

  my ($mon, $mday, $hour, $min, $sec, $year) = $from_line =~
    /\s[A-Za-z]{3}\s+([A-Za-z]{3})\s+(\d{1,2})\s+(\d{1,2}):(\d\d):(\d\d)\s+(\d{4})\s*$/
    or return;
  return unless exists $month_index{$mon};

  # timegm() croaks on out-of-range fields; report those as undef too.  The
  # four-digit year is passed as-is, which Time::Local reads as the actual
  # year.
  my $epoch = eval { timegm($sec, $min, $hour, $mday, $month_index{$mon}, $year) };
  return $epoch;
}
