# Call Clam Antivirus from SpamAssassin

package Mail::SpamAssassin::Plugin::ClamAV;
use strict;

# $Id: clamav.pm 2377 2019-06-04 17:59:34Z kdeugau $

# version 2.0, 2010-01-07
#   - use SA public interface set_tag() and add_header, instead of
#     pushing a header field directly into $conf->{headers_spam}
# 2016/10/07 kdeugau@vianet.ca
#   - Switch to ClamAV::Client since File::Scan::ClamAV isn't in Debian,
#     and looked to be a headache to package
#   - Extend and add configuration options for socket path or host:port
#     and to toggle extended signature info if clamd returns it.
# 2017/03/03 kdeugau@vianet.ca
#   - Further review and refine for robustness by adding a timeout, and
#     checking for ClamAV::Client
# 2017/03/14 kdeugau@vianet.ca
#   - Add Clam result to the "spamd: result" line, so we don't have to go
#     matching SA vs Clam logs

use Mail::SpamAssassin;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Logger;
use Mail::SpamAssassin::Timeout;

use ClamAV::Client;
our @ISA = qw(Mail::SpamAssassin::Plugin);

# Constructor.
#
#   $class  - class name, or an existing instance whose class is reused
#   $mailsa - object handed on to the parent constructor; its {conf} member
#             is passed to set_config()
#
# Records in $self->{clamav_available} whether ClamAV::Client could be
# loaded, registers the "check_clamav" eval rule and the plugin's
# configuration settings, and returns the blessed plugin object.
sub new {
  my ($class, $mailsa) = @_;
  $class = ref($class) || $class;

  # standard plugin setup: let the parent class build the object
  my $self = $class->SUPER::new($mailsa);
  bless($self, $class);

  # Probe for ClamAV::Client.  The scan is arguably a network test, but only
  # when a TCP socket is used *and* the address is remote; that would be
  # messy to check, so $mailsa->{local_tests_only} is deliberately not
  # consulted here.
  my $loaded = eval { require ClamAV::Client; };
  $self->{clamav_available} = $loaded ? 1 : 0;
  dbg($loaded ? "ClamAV: ClamAV is available"
              : "ClamAV: ClamAV is not available");

  # register the eval rule and the configuration settings
  $self->register_eval_rule("check_clamav");
  $self->set_config($mailsa->{conf});

  return $self;
}

# Register this plugin's configuration settings.
#
#   $self - plugin object (not used here)
#   $conf - configuration object; the command list is handed to
#           $conf->{parser}->register_commands()
#
# Every setting is flagged is_admin.  The POD interleaved below documents
# each setting next to its definition.  Returns whatever
# register_commands() returns.
sub set_config {
  my ($self, $conf) = @_;
  my @cmds;

=head1 ADMIN OPTIONS

=over 4

=item clamav_socket_path PATH

The full path to the local ClamAV UNIX socket.  When this is set it is
preferred over the TCP host/port settings.

=cut

  push(@cmds, {
    setting  => 'clamav_socket_path',
    is_admin => 1,
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
  });

=item clamav_socket_port NUMBER    (default 3310)

The TCP port number to use when connecting to clamd over a TCP socket
instead of a UNIX socket.

=cut

  push(@cmds, {
    setting  => 'clamav_socket_port',
    default  => 3310,
    is_admin => 1,
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
  });

=item clamav_socket_host HOSTNAME

The hostname or IP for the ClamAV TCP host.

=cut

  push(@cmds, {
    setting  => 'clamav_socket_host',
    is_admin => 1,
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
  });

=item clamav_extended_detection (0|1)     (default 0)

Include extra hit details (file size, hash) in the virus "name" result.

=cut

  # complement of sorts to ExtendedDetectionInfo in clamd.conf
  # since leaving it in the result looks messy
  push(@cmds, {
    setting  => 'clamav_extended_detection',
    default  => 0,
    is_admin => 1,
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL,
  });

=item clamav_timeout SECONDS    (default 3)

Time to wait for Clam to scan the message.  Mainly a leash against clamd
going into a sulk because of bad signature files.

3 seconds is still probably far too long to wait in most mail flows.

=back

=cut

  push(@cmds, {
    setting  => 'clamav_timeout',
    default  => 3,
    is_admin => 1,
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_DURATION,
  });

  return $conf->{parser}->register_commands(\@cmds);
} # set_config()

# Plugin hook: add a "clamsig=..." entry to the "spamd: result" log line.
#
#   $params->{permsgstatus} - per-message status object; must provide
#                             set_spamd_result_item() and get_tag()
#
# The entry is produced by a callback, so the CLAMAVRESULT tag is read when
# the callback is invoked rather than when this hook runs.
sub parsed_metadata {
  my ($self, $params) = @_;
  my $pms = $params->{permsgstatus};

  my $clam_item = sub {
    return "clamsig=" . $pms->get_tag("CLAMAVRESULT");
  };

  $pms->set_spamd_result_item($clam_item);
} # parsed_metadata()

# Plugin hook: prepare for a message scan by resetting the CLAMAVRESULT tag
# to the empty string.
#
#   $params->{permsgstatus} - per-message status object whose {tag_data}
#                             hash is written directly
sub check_start {
  my ($self, $params) = @_;

  $params->{permsgstatus}{tag_data}{CLAMAVRESULT} = "";
}

# Eval rule "check_clamav": pass the message to clamd through ClamAV::Client.
#
#   $self     - plugin object; {clamav_available} and {main}{conf} are read
#   $pms      - per-message status object
#   $fulltext - message content, handed unchanged to scan_scalar()
#               (presumably a reference to the full message text -- confirm
#               against the rule type this eval is attached to)
#
# Returns 1 when clamd reports a signature match, 0 in every other case
# (clean, ClamAV::Client unavailable, missing configuration, missing socket,
# scan error, timeout).
#
# Side effects:
#   - the CLAMAVRESULT tag is set to "No", "Yes: <signature>",
#     "Error: <reason>", or a plain configuration message
#   - when the scan was attempted and did not time out, the same text is
#     stored as X-Spam-Virus metadata so rules can match on it
#   - on a hit, the signature name is added to the report via test_log()
sub check_clamav {
  my ($self, $pms, $fulltext) = @_;
  my $conf    = $pms->{conf};
  my $timeout = $self->{main}->{conf}->{clamav_timeout};

  # initialize the tag
  $pms->{tag_data}->{CLAMAVRESULT} = "";

  # explicit 0, like every other "did not scan" exit below
  return 0 unless $self->{clamav_available};

  my $isspam = 0;
  my $header = "No";
  my $clamav;

  # prefer local unix socket over TCP
  if ($conf->{clamav_socket_path}) {
    # The capture launders the value for taint mode.  A value the pattern
    # cannot match (e.g. one starting with a newline) leaves nothing
    # captured, so fall back to '' which simply fails the socket test.
    my ($sock) = ($conf->{clamav_socket_path} =~ /^(.+)$/);
    $sock = '' unless defined $sock;

    # -S is only true for an existing socket, so no separate -e is needed
    if (-S $sock) {
      dbg("ClamAV: invoking ClamAV::Client, sock:%s", $sock);
      $clamav = ClamAV::Client->new(socket_name => $sock);
    } else {
      dbg("ClamAV: configured for local socket but no socket present at $sock");
      $pms->set_tag('CLAMAVRESULT', "configured for local socket but no socket present at $sock");
      return 0;
    }
  } elsif ($conf->{clamav_socket_host} && $conf->{clamav_socket_port}) {
    # same taint-mode laundering as for the socket path
    my ($host) = ($conf->{clamav_socket_host} =~ /^(.+)$/);
    my ($port) = ($conf->{clamav_socket_port} =~ /^(.+)$/);
    dbg("ClamAV: invoking ClamAV::Client, %s:%s", $host, $port);
    $clamav = ClamAV::Client->new(
      socket_host => $host,
      socket_port => $port,
    );
  } else {
    # neither a socket path nor a host+port pair is configured
    dbg("ClamAV: configuration missing");
    $pms->set_tag('CLAMAVRESULT', "configuration missing");
    return 0;
  }

  my $timer = Mail::SpamAssassin::Timeout->new(
           { secs => $timeout, deadline => $pms->{master_deadline} });
  my $err = $timer->run_and_catch(sub {

    my $result;
    # The trailing 1 makes success detectable without trusting $@ alone.
    my $scan_ok = eval {
      $result = $clamav->scan_scalar($fulltext);
      1;
    };

    if (!$scan_ok) {
      # stringify first: the exception may be an object
      my $errstr = "$@";
      # NOTE(review): Mail::SpamAssassin::Timeout is understood to signal
      # expiry by dying with an "__alarm__ignore__" marker.  That exception
      # belongs to the timer, so pass it on instead of reporting it as a
      # scan error.  If the marker differs, this line is simply inert.
      die $errstr if $errstr =~ /__alarm__ignore__/;
      chomp $errstr;
      $errstr = 'unknown error' if $errstr eq '';
      $header = "Error: $errstr";
    } else {
      dbg("ClamAV: '%s'", ($result ? $result : 'No'));
      if ($result) {
        # drop the "(hex:digits)" suffix unless extended info was asked for
        $result =~ s/\([\da-f]+:\d+\)// unless $conf->{clamav_extended_detection};
        $header = "Yes: $result";
        $isspam = 1;
        # include the virus name in SpamAssassin's report
        $pms->test_log($result);
      }
    }

  });  # anonsub for timeout

  if ($timer->timed_out()) {
    dbg("ClamAV: check timed out after $timeout seconds");
    # record the reason, as the other failure paths do, so the spamd result
    # line does not show an empty clamsig= entry
    $pms->set_tag('CLAMAVRESULT', "Error: check timed out after $timeout seconds");
    return 0;
  }

  # an exception escaped the callback for some reason other than the timeout
  if ($err) {
    my $errtxt = "$err";
    chomp $errtxt;
    dbg("ClamAV: Something strange happened: $errtxt");
    # the scan did not finish normally, so do not report the message as clean
    $header = "Error: $errtxt" if $header eq "No";
  }

  dbg("ClamAV: result - $header");
  $pms->set_tag('CLAMAVRESULT', $header);
  # add a metadatum so that rules can match against the result too
  $pms->{msg}->put_metadata('X-Spam-Virus', $header);
  return $isspam;
}

1;
