I spent a little time reading the Amavis and Amavis::AV package sections of Amavisd-new-2.3.3. There are two ways to call ClamAV, selected in /etc/amavisd.conf: calling Mail::ClamAV is memory hungry, whereas talking to the clamd daemon over its Unix socket is much more efficient:
!!!Note: a line whose line number is prefixed with '!!!' is part of the call trace, i.e. a point where one subroutine calls the next.
----------
@av_scanners = (
# ### http://www.clamav.net/
['ClamAV-clamd',
\&ask_daemon, ["CONTSCAN {}\n", "/var/run/clamav/clamd.sock"],
qr/\bOK$/, qr/\bFOUND$/,
qr/^.*?: (?!Infected Archive)(.*) FOUND$/ ],
# # NOTE: the easiest is to run clamd under the same user as amavisd; match the
# # socket name (LocalSocket) in clamav.conf to the socket name in this entry
# # When running chrooted one may prefer: ["CONTSCAN {}\n","$MYHOME/clamd"],
# ### http://www.clamav.net/ and CPAN (memory-hungry! clamd is preferred)
# ['Mail::ClamAV', \&ask_clamav, "*", [0], [1], qr/^INFECTED: (.+)/],
.....snip...
.....snip....
);
The meaning of the fields in each @av_scanners entry is described by the following comment:
# @av_scanners is a list of n-tuples, where fields semantics is:
# 1. av scanner plain name, to be used in log and reports;
# 2. scanner program name; this string will be submitted to subroutine
# find_external_programs(), which will try to find the full program path
# name during startup; if program is not found, this scanner is disabled.
# Besides a simple string (full program path name or just the basename
# to be looked for in PATH), this may be an array ref of alternative
# program names or full paths - the first match in the list will be used;
# As a special case for more complex scanners, this field may be
# a subroutine reference, and the whole n-tuple is passed to it as args.
# 3. command arguments to be given to the scanner program;
# a substring {} will be replaced by the directory name to be scanned, i.e.
# "$tempdir/parts", a "*" will be replaced by base file names of parts;
# 4. an array ref of av scanner exit status values, or a regexp (to be
# matched against scanner output), indicating NO VIRUSES found;
# a special case is a value undef, which does not claim file to be clean
# (i.e. it never matches, similar to []), but suppresses a failure warning;
# to be used when the result is inconclusive (useful for specialized and
# quick partial scanners such as jpeg checker);
# 5. an array ref of av scanner exit status values, or a regexp (to be
# matched against scanner output), indicating VIRUSES WERE FOUND;
# Note: the virus match prevails over a 'not found' match, so it is safe
# even if the no. 4. matches for viruses too;
# 6. a regexp (to be matched against scanner output), returning a list
# of virus names found, or a sub ref, returning such a list when given
# scanner output as argument;
# 7. and 8.: (optional) subroutines to be executed before and after scanner
# (e.g. to set environment or current directory);
# see examples for these at KasperskyLab AVP and NAI uvscan.
-------------------------
The Amavis package does the main work of amavisd, including deciding whether to run the virus scan and the spam scan:
7627 package Amavis;
7628 require 5.005; # need qr operator and \z in regexps
7629 use strict;
7630 use re 'taint';
......snip.........
8407 ### The heart of the program
8408 ### user customizable Net::Server hook
8409 sub process_request {
.....snip....
8834 # Checks the message stored on a file. File must already
8835 # be open on file handle $msginfo->mail_text; it need not be positioned
8836 # properly, check_mail must not close the file handle.
8837 #
8838 sub check_mail($$$) {
.........snip.......
9187 my ($av_ret);
9188 eval {
9189 my ( $vn, $ds );
9190 ( $av_ret, $av_output, $vn, $ds ) =
!!!9191 Amavis::AV::virus_scan( $tempdir, $child_task_count == 1,
!!!9192 $parts_root );
9193 @virusname = @$vn;
9194 @detecting_scanners = @$ds; # copy
9195 };
......snip........
9254 # consider doing spam scanning
9255 if ( !$extra_code_antispam ) {
9256 do_log( 5, "no anti-spam code loaded, skipping spam_scan" );
9257 }
9258 elsif (@virusname) {
9259 do_log( 5, "infected contents, skipping spam_scan" );
9260 }
9261 elsif ($banned_filename_all) {
9262 do_log( 5, "banned contents, skipping spam_scan" );
9263 }
...........snip.........
9281 else {
9282 $which_section = "spam_scan";
9283 ( $spam_level, $spam_status, $spam_report, $autolearn_status ) =
9284 Amavis::SpamControl::spam_scan( $conn, $msginfo );
........snip........
11424 #
11425 # Main program starts here
11426 #
11427
11428 # Read dynamic source code, and logging and notification message templates
11429 # from the end of this file (pseudo file handle DATA)
11430 #
11431 $Amavis::Conf::notify_spam_admin_templ = ''; # not used
11432 $Amavis::Conf::notify_spam_recips_templ = ''; # not used
11433 do {
11434 local ($/) = "__DATA__\n"; # set line terminator to this string
11435 chomp( $_ = <Amavis::DATA> ) for (
.............snip..........
11798 # set up Net::Server configuration
11799 my $server = bless {
11800 server => {
........snip........
11846
11847 $0 = 'amavisd (master)';
11848 $server->run; # transfer control to Net::Server
11849
11850 # shouldn't get here
11851 exit 1;
11852
End package Amavis
package Amavis::AV;
....
.....
#ask_daemon is a subroutine available for calling from @av_scanners list entries;
#it has the same args and returns as run_av() below
!!!15028 sub ask_daemon { ask_av(\&ask_daemon_internal, @_) }
# ask_av is a common subroutine available to be used by ask_daemon, ask_clamav,
# ask_sophos_savi and similar front-end routines used in @av_scanners entries.
# It traverses supplied files or directory ($bare_fnames) and calls a supplied
# subroutine for each file to be scanned, summarizing the final av scan result.
# It has the same args and returns as run_av() below, prepended by a checking
# subroutine argument.
15291 sub ask_av {
15292 my($code) = shift; # strip away the first argument, a subroutine ref
15293 my($bare_fnames,$names_to_parts,$tempdir, $av_name,$command,$args,
15294 $sts_clean,$sts_infected,$how_to_get_names) = @_;
15295 my($query_template) = ref $args eq 'ARRAY' ? $args->[0] : $args;
.....snip.....
.....
!!!15310 my($t_status,$t_output) = &$code($query, @_);
.....
...
do_log(3,"$av_name result: clean") if defined($scan_status) && !$scan_status;
($scan_status,$output,\@virusname);
}
The subroutine ask_daemon_internal does the actual virus-scanning work:
# same args and returns as run_av() below,
# but prepended by a $query, which is the string to be sent to the daemon.
# Handles both UNIX and INET domain sockets.
# More than one socket may be specified for redundancy, they will be tried
# one after the other until one succeeds.
#
15201 sub ask_daemon_internal {
15202 my($query, # expanded query template, often a command and a file or dir name
15203 $bare_fnames,$names_to_parts,$tempdir, $av_name,$command,$args,
15204 $sts_clean,$sts_infected,$how_to_get_names, # regexps
15205 ) = @_;
........snip......
15237 # UGLY: bypass send method in IO::Socket to be able to retrieve
15238 # status/errno directly from 'send', not from 'getpeername':
15239 defined send($st_sock{$socketname}, $query, 0)
15240 or die "Can't send to socket $socketname: $!\n";
15241 my($rv); my($buff) = ''; undef $!;
!!!15242 while (defined($rv = $st_sock{$socketname}->recv($buff,8192,0))) {
*15243 $output .= $buff;
15244 last if $multisession || $buff eq '';
15245 undef $!;
........snip.......
15282 (0,$output); # return synthesised status and result string
15283 }
# Call a virus scanner and parse its output.
# Returns a triplet (or die in case of failure).
# The first element of the triplet is interpreted as follows:
# - true if virus found,
# - 0 if no viruses found,
# - undef if it did not complete its job;
# the second element is a string, the text as provided by the virus scanner;
# the third element is ref to a list of virus names found (if any).
# (it is guaranteed the list will be nonempty if virus was found)
#
15356 sub run_av {
15357 # first three args are prepended, not part of n-tuple
15358 my($bare_fnames, # a ref to a list of filenames to scan (basenames)
15359 $names_to_parts, # ref to a hash that maps base file names to parts object
15360 $tempdir, # temporary directory
15361 $av_name, $command, $args,
15362 $sts_clean, # a ref to a list of status values, or a regexp
15363 $sts_infected, # a ref to a list of status values, or a regexp
15364 $how_to_get_names, # ref to sub, or a regexp to get list of virus names
15365 $pre_code, $post_code, # routines to be invoked before and after av
15366 ) = @_;
15367 my($scan_status,$virusnames,$error_str); my($output) = '';
15368 &$pre_code(@_) if defined $pre_code;
!!!15369 if (ref($command) eq 'CODE') {
!!!15370 do_log(3,"Using $av_name: (built-in interface)");
!!!15371 ($scan_status,$output,$virusnames) = &$command(@_);
15372 } else {
........snip.........
15431 ($scan_status, $output, $virusnames);
15432 }
15434 sub virus_scan($$$) {
15435 my($tempdir,$firsttime,$parts_root) = @_;
15436 my($scan_status,$output,@virusname,@detecting_scanners);
15437 my($anyone_done); my($anyone_tried);
15438 my($bare_fnames_ref,$names_to_parts);
.............................
15455 if (!@$bare_fnames_ref) { # no files to scan?
15456 ($this_status,$this_output,$this_vn) = (0, '', []); # declare clean
15457 } else { # call virus scanner
15458 eval {
15459 ($this_status,$this_output,$this_vn) =
!!!15460 run_av($bare_fnames_ref,$names_to_parts,$tempdir, @$av);
15461 };
....................
15491 ($scan_status, $output, \@virusname, \@detecting_scanners); # return a quad
15492 }
....
....
1;
#END package Amavis::AV
!!!Note: a line whose line number is prefixed with '!!!' is part of the call trace, i.e. a point where one subroutine calls the next.
----------
@av_scanners = (
# ### http://www.clamav.net/
['ClamAV-clamd',
\&ask_daemon, ["CONTSCAN {}\n", "/var/run/clamav/clamd.sock"],
qr/\bOK$/, qr/\bFOUND$/,
qr/^.*?: (?!Infected Archive)(.*) FOUND$/ ],
# # NOTE: the easiest is to run clamd under the same user as amavisd; match the
# # socket name (LocalSocket) in clamav.conf to the socket name in this entry
# # When running chrooted one may prefer: ["CONTSCAN {}\n","$MYHOME/clamd"],
# ### http://www.clamav.net/ and CPAN (memory-hungry! clamd is preferred)
# ['Mail::ClamAV', \&ask_clamav, "*", [0], [1], qr/^INFECTED: (.+)/],
.....snip...
.....snip....
);
The meaning of the fields in each @av_scanners entry is described by the following comment:
# @av_scanners is a list of n-tuples, where fields semantics is:
# 1. av scanner plain name, to be used in log and reports;
# 2. scanner program name; this string will be submitted to subroutine
# find_external_programs(), which will try to find the full program path
# name during startup; if program is not found, this scanner is disabled.
# Besides a simple string (full program path name or just the basename
# to be looked for in PATH), this may be an array ref of alternative
# program names or full paths - the first match in the list will be used;
# As a special case for more complex scanners, this field may be
# a subroutine reference, and the whole n-tuple is passed to it as args.
# 3. command arguments to be given to the scanner program;
# a substring {} will be replaced by the directory name to be scanned, i.e.
# "$tempdir/parts", a "*" will be replaced by base file names of parts;
# 4. an array ref of av scanner exit status values, or a regexp (to be
# matched against scanner output), indicating NO VIRUSES found;
# a special case is a value undef, which does not claim file to be clean
# (i.e. it never matches, similar to []), but suppresses a failure warning;
# to be used when the result is inconclusive (useful for specialized and
# quick partial scanners such as jpeg checker);
# 5. an array ref of av scanner exit status values, or a regexp (to be
# matched against scanner output), indicating VIRUSES WERE FOUND;
# Note: the virus match prevails over a 'not found' match, so it is safe
# even if the no. 4. matches for viruses too;
# 6. a regexp (to be matched against scanner output), returning a list
# of virus names found, or a sub ref, returning such a list when given
# scanner output as argument;
# 7. and 8.: (optional) subroutines to be executed before and after scanner
# (e.g. to set environment or current directory);
# see examples for these at KasperskyLab AVP and NAI uvscan.
-------------------------
The Amavis package does the main work of amavisd, including deciding whether to run the virus scan and the spam scan:
7627 package Amavis;
7628 require 5.005; # need qr operator and \z in regexps
7629 use strict;
7630 use re 'taint';
......snip.........
8407 ### The heart of the program
8408 ### user customizable Net::Server hook
8409 sub process_request {
.....snip....
8834 # Checks the message stored on a file. File must already
8835 # be open on file handle $msginfo->mail_text; it need not be positioned
8836 # properly, check_mail must not close the file handle.
8837 #
8838 sub check_mail($$$) {
.........snip.......
9187 my ($av_ret);
9188 eval {
9189 my ( $vn, $ds );
9190 ( $av_ret, $av_output, $vn, $ds ) =
!!!9191 Amavis::AV::virus_scan( $tempdir, $child_task_count == 1,
!!!9192 $parts_root );
9193 @virusname = @$vn;
9194 @detecting_scanners = @$ds; # copy
9195 };
......snip........
9254 # consider doing spam scanning
9255 if ( !$extra_code_antispam ) {
9256 do_log( 5, "no anti-spam code loaded, skipping spam_scan" );
9257 }
9258 elsif (@virusname) {
9259 do_log( 5, "infected contents, skipping spam_scan" );
9260 }
9261 elsif ($banned_filename_all) {
9262 do_log( 5, "banned contents, skipping spam_scan" );
9263 }
...........snip.........
9281 else {
9282 $which_section = "spam_scan";
9283 ( $spam_level, $spam_status, $spam_report, $autolearn_status ) =
9284 Amavis::SpamControl::spam_scan( $conn, $msginfo );
........snip........
11424 #
11425 # Main program starts here
11426 #
11427
11428 # Read dynamic source code, and logging and notification message templates
11429 # from the end of this file (pseudo file handle DATA)
11430 #
11431 $Amavis::Conf::notify_spam_admin_templ = ''; # not used
11432 $Amavis::Conf::notify_spam_recips_templ = ''; # not used
11433 do {
11434 local ($/) = "__DATA__\n"; # set line terminator to this string
11435 chomp( $_ = <Amavis::DATA> ) for (
.............snip..........
11798 # set up Net::Server configuration
11799 my $server = bless {
11800 server => {
........snip........
11846
11847 $0 = 'amavisd (master)';
11848 $server->run; # transfer control to Net::Server
11849
11850 # shouldn't get here
11851 exit 1;
11852
End package Amavis
package Amavis::AV;
....
.....
#ask_daemon is a subroutine available for calling from @av_scanners list entries;
#it has the same args and returns as run_av() below
!!!15028 sub ask_daemon { ask_av(\&ask_daemon_internal, @_) }
# ask_av is a common subroutine available to be used by ask_daemon, ask_clamav,
# ask_sophos_savi and similar front-end routines used in @av_scanners entries.
# It traverses supplied files or directory ($bare_fnames) and calls a supplied
# subroutine for each file to be scanned, summarizing the final av scan result.
# It has the same args and returns as run_av() below, prepended by a checking
# subroutine argument.
15291 sub ask_av {
15292 my($code) = shift; # strip away the first argument, a subroutine ref
15293 my($bare_fnames,$names_to_parts,$tempdir, $av_name,$command,$args,
15294 $sts_clean,$sts_infected,$how_to_get_names) = @_;
15295 my($query_template) = ref $args eq 'ARRAY' ? $args->[0] : $args;
.....snip.....
.....
!!!15310 my($t_status,$t_output) = &$code($query, @_);
.....
...
do_log(3,"$av_name result: clean") if defined($scan_status) && !$scan_status;
($scan_status,$output,\@virusname);
}
The subroutine ask_daemon_internal does the actual virus-scanning work:
# same args and returns as run_av() below,
# but prepended by a $query, which is the string to be sent to the daemon.
# Handles both UNIX and INET domain sockets.
# More than one socket may be specified for redundancy, they will be tried
# one after the other until one succeeds.
#
15201 sub ask_daemon_internal {
15202 my($query, # expanded query template, often a command and a file or dir name
15203 $bare_fnames,$names_to_parts,$tempdir, $av_name,$command,$args,
15204 $sts_clean,$sts_infected,$how_to_get_names, # regexps
15205 ) = @_;
........snip......
15237 # UGLY: bypass send method in IO::Socket to be able to retrieve
15238 # status/errno directly from 'send', not from 'getpeername':
15239 defined send($st_sock{$socketname}, $query, 0)
15240 or die "Can't send to socket $socketname: $!\n";
15241 my($rv); my($buff) = ''; undef $!;
!!!15242 while (defined($rv = $st_sock{$socketname}->recv($buff,8192,0))) {
*15243 $output .= $buff;
15244 last if $multisession || $buff eq '';
15245 undef $!;
........snip.......
15282 (0,$output); # return synthesised status and result string
15283 }
# Call a virus scanner and parse its output.
# Returns a triplet (or die in case of failure).
# The first element of the triplet is interpreted as follows:
# - true if virus found,
# - 0 if no viruses found,
# - undef if it did not complete its job;
# the second element is a string, the text as provided by the virus scanner;
# the third element is ref to a list of virus names found (if any).
# (it is guaranteed the list will be nonempty if virus was found)
#
15356 sub run_av {
15357 # first three args are prepended, not part of n-tuple
15358 my($bare_fnames, # a ref to a list of filenames to scan (basenames)
15359 $names_to_parts, # ref to a hash that maps base file names to parts object
15360 $tempdir, # temporary directory
15361 $av_name, $command, $args,
15362 $sts_clean, # a ref to a list of status values, or a regexp
15363 $sts_infected, # a ref to a list of status values, or a regexp
15364 $how_to_get_names, # ref to sub, or a regexp to get list of virus names
15365 $pre_code, $post_code, # routines to be invoked before and after av
15366 ) = @_;
15367 my($scan_status,$virusnames,$error_str); my($output) = '';
15368 &$pre_code(@_) if defined $pre_code;
!!!15369 if (ref($command) eq 'CODE') {
!!!15370 do_log(3,"Using $av_name: (built-in interface)");
!!!15371 ($scan_status,$output,$virusnames) = &$command(@_);
15372 } else {
........snip.........
15431 ($scan_status, $output, $virusnames);
15432 }
15434 sub virus_scan($$$) {
15435 my($tempdir,$firsttime,$parts_root) = @_;
15436 my($scan_status,$output,@virusname,@detecting_scanners);
15437 my($anyone_done); my($anyone_tried);
15438 my($bare_fnames_ref,$names_to_parts);
.............................
15455 if (!@$bare_fnames_ref) { # no files to scan?
15456 ($this_status,$this_output,$this_vn) = (0, '', []); # declare clean
15457 } else { # call virus scanner
15458 eval {
15459 ($this_status,$this_output,$this_vn) =
!!!15460 run_av($bare_fnames_ref,$names_to_parts,$tempdir, @$av);
15461 };
....................
15491 ($scan_status, $output, \@virusname, \@detecting_scanners); # return a quad
15492 }
....
....
1;
#END package Amavis::AV