
mmartinec at apache
Nov 24, 2009, 8:32 AM
Post #1 of 1
(187 views)
Permalink
|
|
svn commit: r883770 - in /spamassassin/trunk: lib/Mail/SpamAssassin/AutoWhitelist.pm lib/Mail/SpamAssassin/Plugin/AWL.pm lib/Mail/SpamAssassin/SQLBasedAddrList.pm sql/README.awl sql/awl_mysql.sql sql/awl_pg.sql t/data/spam/004
|
|
Author: mmartinec Date: Tue Nov 24 16:32:51 2009 New Revision: 883770 URL: http://svn.apache.org/viewvc?rev=883770&view=rev Log: Bug 6203: make AWL CIDR mask configurable: auto_whitelist_ipv4_mask_len and auto_whitelist_ipv6_mask_len; update README.awl and sql/awl_*.sql accordingly (increasing awl.ip field width); 'fix' the t/data/spam/004 sample mail to avoid a test failing with a /24 net mask; avoid race condition in SQLBasedAddrList.pm when multiple processes try to insert-or-update an awl SQL record: try INSERT first, and if that fails go for UPDATE. Modified: spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm spamassassin/trunk/sql/README.awl spamassassin/trunk/sql/awl_mysql.sql spamassassin/trunk/sql/awl_pg.sql spamassassin/trunk/t/data/spam/004 Modified: spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm?rev=883770&r1=883769&r2=883770&view=diff ============================================================================== --- spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm (original) +++ spamassassin/trunk/lib/Mail/SpamAssassin/AutoWhitelist.pm Tue Nov 24 16:32:51 2009 @@ -45,7 +45,7 @@ use warnings; use bytes; use re 'taint'; -use NetAddr::IP; +use NetAddr::IP qw(:upper); # ensure IPv6 string case even if default changes use Mail::SpamAssassin; use Mail::SpamAssassin::Logger; @@ -64,18 +64,20 @@ $class = ref($class) || $class; my ($main, $msg) = @_; + my $conf = $main->{conf}; my $self = { - 'main' => $main, + main => $main, + factor => $conf->{auto_whitelist_factor}, + ipv4_mask_len => $conf->{auto_whitelist_ipv4_mask_len}, + ipv6_mask_len => $conf->{auto_whitelist_ipv6_mask_len}, }; - $self->{factor} = $main->{conf}->{auto_whitelist_factor}; - my $factory; if ($main->{pers_addr_list_factory}) { $factory = $main->{pers_addr_list_factory}; } else { - my $type = $main->{conf}->{auto_whitelist_factory}; + my $type = $conf->{auto_whitelist_factory}; if ($type =~ /^([_A-Za-z0-9:]+)$/) { $type = untaint_var($type); eval ' @@ -278,39 +280,68 @@ ########################################################################### -sub pack_addr { - my ($self, $addr, $origip) = @_; - - $addr = lc $addr; - $addr =~ s/[\000\;\'\"\!\|]/_/gs; # paranoia +sub ip_to_awl_key { + my ($self, $origip) = @_; + my $result; local $1; if (!defined $origip) { - # could not find an IP address to use, could be localhost mail or from - # the user running "add-addr-to-*". - $origip = 'none'; + # could not find an IP address to use } elsif ($origip =~ /^ (\d{1,3} \. \d{1,3}) \. \d{1,3} \. \d{1,3} $/xs) { - $origip = $1; - } elsif ($origip =~ /:/ && + my $mask_len = $self->{ipv4_mask_len}; + $mask_len = 16 if !defined $mask_len; + # handle the default and easy cases manually + if ($mask_len == 32) { + $result = $origip; + } elsif ($mask_len == 16) { + $result = $1; + } else { + my $origip_obj = NetAddr::IP->new($origip . '/' . $mask_len); + if (!defined $origip_obj) { # invalid IPv4 address + dbg("auto-whitelist: bad IPv4 address $origip"); + } else { + $result = $origip_obj->network->addr; + $result =~s/(\.0){1,3}\z//; # truncate zero tail + } + } + } elsif ($origip =~ /:/ && # triage $origip =~ /^ [0-9a-f]{0,4} (?: : [0-9a-f]{0,4} | \. [0-9]{1,3} ){2,9} $/xsi) { # looks like an IPv6 address - my $origip_obj = NetAddr::IP->new6($origip); + my $mask_len = $self->{ipv6_mask_len}; + $mask_len = 48 if !defined $mask_len; + my $origip_obj = NetAddr::IP->new6($origip . '/' . $mask_len); if (!defined $origip_obj) { # invalid IPv6 address dbg("auto-whitelist: bad IPv6 address $origip"); - $origip = 'junk-' . $origip; } else { - $origip = $origip_obj->full6; # string in a canonical form - $origip =~ s/(:[0-9a-f]{4}){5}\z//si; # keep only the /48 network addr - $origip .= '::'; # although it wastes 2 chars, it's nice to make it - # look like a syntactically correct IPv6 address + $result = $origip_obj->network->full6; # string in a canonical form + $result =~ s/(:0000){1,7}\z/::/; # compress zero tail } } else { dbg("auto-whitelist: bad IP address $origip"); - $origip =~ s/[^0-9A-Fa-f:.]/_/gs; # paranoia - $origip = 'junk-' . $origip; } - $origip = substr($origip,0,16) if length($origip) > 16; # awl.ip field + if (defined $result && length($result) > 39) { # just in case, keep under + $result = substr($result,0,39); # the awl.ip field size + } + dbg("auto-whitelist: IP masking %s -> %s", $origip,$result); + return $result; +} + +########################################################################### + +sub pack_addr { + my ($self, $addr, $origip) = @_; + + $addr = lc $addr; + $addr =~ s/[\000\;\'\"\!\|]/_/gs; # paranoia + + if (!defined $origip) { + # could not find an IP address to use, could be localhost mail + # or from the user running "add-addr-to-*". + $origip = 'none'; + } else { + $origip = $self->ip_to_awl_key($origip); + } return $addr . "|ip=" . $origip; } Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm?rev=883770&r1=883769&r2=883770&view=diff ============================================================================== --- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm (original) +++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm Tue Nov 24 16:32:51 2009 @@ -148,6 +148,67 @@ type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC }); +=item auto_whitelist_ipv4_mask_len n (default: 16, range [0..32]) + +The AWL database keeps only the specified number of most-significant bits +of an IPv4 address in its fields, so that different individual IP addresses +within a subnet belonging to the same owner are managed under a single +database record. As we have no information available on the allocated +address ranges of senders, this CIDR mask length is only an approximation. +The default is 16 bits, corresponding to a former class B. Increase the +number if a finer granularity is desired, e.g. to 24 (class C) or 32. +A value 0 is allowed but is not particularly useful, as it would treat the +whole internet as a single organization. The number need not be a multiple +of 8, any split is allowed. + +=cut + + push (@cmds, { + setting => 'auto_whitelist_ipv4_mask_len', + default => 16, + type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC, + code => sub { + my ($self, $key, $value, $line) = @_; + if (!defined $value || $value eq '') { + return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; + } elsif ($value !~ /^\d+$/ || $value < 0 || $value > 32) { + return $Mail::SpamAssassin::Conf::INVALID_VALUE; + } + $self->{auto_whitelist_ipv4_mask_len} = $value; + } + }); + +=item auto_whitelist_ipv6_mask_len n (default: 48, range [0..128]) + +The AWL database keeps only the specified number of most-significant bits +of an IPv6 address in its fields, so that different individual IP addresses +within a subnet belonging to the same owner are managed under a single +database record. As we have no information available on the allocated address +ranges of senders, this CIDR mask length is only an approximation. The default +is 48 bits, corresponding to an address range commonly allocated to individual +(smaller) organizations. Increase the number for a finer granularity, e.g. +to 64 or 96 or 128, or decrease for wider ranges, e.g. 32. A value 0 is +allowed but is not particularly useful, as it would treat the whole internet +as a single organization. The number need not be a multiple of 4, any split +is allowed. + +=cut + + push (@cmds, { + setting => 'auto_whitelist_ipv6_mask_len', + default => 48, + type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC, + code => sub { + my ($self, $key, $value, $line) = @_; + if (!defined $value || $value eq '') { + return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; + } elsif ($value !~ /^\d+$/ || $value < 0 || $value > 128) { + return $Mail::SpamAssassin::Conf::INVALID_VALUE; + } + $self->{auto_whitelist_ipv6_mask_len} = $value; + } + }); + =item user_awl_sql_override_username Used by the SQLBasedAddrList storage implementation. Modified: spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm?rev=883770&r1=883769&r2=883770&view=diff ============================================================================== --- spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm (original) +++ spamassassin/trunk/lib/Mail/SpamAssassin/SQLBasedAddrList.pm Tue Nov 24 16:32:51 2009 @@ -136,7 +136,8 @@ my $dbh = DBI->connect($dsn, $dbuser, $dbpass, {'PrintError' => 0}); if(!$dbh) { - dbg("auto-whitelist: sql-based unable to connect to database ($dsn) : " . DBI::errstr); + info("auto-whitelist: sql-based unable to connect to database (%s) : %s", + $dsn, DBI::errstr); return undef; } @@ -203,26 +204,28 @@ my $sql = "SELECT count, totscore FROM $self->{tablename} " . "WHERE username = ? AND email = ?"; - my @args = ($email); - if ($self->{_with_awl_signer}) { - my @signedby = !defined $signedby ? '' : split(' ', lc $signedby); - if (@signedby == 1) { + my @args = ( $email ); + if (!$self->{_with_awl_signer}) { + $sql .= " AND ip = ?"; + push(@args, $ip); + } else { + my @signedby = !defined $signedby ? () : split(' ', lc $signedby); + if (!@signedby) { + $sql .= " AND signedby = '' AND ip = ?"; + push(@args, $ip); + } elsif (@signedby == 1) { $sql .= " AND signedby = ?"; } elsif (@signedby > 1) { $sql .= " AND signedby IN (" . join(',', ('?') x @signedby) . ")"; } push(@args, @signedby); } - if (!$self->{_with_awl_signer} || !defined $signedby || $signedby eq '') { - $sql .= " AND ip = ?"; - push(@args, $ip); - } my $sth = $self->{dbh}->prepare($sql); my $rc = $sth->execute($self->{_username}, @args); if (!$rc) { # there was an error, but try to go on - my $err = $self->{dbh}->errstr; - dbg("auto-whitelist: sql-based get_addr_entry: SQL error: $err"); + info("auto-whitelist: sql-based get_addr_entry %s: SQL error: %s", + join('|',@args), $sth->errstr); $entry->{count} = 0; $entry->{totscore} = 0; } @@ -240,7 +243,7 @@ $cnt++; } dbg("auto-whitelist: sql-based get_addr_entry: %s for %s", - $cnt ? "found $cnt entry" : 'no entries found', + $cnt ? "found $cnt entries" : 'no entries found', join('|',@args) ); } $sth->finish(); @@ -279,62 +282,81 @@ return $entry unless $email ne '' && (defined $ip || defined $signedby); - if ($entry->{exists_p}) { # entry already exists, so just update - my(@args) = ($score, $self->{_username}, $email); + # try inserting first, and if that fails we'll do the update; this way + # we avoid to large extent a race condition between multiple processes + + my $inserted = 0; + + { my @fields = qw(username email ip count totscore); + my @signedby; + if ($self->{_with_awl_signer}) { + push(@fields, 'signedby'); + @signedby = !defined $signedby ? () : split(' ', lc $signedby); + @signedby = ( '' ) if !@signedby; + } + my @args = ($self->{_username}, $email, $ip, 1, $score); + my $sql = sprintf("INSERT INTO %s (%s) VALUES (%s)", $self->{tablename}, + join(',', @fields), join(',', ('?') x @fields)); + my $sth = $self->{dbh}->prepare($sql); + + if (!$self->{_with_awl_signer}) { + my $rc = $sth->execute(@args); + if (!$rc) { + dbg("auto-whitelist: sql-based add_score/insert %s: SQL error: %s", + join('|',@args), $sth->errstr); + } else { + dbg("auto-whitelist: sql-based add_score/insert ". + "score %s: %s", $score, join('|',@args)); + $inserted = 1; $entry->{exists_p} = 1; + } + } else { + for my $s (@signedby) { + my $rc = $sth->execute(@args, $s); + if (!$rc) { + dbg("auto-whitelist: sql-based add_score/insert %s: SQL error: %s", + join('|',@args,$s), $sth->errstr); + } else { + dbg("auto-whitelist: sql-based add_score/insert ". + "score %s: %s", $score, join('|',@args,$s)); + $inserted = 1; $entry->{exists_p} = 1; + } + } + } + } + + if (!$inserted) { + # insert failed, assume primary key constraint, so try the update + my $sql = "UPDATE $self->{tablename} ". "SET count = count + 1, totscore = totscore + ? ". "WHERE username = ? AND email = ?"; - my @signedby; + my(@args) = ($score, $self->{_username}, $email); if ($self->{_with_awl_signer}) { - @signedby = !defined $signedby ? '' : split(' ', lc $signedby); - if (@signedby == 1) { + my @signedby = !defined $signedby ? () : split(' ', lc $signedby); + if (!@signedby) { + $sql .= " AND signedby = ''"; + } elsif (@signedby == 1) { $sql .= " AND signedby = ?"; } elsif (@signedby > 1) { $sql .= " AND signedby IN (" . join(',', ('?') x @signedby) . ")"; } push(@args, @signedby); } - if (!$self->{_with_awl_signer} || !defined $signedby || $signedby eq '') { - $sql .= " AND ip = ?"; - push(@args, $ip); - } + $sql .= " AND ip = ?"; + push(@args, $ip); + my $sth = $self->{dbh}->prepare($sql); my $rc = $sth->execute(@args); if (!$rc) { - my $err = $self->{dbh}->errstr; - dbg("auto-whitelist: sql-based add_score/update: SQL error: $err"); - } - else { - dbg("auto-whitelist: sql-based add_score: ". + info("auto-whitelist: sql-based add_score/update %s: SQL error: %s", + join('|',@args), $sth->errstr); + } else { + dbg("auto-whitelist: sql-based add_score/update ". "new count: %s, new totscore: %s for %s", - $entry->{count}, $entry->{totscore}, join('|',@args[2..$#args])); - } - $sth->finish(); - } - else { # no entry yet, so insert a new entry - my @fields = qw(username email ip count totscore); - my @args = ($self->{_username}, $email, $ip, 1, $score); - my @signedby; - if ($self->{_with_awl_signer}) { - push(@fields, 'signedby'); - @signedby = !defined $signedby ? '' : split(' ', lc $signedby); - } - my $sql = sprintf("INSERT INTO %s (%s) VALUES (%s)", $self->{tablename}, - join(',', @fields), join(',', ('?') x @fields)); - my $sth = $self->{dbh}->prepare($sql); - for my $s (@signedby) { - # loop runs for at least one element, possibly an empty signing domain - my $rc = $sth->execute(@args, (@fields > @args ? $s : ()) ); - if (!$rc) { - my $err = $self->{dbh}->errstr; - dbg("auto-whitelist: sql-based add_score/insert: SQL error: $err"); - } + $entry->{count}, $entry->{totscore}, join('|',@args)); + $entry->{exists_p} = 1; } - $entry->{exists_p} = 1; - dbg("auto-whitelist: sql-based add_score: created new entry ". - "with totscore %s: %s", $score, join('|',@args[2..$#args])); - $sth->finish(); } return $entry; @@ -387,8 +409,8 @@ my $rc = $sth->execute(@args); if (!$rc) { - my $err = $self->{dbh}->errstr; - dbg("auto-whitelist: sql-based remove_entry: SQL error: $err"); + info("auto-whitelist: sql-based remove_entry %s: SQL error: %s", + join('|',@args), $sth->errstr); } else { # We might normally have a dbg saying we removed the address Modified: spamassassin/trunk/sql/README.awl URL: http://svn.apache.org/viewvc/spamassassin/trunk/sql/README.awl?rev=883770&r1=883769&r2=883770&view=diff ============================================================================== --- spamassassin/trunk/sql/README.awl (original) +++ spamassassin/trunk/sql/README.awl Tue Nov 24 16:32:51 2009 @@ -74,7 +74,7 @@ username varchar(100) # this is the username whose e-mail is being filtered email varchar(200) # this is the address key - ip varchar(16) # this is the ip key (fits IPv4/24 or IPv6/48) + ip varchar(40) # this is the ip key (fits IPv4 IPv6) count int(11) # this is the message counter totscore float # this is the total calculated score signedby varchar(255) # a DKIM or DomainKeys signing domain(s) @@ -108,7 +108,7 @@ CREATE TABLE awl ( username varchar(100) NOT NULL default '', email varchar(255) NOT NULL default '', - ip varchar(16) NOT NULL default '', + ip varchar(40) NOT NULL default '', count int(11) NOT NULL default '0', totscore float NOT NULL default '0', signedby varchar(255) NOT NULL default '', @@ -137,11 +137,11 @@ auto_whitelist_distinguish_signed 1 To extend a field awl.ip on an existing table to be able to fit -an IPv6 addresses (its /48 network part) or an IPv4 address (/24): +an IPv6 addresses (39 characters would suffice) or an IPv4 address: under MySQL: - ALTER TABLE awl MODIFY ip varchar(16); + ALTER TABLE awl MODIFY ip varchar(40); under PostgreSQL: - ALTER TABLE awl ALTER ip TYPE varchar(16); + ALTER TABLE awl ALTER ip TYPE varchar(40); Once you have created the database and added the table, just add the Modified: spamassassin/trunk/sql/awl_mysql.sql URL: http://svn.apache.org/viewvc/spamassassin/trunk/sql/awl_mysql.sql?rev=883770&r1=883769&r2=883770&view=diff ============================================================================== --- spamassassin/trunk/sql/awl_mysql.sql (original) +++ spamassassin/trunk/sql/awl_mysql.sql Tue Nov 24 16:32:51 2009 @@ -1,7 +1,7 @@ CREATE TABLE awl ( username varchar(100) NOT NULL default '', email varchar(255) NOT NULL default '', - ip varchar(16) NOT NULL default '', + ip varchar(40) NOT NULL default '', count int(11) NOT NULL default '0', totscore float NOT NULL default '0', signedby varchar(255) NOT NULL default '', Modified: spamassassin/trunk/sql/awl_pg.sql URL: http://svn.apache.org/viewvc/spamassassin/trunk/sql/awl_pg.sql?rev=883770&r1=883769&r2=883770&view=diff ============================================================================== --- spamassassin/trunk/sql/awl_pg.sql (original) +++ spamassassin/trunk/sql/awl_pg.sql Tue Nov 24 16:32:51 2009 @@ -1,7 +1,7 @@ CREATE TABLE awl ( username varchar(100) NOT NULL default '', email varchar(255) NOT NULL default '', - ip varchar(16) NOT NULL default '', + ip varchar(40) NOT NULL default '', count bigint NOT NULL default '0', totscore float NOT NULL default '0', signedby varchar(255) NOT NULL default '', Modified: spamassassin/trunk/t/data/spam/004 URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/data/spam/004?rev=883770&r1=883769&r2=883770&view=diff ============================================================================== --- spamassassin/trunk/t/data/spam/004 (original) +++ spamassassin/trunk/t/data/spam/004 Tue Nov 24 16:32:51 2009 @@ -16,7 +16,7 @@ Received: from ns.sakakura-kk.co.jp (ns.sakakura-kk.co.jp [61.119.13.18]) by mail.netnoteinc.com (Postfix) with ESMTP id 4FD6F1143D6 for <jm [at] netnoteinc>; Thu, 6 Dec 2001 23:58:02 +0000 (Eire) -Received: from plain (ZHONGXIN [212.17.88.134]) by ns.sakakura-kk.co.jp +Received: from plain (ZHONGXIN [212.17.35.134]) by ns.sakakura-kk.co.jp with SMTP (Microsoft Exchange Internet Mail Service Version 5.5.2653.13) id YJQP6DKP; Fri, 7 Dec 2001 08:15:01 +0900 Received: from 144.137.3.98 (SquirrelMail authenticated user jmmail) by
|