
felicity at apache
Jan 24, 2004, 11:36 AM
Post #1 of 1
(131 views)
Permalink
|
|
svn commit: rev 6260 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin
|
|
Author: felicity
Date: Sat Jan 24 10:36:47 2004
New Revision: 6260

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
Log:
bug 2956: uri tests weren't matching where they should if uris were encoded improperly. we now reencode the uris we found correctly and check them too.

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sat Jan 24 10:36:47 2004
@@ -1442,6 +1442,8 @@
 sub get_uri_list {
   my ($self) = @_;
+  $self->{found_bad_uri_encoding} = 0;
+
   my $textary = $self->get_decoded_body_text_array();
   my ($rulename, $pat, @uris);
   local ($_);
@@ -1491,6 +1493,15 @@
       #warn("Got URI: $uri\n");
       push @uris, $uri;
+    }
+  }
+
+  # Make sure we catch bad encoding tricks ...
+  foreach my $uri ( @uris ) {
+    my $nuri = Mail::SpamAssassin::Util::URLEncode($uri);
+    if ( $nuri ne $uri ) {
+      push(@uris, $nuri);
+      $self->{found_bad_uri_encoding} = 1;
     }
   }

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Sat Jan 24 10:36:47 2004
@@ -584,12 +584,40 @@
   # Get the type out ...
   $ct =~ s/;.*$//; # strip everything after first semi-colon
   $ct =~ s@^([^/]+(?:/[^/]*)?).*$@$1@; # only something/something ...
-  $ct =~ tr!\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135!!d; # strip inappropriate chars
+  $ct =~ tr/\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135//d; # strip inappropriate chars
   return wantarray ? ($ct,$boundary) : $ct;
 }
 ###########################################################################
+
+sub URLEncode {
+  my($url)=@_;
+  my(@characters)=split(/(\%[0-9a-fA-F]{2})/,$url);
+
+  foreach(@characters) {
+    if ( /\%[0-9a-fA-F]{2}/ ) { # Escaped character set ...
+      # IF it is in the range of 0x00-0x20 or 0x7f-0xff
+      # or it is one of "<", ">", """, "#", "%",
+      # ";", "/", "?", ":", "@", "=" or "&"
+      # THEN preserve its encoding
+      unless ( /(20|7f|[0189a-fA-F][0-9a-fA-F])/i
+        || /2[2356fF]|3[a-fA-F]|40/i )
+      {
+        s/\%([2-7][0-9a-fA-F])/sprintf "%c",hex($1)/e;
+      }
+    }
+    else { # Other stuff
+      # 0x00-0x20, 0x7f-0xff, <, >, and " ... "
+      s/([\000-\040\177-\377\074\076\042])
+        /sprintf "%%%02x",unpack("C",$1)/egx;
+    }
+  }
+  return join("",@characters);
+}
+
+###########################################################################
+
 sub dbg { Mail::SpamAssassin::dbg (@_); }
 1;
|