
felicity at apache
Jan 21, 2004, 2:49 PM
Post #1 of 1
(68 views)
Permalink
|
|
svn commit: rev 6249 - in incubator/spamassassin/trunk: . lib/Mail lib/Mail/SpamAssassin lib/Mail/SpamAssassin/MIME masses spamd
|
|
Author: felicity
Date: Wed Jan 21 13:49:00 2004
New Revision: 6249

Added:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm
      - copied, changed from rev 6247, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm
      - copied, changed from rev 6247, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm

Removed:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/NoMailAudit.pm

Modified:
   incubator/spamassassin/trunk/MANIFEST
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgLearner.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
   incubator/spamassassin/trunk/masses/mass-check
   incubator/spamassassin/trunk/spamassassin.raw
   incubator/spamassassin/trunk/spamd/spamd.raw

Log:
work towards merging new parser into other areas ...

Modified: incubator/spamassassin/trunk/MANIFEST ============================================================================== --- incubator/spamassassin/trunk/MANIFEST (original) +++ incubator/spamassassin/trunk/MANIFEST Wed Jan 21 13:49:00 2004 @@ -38,11 +38,10 @@ lib/Mail/SpamAssassin/HTML.pm lib/Mail/SpamAssassin/Locales.pm lib/Mail/SpamAssassin/Locker.pm -lib/Mail/SpamAssassin/MIME.pm -lib/Mail/SpamAssassin/MIME/Parser.pm +lib/Mail/SpamAssassin/MsgContainer.pm +lib/Mail/SpamAssassin/MsgParser.pm lib/Mail/SpamAssassin/MailingList.pm lib/Mail/SpamAssassin/NetSet.pm -lib/Mail/SpamAssassin/NoMailAudit.pm lib/Mail/SpamAssassin/PerMsgLearner.pm lib/Mail/SpamAssassin/PerMsgStatus.pm lib/Mail/SpamAssassin/PersistentAddrList.pm Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm ============================================================================== --- incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm (original) +++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm Wed Jan 21 13:49:00 2004 @@ -63,7 +63,7 @@ =head1 SYNOPSIS - my $mail = Mail::SpamAssassin::NoMailAudit->new(); + my $mail = Mail::SpamAssassin::MsgParser->parse(); my $spamtest = Mail::SpamAssassin->new(); my $status = $spamtest->check ($mail); @@ -111,7 +111,7 @@ use Mail::SpamAssassin::Conf; use Mail::SpamAssassin::ConfSourceSQL; use Mail::SpamAssassin::PerMsgStatus; -use Mail::SpamAssassin::NoMailAudit; +use Mail::SpamAssassin::MsgParser; use Mail::SpamAssassin::Bayes; use File::Basename; @@ -636,7 +636,7 @@ sub check_message_text { my $self = shift; my @lines = split (/^/m, $_[0]); - my $mail_obj = Mail::SpamAssassin::NoMailAudit->new ('data' => \@lines); + my $mail_obj = Mail::SpamAssassin::MsgParser->parse (\@lines); return $self->check ($mail_obj); } @@ -682,7 +682,7 @@ # Let's make sure the markup was removed first ... 
my @msg = split (/^/m, $self->remove_spamassassin_markup($mail)); - $mail = Mail::SpamAssassin::NoMailAudit->new ('data' => \@msg); + $mail = Mail::SpamAssassin::MsgParser->parse (\@msg); # learn as spam if enabled if ( $self->{conf}->{bayes_learn_during_report} ) { @@ -726,7 +726,7 @@ # Let's make sure the markup was removed first ... my @msg = split (/^/m, $self->remove_spamassassin_markup($mail)); - $mail = Mail::SpamAssassin::NoMailAudit->new ('data' => \@msg); + $mail = Mail::SpamAssassin::MsgParser->parse (\@msg); # learn as nonspam $self->learn ($mail, undef, 0, 0); @@ -1116,7 +1116,7 @@ dbg ("ignore: test message to precompile patterns and load modules"); $self->init($use_user_prefs); - my $mail = Mail::SpamAssassin::NoMailAudit->new(data => \@testmsg); + my $mail = Mail::SpamAssassin::MsgParser->parse(\@testmsg); my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail, { disable_auto_learning => 1 } ); $status->word_is_in_dictionary("aba"); # load triplets.txt into memory @@ -1159,7 +1159,7 @@ $self->init(1); $self->{syntax_errors} += $self->{conf}->{errors}; - my $mail = Mail::SpamAssassin::NoMailAudit->new(data => \@testmsg); + my $mail = Mail::SpamAssassin::MsgParser->parse(\@testmsg); my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail, { disable_auto_learning => 1 } ); $status->check(); Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm ============================================================================== --- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm (original) +++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm Wed Jan 21 13:49:00 2004 @@ -64,7 +64,7 @@ use Mail::SpamAssassin; use Mail::SpamAssassin::ArchiveIterator; -use Mail::SpamAssassin::NoMailAudit; +use Mail::SpamAssassin::MsgParser; use Mail::SpamAssassin::PerMsgLearner; use Getopt::Long; @@ -334,13 +334,13 @@ { die 'HITLIMIT'; } $messagecount++; - my $ma = Mail::SpamAssassin::NoMailAudit->new ('data' => 
$dataref); + my $ma = Mail::SpamAssassin::MsgParser->parse ($dataref); if ($ma->get ("X-Spam-Checker-Version")) { my $newtext = $spamtest->remove_spamassassin_markup($ma); my @newtext = split (/^/m, $newtext); $dataref = \@newtext; - $ma = Mail::SpamAssassin::NoMailAudit->new ('data' => $dataref); + $ma = Mail::SpamAssassin::MsgParser->parse ($dataref); } $ma->{noexit} = 1; Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm ============================================================================== --- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm (original) +++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm Wed Jan 21 13:49:00 2004 @@ -3623,8 +3623,8 @@ sub _multipart_alternative_difference { my($self) = @_; - my @ma = $self->{msg}->{mime_parts}->find_parts(qr@^multipart/alternative\b@i); - my @content = $self->{msg}->{mime_parts}->content_summary(); + my @ma = $self->{msg}->find_parts(qr@^multipart/alternative\b@i); + my @content = $self->{msg}->content_summary(); $self->{madiff} = 0; Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm (from rev 6247, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm) ============================================================================== --- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm (original) +++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm Wed Jan 21 13:49:00 2004 @@ -59,7 +59,7 @@ # University of Illinois, Urbana-Champaign. 
# </@LICENSE> -package Mail::SpamAssassin::MIME; +package Mail::SpamAssassin::MsgContainer; use strict; use MIME::Base64; use Mail::SpamAssassin; @@ -72,6 +72,7 @@ sub new { my $class = shift; $class = ref($class) || $class; + my %opts = @_; my $self = { headers => {}, @@ -80,6 +81,10 @@ header_order => [], }; + foreach ( 'noexit' ) { + $self->{$_} = $opts{$_} if ( exists $opts{$_} ); + } + bless($self,$class); $self; @@ -367,6 +372,91 @@ return $header; } + +sub get_pristine_header { + my ($self, $hdr) = @_; + + return $self->{pristine_headers} unless $hdr; + my(@ret) = $self->{pristine_headers} =~ /^(?:$hdr:[ ]+(.*\n(?:\s+\S.*\n)*))/mig; + if (@ret) { + return wantarray ? @ret : $ret[-1]; + } + else { + return $self->get_header($hdr); + } +} + +#sub get { shift->get_header(@_); } +sub get_header { + my ($self, $hdr, $raw) = @_; + $raw ||= 0; + + # And now pick up all the entries into a list + # This is assumed to include a newline at the end ... + # This is also assumed to have removed continuation bits ... + my @hdrs; + if ( $raw ) { + @hdrs = map { s/\r?\n\s+/ /g; $_; } $self->raw_header($hdr); + } + else { + @hdrs = map { "$_\n" } $self->header($hdr); + } + + if (wantarray) { + return @hdrs; + } + else { + return $hdrs[-1]; + } +} + +#sub header { shift->get_all_headers(@_); } +sub get_all_headers { + my ($self, $raw) = @_; + $raw ||= 0; + + my %cache = (); + my @lines = (); + + foreach ( @{$self->{header_order}} ) { + push(@lines, "$_: ".($self->get_header($_,$raw))[$cache{$_}++]); + } + + if (wantarray) { + return @lines; + } else { + return join ('', @lines); + } +} + +#sub body { return shift->get_body(@_); } +sub get_body { + my ($self) = @_; + my @ret = split(/^/m, $self->{pristine_body}); + return \@ret; +} + +# --------------------------------------------------------------------------- + +sub get_pristine { + my ($self) = @_; + return $self->{pristine_headers} . 
$self->{pristine_body}; +} + +sub get_pristine_body { + my ($self) = @_; + return $self->{pristine_body}; +} + +sub as_string { + my ($self) = @_; + return $self->get_all_headers(1) . "\n" . $self->{pristine_body}; +} + +sub ignore { + my ($self) = @_; + exit (0) unless $self->{noexit}; +} sub dbg { Mail::SpamAssassin::dbg (@_); } Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm (from rev 6247, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm) ============================================================================== --- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm (original) +++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm Wed Jan 21 13:49:00 2004 @@ -1,6 +1,6 @@ =head1 NAME -Mail::SpamAssassin::MIME::Parser - parse, decode, and render MIME body parts +Mail::SpamAssassin::MsgParser - parse, decode, and render MIME body parts =head1 SYNOPSIS @@ -17,21 +17,21 @@ =cut -package Mail::SpamAssassin::MIME::Parser; +package Mail::SpamAssassin::MsgParser; use strict; use Mail::SpamAssassin; -use Mail::SpamAssassin::MIME; +use Mail::SpamAssassin::MsgContainer; =item parse() -Unlike most modules, Mail::SpamAssassin::MIME::Parser will not return an -object of the same type, but rather a Mail::SpamAssassin::MIME object. -To use it, simply call C<Mail::SpamAssassin::MIME::Parser->parse($msg)>, +Unlike most modules, Mail::SpamAssassin::MsgParser will not return an +object of the same type, but rather a Mail::SpamAssassin::MsgContainer object. +To use it, simply call C<Mail::SpamAssassin::MsgParser->parse($msg)>, where $msg is a scalar with the entire contents of the mesage. The procedure used to parse a message is recursive and ends up generating -a tree of M::SA::MIME objects. parse() will generate the parent node +a tree of M::SA::MsgContainer objects. parse() will generate the parent node of the tree, then pass the body of the message to _parse_body() which begins the recursive process. 
@@ -41,6 +41,7 @@ sub parse { my($self,$message) = @_; + $message ||= \*STDIN; dbg("---- MIME PARSER START ----"); @@ -64,7 +65,7 @@ shift @message if ( @message > 0 && $message[0] =~ /^From\s/ ); # Generate the main object and parse the appropriate MIME-related headers into it. - my $msg = Mail::SpamAssassin::MIME->new(); + my $msg = Mail::SpamAssassin::MsgContainer->new(); my $header = ''; # Go through all the headers of the message @@ -180,7 +181,7 @@ # Else, there's no boundary, so leave the whole part... } - my $part_msg = Mail::SpamAssassin::MIME->new(); # prepare a new tree node + my $part_msg = Mail::SpamAssassin::MsgContainer->new(); # prepare a new tree node my $in_body = 0; my $header; my $part_array; @@ -214,7 +215,7 @@ # make sure we start with a new clean node $in_body = 0; - $part_msg = Mail::SpamAssassin::MIME->new(); + $part_msg = Mail::SpamAssassin::MsgContainer->new(); undef $part_array; undef $header; Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgLearner.pm ============================================================================== --- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgLearner.pm (original) +++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgLearner.pm Wed Jan 21 13:49:00 2004 @@ -67,7 +67,7 @@ 'rules_filename' => '/etc/spamassassin.rules', 'userprefs_filename' => $ENV{HOME}.'/.spamassassin.cf' }); - my $mail = Mail::SpamAssassin::NoMailAudit->new(); + my $mail = Mail::SpamAssassin::MsgParser->parse(); my $status = $spamtest->learn ($mail); ... 
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm ============================================================================== --- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original) +++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Jan 21 13:49:00 2004 @@ -67,7 +67,7 @@ 'rules_filename' => '/etc/spamassassin.rules', 'userprefs_filename' => $ENV{HOME}.'/.spamassassin.cf' }); - my $mail = Mail::SpamAssassin::NoMailAudit->new(); + my $mail = Mail::SpamAssassin::MsgParser->parse(); my $status = $spamtest->check ($mail); if ($status->is_spam()) { @@ -101,6 +101,7 @@ use Mail::SpamAssassin::Conf; use Mail::SpamAssassin::Received; use Mail::SpamAssassin::Util; +use Mail::SpamAssassin::MsgParser; use constant MAX_BODY_LINE_LENGTH => 2048; @@ -750,7 +751,7 @@ EOM my @lines = split (/^/m, $newmsg); - return Mail::SpamAssassin::NoMailAudit->new(data => \@lines); + return Mail::SpamAssassin::MsgParser->parse(\@lines); } sub rewrite_headers { @@ -792,7 +793,7 @@ } push(@pristine_headers, "\n", split (/^/m, $self->{msg}->get_pristine_body())); - return Mail::SpamAssassin::NoMailAudit->new(data => \@pristine_headers); + return Mail::SpamAssassin::MsgParser->parse(\@pristine_headers); } sub _process_header { @@ -1295,7 +1296,7 @@ my $getraw = ($hdrname eq 'ALL' || $hdrname =~ s/:raw$//); if ($hdrname eq 'ALL') { - $_ = $self->{msg}->get_all_headers(); + $_ = $self->{msg}->get_all_headers($getraw); } # EnvelopeFrom: the SMTP MAIL FROM: addr elsif ($hdrname eq 'EnvelopeFrom') { @@ -1306,22 +1307,22 @@ } # ToCc: the combined recipients list elsif ($hdrname eq 'ToCc') { - $_ = join ("\n", $self->{msg}->get_header ('To')); + $_ = join ("\n", $self->{msg}->get_header ('To', $getraw)); if ($_ ne '') { chop $_; $_ .= ", " if /\S/; } - $_ .= join ("\n", $self->{msg}->get_header ('Cc')); + $_ .= join ("\n", $self->{msg}->get_header ('Cc', $getraw)); undef $_ if $_ eq ''; } # MESSAGEID: handle lists which move 
the real message-id to another # header for resending. elsif ($hdrname eq 'MESSAGEID') { $_ = join ("\n", grep { defined($_) && length($_) > 0 } - $self->{msg}->get_header ('X-Message-Id'), - $self->{msg}->get_header ('Resent-Message-Id'), - $self->{msg}->get_header ('X-Original-Message-ID'), # bug 2122 - $self->{msg}->get_header ('Message-Id')); + $self->{msg}->get_header ('X-Message-Id', $getraw), + $self->{msg}->get_header ('Resent-Message-Id', $getraw), + $self->{msg}->get_header ('X-Original-Message-ID', $getraw), # bug 2122 + $self->{msg}->get_header ('Message-Id', $getraw)); } # untrusted relays list, as string elsif ($hdrname eq 'X-Spam-Relays-Untrusted') { @@ -1333,7 +1334,7 @@ } # a conventional header else { - my @hdrs = $self->{msg}->get_header ($hdrname); + my @hdrs = $self->{msg}->get_header ($hdrname, $getraw); if ($#hdrs >= 0) { $_ = join ('', @hdrs); } @@ -1355,9 +1356,6 @@ s/^[\'\"]*(.*?)[\'\"]*\s*<.+>\s*$/$1/g # Foo Blah <jm [at] fo> or s/^.+\s\((.*?)\)\s*$/$1/g; # jm [at] fo (Foo Blah) } - elsif (!$getraw) { - $_ = $self->mime_decode_header ($_); - } } $self->{hdr_cache}->{$request} = $_; } @@ -2372,8 +2370,8 @@ # cannot trust any Envelope-From headers, since they're likely to be # incorrect fetchmail guesses. 
- if ($self->get ("X-Sender")) { - my $rcvd = $self->get ("Received"); + if ($self->get ("X-Sender", 1)) { + my $rcvd = $self->get ("Received", 1); if ($rcvd =~ /\(fetchmail/) { dbg ("X-Sender and fetchmail signatures found, cannot trust envelope-from"); return undef; @@ -2381,13 +2379,13 @@ } # procmailrc notes this, amavisd are adding it, we recommend it - if ($envf = $self->get ("X-Envelope-From")) { goto ok; } + if ($envf = $self->get ("X-Envelope-From", 1)) { goto ok; } # qmail, new-inject(1) - if ($envf = $self->get ("Envelope-Sender")) { goto ok; } + if ($envf = $self->get ("Envelope-Sender", 1)) { goto ok; } # Postfix, sendmail, also mentioned in RFC821 - if ($envf = $self->get ("Return-Path")) { goto ok; } + if ($envf = $self->get ("Return-Path", 1)) { goto ok; } # give up. return undef; Modified: incubator/spamassassin/trunk/masses/mass-check ============================================================================== --- incubator/spamassassin/trunk/masses/mass-check (original) +++ incubator/spamassassin/trunk/masses/mass-check Wed Jan 21 13:49:00 2004 @@ -120,7 +120,7 @@ eval "use bytes"; use Mail::SpamAssassin::ArchiveIterator; use Mail::SpamAssassin; -use Mail::SpamAssassin::NoMailAudit; +use Mail::SpamAssassin::MsgParser; use Getopt::Long; use POSIX qw(strftime); use constant HAS_TIME_PARSEDATE => eval { require Time::ParseDate; }; @@ -286,7 +286,7 @@ my ($id, $time, $dataref) = @_; my $out; - my $ma = Mail::SpamAssassin::NoMailAudit->new('data' => $dataref); + my $ma = Mail::SpamAssassin::MsgParser->parse($dataref); $ma->{noexit} = 1; # remove SpamAssassin markup, if present and the mail was spam @@ -295,7 +295,7 @@ my $newtext = $spamtest->remove_spamassassin_markup($ma); my @newtext = split (/^/m, $newtext); $dataref = \@newtext; - $ma = Mail::SpamAssassin::NoMailAudit->new ('data' => $dataref); + $ma = Mail::SpamAssassin::MsgParser->parse ($dataref); } my $status = $spamtest->check($ma); Modified: incubator/spamassassin/trunk/spamassassin.raw 
============================================================================== --- incubator/spamassassin/trunk/spamassassin.raw (original) +++ incubator/spamassassin/trunk/spamassassin.raw Wed Jan 21 13:49:00 2004 @@ -64,7 +64,7 @@ eval { require Mail::SpamAssassin; - require Mail::SpamAssassin::NoMailAudit; + require Mail::SpamAssassin::MsgParser; # gnu_getopt is not available in Getopt::Long 2.24, see bug 732 # gnu_compat neither. @@ -123,9 +123,8 @@ my $mail; - use Mail::SpamAssassin::NoMailAudit; if (!$opt{'lint'} && !$doing_address_only_whitelisting) { - $mail = Mail::SpamAssassin::NoMailAudit->new (); + $mail = Mail::SpamAssassin::MsgParser->parse (); } # create the tester factory Modified: incubator/spamassassin/trunk/spamd/spamd.raw ============================================================================== --- incubator/spamassassin/trunk/spamd/spamd.raw (original) +++ incubator/spamassassin/trunk/spamd/spamd.raw Wed Jan 21 13:49:00 2004 @@ -25,7 +25,7 @@ use IO::Pipe; use Mail::SpamAssassin; -use Mail::SpamAssassin::NoMailAudit; +use Mail::SpamAssassin::MsgParser; use Mail::SpamAssassin::NetSet; use Getopt::Long; @@ -731,9 +731,7 @@ "." ); - my $mail = Mail::SpamAssassin::NoMailAudit->new ( - data => \@msglines - ); + my $mail = Mail::SpamAssassin::MsgParser->parse (\@msglines); # Check length if we're supposed to if($expected_length && ($actual_length != $expected_length)) {
|