
marvin at rectangular
Sep 11, 2008, 7:53 AM
Post #1 of 1
(2515 views)
Permalink
|
|
r3873 - in trunk: devel/benchmarks devel/benchmarks/indexers devel/bin perl/buildlib/Lucy
|
|
Author: creamyg Date: 2008-09-11 07:53:51 -0700 (Thu, 11 Sep 2008) New Revision: 3873 Modified: trunk/devel/benchmarks/extract_reuters.plx trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm trunk/devel/benchmarks/indexers/kinosearch_indexer.plx trunk/devel/benchmarks/indexers/plucene_indexer.plx trunk/devel/bin/dump_index trunk/devel/bin/predit trunk/devel/bin/syncl trunk/devel/bin/tidyall trunk/perl/buildlib/Lucy/Build.pm Log: Update comment style. Modified: trunk/devel/benchmarks/extract_reuters.plx =================================================================== --- trunk/devel/benchmarks/extract_reuters.plx 2008-09-11 05:33:15 UTC (rev 3872) +++ trunk/devel/benchmarks/extract_reuters.plx 2008-09-11 14:53:51 UTC (rev 3873) @@ -5,7 +5,7 @@ use File::Spec::Functions qw( catfile catdir ); use Cwd qw( getcwd ); -# ensure call from correct location and with required arg +# Ensure call from correct location and with required arg. my $source_dir = $ARGV[0]; die "Usage: ./extract_reuters.plx /path/to/expanded/archive" unless -d $source_dir; @@ -13,30 +13,30 @@ die "Must be run from the benchmarks/ directory" unless ( $working_dir =~ /benchmarks\W*$/ ); -# create the main output directory +# Create the main output directory. my $main_out_dir = 'extracted_corpus'; if ( !-d $main_out_dir ) { mkdir $main_out_dir or die "Couldn't mkdir '$main_out_dir': $!"; } -# get a list of the sgm files +# Get a list of the sgm files. opendir SOURCE_DIR, $source_dir or die "Couldn't open directory: $!"; my @sgm_files = grep {/\.sgm$/} readdir SOURCE_DIR; closedir SOURCE_DIR or die "Couldn't close directory: $!"; die "Couldn't find all the sgm files" unless @sgm_files == 22; -# track number of story docs +# Track number of story docs. my $num_files = 0; for my $sgm_file (@sgm_files) { - # get the sgm file + # Get the sgm file. my $sgm_filepath = catfile( $source_dir, $sgm_file ); print "Processing $sgm_filepath\n"; open( my $sgm_fh, '<', $sgm_filepath ) or die "Couldn't open file '$sgm_filepath': $!"; - # prepare output directory + # Prepare output directory. $sgm_file =~ /(\d+)\.sgm$/ or die "no match"; my $out_dir = catdir( $main_out_dir, "articles$1" ); if ( !-d $out_dir ) { @@ -47,13 +47,13 @@ my $in_title = 0; my ( $title, $body ); while (<$sgm_fh>) { - # start a new story doc + # Start a new story doc. if (/<REUTERS/) { $title = ''; $body = ''; } - # extract title and body + # Extract title and body. if (s/.*?<TITLE>//) { $in_title = 1; $title = ''; @@ -73,7 +73,7 @@ $body =~ s#</BODY>.*##s; } - # write out a finished article doc + # Write out a finished article doc. if (m#</REUTERS>#) { die "Malformed data" if ( $in_title or $in_body ); if ( length $title and length $body ) { Modified: trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm =================================================================== --- trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm 2008-09-11 05:33:15 UTC (rev 3872) +++ trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm 2008-09-11 14:53:51 UTC (rev 3873) @@ -61,15 +61,15 @@ } sub start_report { - # start the output + # Start the output. print '-' x 60 . "\n"; } -# Print out aggregate stats +# Print out aggregate stats. sub print_final_report { my ( $self, $times ) = @_; - # produce mean and truncated mean + # Produce mean and truncated mean. my @sorted_times = sort @$times; my $num_to_chop = int( @sorted_times >> 2 ); my $mean = 0; @@ -77,7 +77,7 @@ my $num_kept = 0; for ( my $i = 0; $i < @sorted_times; $i++ ) { $mean += $sorted_times[$i]; - # discard fastest 25% and slowest 25% of runs + # Discard fastest 25% and slowest 25% of runs. next if $i < $num_to_chop; next if $i > ( $#sorted_times - $num_to_chop ); $trunc_mean += $sorted_times[$i]; @@ -90,7 +90,7 @@ $mean = sprintf( "%.2f", $mean ); $trunc_mean = sprintf( "%.2f", $trunc_mean ); - # get some info about the system + # Get some info about the system. my $thread_support = $Config{usethreads} ? "yes" : "no"; my @uname_info = (uname)[ 0, 2, 4 ]; @@ -137,7 +137,7 @@ require KinoSearch; require KinoSearch::InvIndexer; - # provide runtime flexibility + # Provide runtime flexibility. my $schema = $self->{schema} = BenchSchema->new; if ( $self->{store} ) { $schema->add_field( field => 'body', spec => 'text' ); @@ -174,7 +174,7 @@ my ( $max, $increment, $article_filepaths ) = @{$self}{qw( docs increment article_filepaths )}; - # start timer + # Start timer. my $start = gettimeofday(); my $invindexer = $self->init_indexer(0); @@ -182,7 +182,7 @@ my $count = 0; while ( $count < $max ) { for my $article_filepath (@$article_filepaths) { - # the title is the first line, the body is the rest + # The title is the first line, the body is the rest. open( my $article_fh, '<', $article_filepath ) or die "Can't open file '$article_filepath'"; @@ -192,7 +192,7 @@ $invindexer->add_doc( \%doc ); - # bail if we've reached spec'd number of docs + # Bail if we've reached spec'd number of docs. $count++; last if $count >= $max; if ( $count % $increment == 0 and $count ) { @@ -203,10 +203,10 @@ } } - # finish index + # Finish index. $invindexer->finish( optimize => 1 ); - # return elapsed seconds + # Return elapsed seconds. my $end = gettimeofday(); my $secs = $end - $start; return ( $count, $secs ); @@ -250,10 +250,10 @@ my ( $max, $increment, $article_filepaths ) = @{$self}{qw( docs increment article_filepaths )}; - # cause text to be stored if spec'd + # Cause text to be stored if spec'd. my $field_constructor = $self->{store} ? 'Text' : 'UnStored'; - # start timer + # Start timer. my $start = gettimeofday(); my $writer = $self->init_indexer(0); @@ -261,13 +261,13 @@ my $count = 0; while ( $count < $max ) { for my $article_filepath (@$article_filepaths) { - # the title is the first line, the body is the rest + # The title is the first line, the body is the rest. open( my $article_fh, '<', $article_filepath ) or die "Can't open file '$article_filepath'"; my $title = <$article_fh>; my $body = do { local $/; <$article_fh> }; - # add content to index + # Add content to index. my $doc = Plucene::Document->new; $doc->add( Plucene::Document::Field->Text( title => $title ) ); $doc->add( @@ -275,7 +275,7 @@ ); $writer->add_document($doc); - # bail if we've reached spec'd number of docs + # Bail if we've reached spec'd number of docs. $count++; last if ( $count >= $max ); if ( $count % $increment == 0 and $count ) { @@ -285,10 +285,10 @@ } } - # finish index + # Finish index. $writer->optimize; - # return elapsed seconds + # Return elapsed seconds. my $end = gettimeofday(); my $secs = $end - $start; return ( $count, $secs ); Modified: trunk/devel/benchmarks/indexers/kinosearch_indexer.plx =================================================================== --- trunk/devel/benchmarks/indexers/kinosearch_indexer.plx 2008-09-11 05:33:15 UTC (rev 3872) +++ trunk/devel/benchmarks/indexers/kinosearch_indexer.plx 2008-09-11 14:53:51 UTC (rev 3873) @@ -9,7 +9,7 @@ use Cwd qw( getcwd ); use BenchmarkingIndexer; -# index all docs and run one iter unless otherwise spec'd +# Index all docs and run one iter unless otherwise spec'd. my ( $num_reps, $max_to_index, $increment, $store, $build_index ); GetOptions( 'reps=s' => \$num_reps, @@ -37,12 +37,12 @@ my @times; for my $rep ( 1 .. $num_reps ) { - # spawn an index-building child process + # Spawn an index-building child process. my $command = "$^X "; - # try to figure out if this program was called with -Mblib + # Try to figure out if this program was called with -Mblib. for (@INC) { next unless /\bblib\b/; - # propagate -Mblib to the child + # Propagate -Mblib to the child. $command .= "-Mblib "; last; } @@ -52,7 +52,7 @@ $command .= "--increment=$increment " if $increment; my $output = `$command`; - # extract elapsed time from the output of the child + # Extract elapsed time from the output of the child. $output =~ /^docs: (\d+) elapsed: ([\d.]+)/ or die "no match: '$output'"; my $docs = $1; Modified: trunk/devel/benchmarks/indexers/plucene_indexer.plx =================================================================== --- trunk/devel/benchmarks/indexers/plucene_indexer.plx 2008-09-11 05:33:15 UTC (rev 3872) +++ trunk/devel/benchmarks/indexers/plucene_indexer.plx 2008-09-11 14:53:51 UTC (rev 3873) @@ -8,12 +8,12 @@ use Cwd qw( getcwd ); use BenchmarkingIndexer; -# verify that we're running from the right directory; +# Verify that we're running from the right directory. my $working_dir = getcwd; die "Must be run from benchmarks/" unless $working_dir =~ /benchmarks\W*$/; -# index all docs and run one iter unless otherwise spec'd +# Index all docs and run one iter unless otherwise spec'd. my ( $num_reps, $max_to_index, $increment, $store, $build_index ); GetOptions( 'reps=s' => \$num_reps, @@ -40,12 +40,12 @@ my @times; for my $rep ( 1 .. $num_reps ) { - # spawn an index-building child process + # Spawn an index-building child process. my $command = "$^X "; - # try to figure out if this program was called with -Mblib + # Try to figure out if this program was called with -Mblib. for (@INC) { next unless /\bblib\b/; - # propagate -Mblib to the child + # Propagate -Mblib to the child. $command .= "-Mblib "; last; } @@ -55,7 +55,7 @@ $command .= "--increment=$increment " if $increment; my $output = `$command`; - # extract elapsed time from the output of the child + # Extract elapsed time from the output of the child. $output =~ /^docs: (\d+) elapsed: ([\d.]+)/ or die "no match: '$output'"; my $docs = $1; Modified: trunk/devel/bin/dump_index =================================================================== --- trunk/devel/bin/dump_index 2008-09-11 05:33:15 UTC (rev 3872) +++ trunk/devel/bin/dump_index 2008-09-11 14:53:51 UTC (rev 3873) @@ -4,7 +4,7 @@ use KinoSearch::Index::IndexReader; -# parse and validate command line args +# Parse and validate command line args. die "Usage: dump_index SCHEMA_CLASS INDEX_LOCATION" unless @ARGV == 2; my ( $schema_class, $where ) = @ARGV; die "Invalid schema class name: $schema_class" Modified: trunk/devel/bin/predit =================================================================== --- trunk/devel/bin/predit 2008-09-11 05:33:15 UTC (rev 3872) +++ trunk/devel/bin/predit 2008-09-11 14:53:51 UTC (rev 3873) @@ -51,7 +51,7 @@ return; } - # confirm with user that the change worked as intended. + # Confirm with user that the change worked as intended. my $diff = diff( \$orig, \$edited ); print "\nFILE: $filepath\n$diff\nApply? "; my $response = <STDIN>; Modified: trunk/devel/bin/syncl =================================================================== --- trunk/devel/bin/syncl 2008-09-11 05:33:15 UTC (rev 3872) +++ trunk/devel/bin/syncl 2008-09-11 14:53:51 UTC (rev 3873) @@ -67,7 +67,7 @@ next if $component =~ $exclude; - # warn and skip files that aren't common + # Warn and skip files that aren't common. for ( $source_path, $dest_path ) { if ( !-e $_ ) { print("Don't have $_ ... skipping...\n"); @@ -76,12 +76,12 @@ next FILE unless -f $_; } - # generate a diff if there are changes, otherwise skip + # Generate a diff if there are changes, otherwise skip. my $source_content = slurp($source_path); my $dest_content = slurp($dest_path); my $edited = modify( $source_content, $dest_content ); - # search and replace prefixes, project name + # Search and replace prefixes, project name. my $source_swap = $swaps{$source_proj}; my $dest_swap = $swaps{$dest_proj}; for ($edited) { @@ -96,7 +96,7 @@ next; } - # confirm with user that the change worked as intended. + # Confirm with user that the change worked as intended. my $diff = diff( \$dest_content, \$edited ); print "\nFILE: $dest_path\n$diff\nApply? "; my $response = <STDIN>; Modified: trunk/devel/bin/tidyall =================================================================== --- trunk/devel/bin/tidyall 2008-09-11 05:33:15 UTC (rev 3872) +++ trunk/devel/bin/tidyall 2008-09-11 14:53:51 UTC (rev 3873) @@ -10,7 +10,7 @@ my $source_dir = shift @ARGV; die "usage: tidyall DIR" unless defined $source_dir; -# grab all perl filepaths +# Grab all perl filepaths my @paths; find( { wanted => sub { @@ -26,7 +26,7 @@ die "can't find kinotidyrc" unless -f $rc_filepath; for my $path (@paths) { - # grab orig text + # Grab orig text print "$path\n"; open( my $fh, '<', $path ) or die "couldn't open file '$path' for reading: $!"; Modified: trunk/perl/buildlib/Lucy/Build.pm =================================================================== --- trunk/perl/buildlib/Lucy/Build.pm 2008-09-11 05:33:15 UTC (rev 3872) +++ trunk/perl/buildlib/Lucy/Build.pm 2008-09-11 14:53:51 UTC (rev 3873) @@ -134,7 +134,7 @@ my $charmonize_c = catfile( $CHARMONIZER_ORIG_DIR, 'charmonize.c' ); my @all_source = ( $charmonize_c, @$charm_source_files ); - # don't compile if we're up to date + # Don't compile if we're up to date. return if $self->up_to_date( \@all_source, $CHARMONIZE_EXE_PATH ); print "Building $CHARMONIZE_EXE_PATH...\n\n"; @@ -179,7 +179,7 @@ # Clean up after Charmonizer if it doesn't succeed on its own. $self->add_to_cleanup("_charm*"); - # write the infile with which to communicate args to charmonize + # Write the infile with which to communicate args to charmonize. my $os_name = lc( $Config{osname} ); my $flags = "$Config{ccflags} $EXTRA_CCFLAGS"; my $verbosity = $ENV{DEBUG_CHARM} ? 2 : 1; @@ -485,7 +485,7 @@ $self->SUPER::ACTION_code; } -# copied from Module::Build::Base.pm, added exclude '#' and follow symlinks +# Copied from Module::Build::Base.pm, added exclude '#' and follow symlinks. sub rscan_dir { my ( $self, $dir, $pattern ) = @_; my @result; @@ -501,7 +501,7 @@ File::Find::find( { wanted => $subr, no_chdir => 1, follow => 1 }, $dir ); - # skip emacs lock files + # Skip emacs lock files. my @filtered = grep !/#/, @result; return \@filtered; } @@ -560,7 +560,7 @@ =cut -# write the typemap file. +# Write the typemap file. sub ACTION_write_typemap { my $self = shift; @@ -568,7 +568,7 @@ return if ( -e 'typemap' and $self->up_to_date( $pm_filepaths, 'typemap' ) ); - # build up a list of C-struct classes + # Build up a list of C-struct classes. my @struct_classes; my $bp_filepaths = $self->rscan_dir( $C_SOURCE_DIR, qr/\.bp$/ ); for my $bp_path (@$bp_filepaths) { @@ -604,7 +604,7 @@ END_OUTPUT } - # blast it out + # Blast it out. print "Writing typemap\n"; unlink 'typemap'; sysopen( my $typemap_fh, 'typemap', O_CREAT | O_WRONLY | O_EXCL ) @@ -721,7 +721,7 @@ $self->SUPER::ACTION_dist; - # clean up and restore MANIFEST + # Clean up and restore MANIFEST. print "Removing copied files...\n"; rmtree('c_src'); rmtree('devel'); @@ -734,24 +734,24 @@ sub _gen_pause_exclusion_list { my $self = shift; - # only exclude files that are actually on-board + # Only exclude files that are actually on-board. open( my $man_fh, '<', 'MANIFEST' ) or die "Can't open MANIFEST: $!"; my @manifest_entries = <$man_fh>; chomp @manifest_entries; my @excluded_files; for my $entry ( sort @manifest_entries ) { - # allow README + # Allow README. next if $entry =~ m#^README#; - # allow public modules + # Allow public modules. if ( $entry =~ m#^lib.+\.(pm|pod)$# ) { open( my $fh, '<', $entry ) or die "Can't open '$entry': $!"; my $content = do { local $/; <$fh> }; next if $content =~ /=head1\s*NAME/; } - # disallow everything else + # Disallow everything else. push @excluded_files, $entry; } _______________________________________________ kinosearch-commits mailing list kinosearch-commits [at] rectangular http://www.rectangular.com/mailman/listinfo/kinosearch-commits
|