
marvin at rectangular
Sep 10, 2008, 10:33 PM
Post #1 of 1
(2378 views)
Permalink
|
|
r3872 - in trunk/perl: lib/KinoSearch/Docs/Cookbook sample
|
|
Author: creamyg Date: 2008-09-10 22:33:15 -0700 (Wed, 10 Sep 2008) New Revision: 3872 Added: trunk/perl/sample/PrefixQuery.pm Modified: trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQuery.pod trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQueryParser.pod trunk/perl/sample/FlatQueryParser.pm Log: Add PrefixQuery, revise FlatQueryParser -- synchronize them with the Cookbook entries. Fix a bunch of bugs in both. Modified: trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQuery.pod =================================================================== --- trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQuery.pod 2008-09-11 05:32:12 UTC (rev 3871) +++ trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQuery.pod 2008-09-11 05:33:15 UTC (rev 3872) @@ -217,7 +217,7 @@ sub DESTROY { my $self = shift; - delete $posting_lists{$$self}; + delete $doc_nums{$$self}; delete $tick{$$self}; delete $tally{$$self}; $self->SUPER::DESTROY; @@ -233,7 +233,7 @@ my $doc_nums = $doc_nums{$$self}; my $tick = ++$tick{$$self}; return 0 if $tick >= scalar @$doc_nums; - return $doc_nums[$tick]; + return $doc_nums->[$tick]; } get_doc_num() returns the current document number, or 0 if the Scorer is @@ -244,7 +244,7 @@ my $self = shift; my $tick = $tick{$$self}; my $doc_nums = $doc_nums{$$self}; - return $tick < scalar @doc_nums ? $doc_nums[$tick] : 0; + return $tick < scalar @$doc_nums ? $doc_nums->[$tick] : 0; } tally() returns an object which isa L<KinoSearch::Search::Tally> and conveys the Modified: trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQueryParser.pod =================================================================== --- trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQueryParser.pod 2008-09-11 05:32:12 UTC (rev 3871) +++ trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQueryParser.pod 2008-09-11 05:33:15 UTC (rev 3872) @@ -207,7 +207,14 @@ conveys field and Analyzer information, so we can just defer to that. 
package FlatQueryParser; - use base ( KinoSearch::QueryParser ); + use base qw( KinoSearch::QueryParser ); + use KinoSearch::Search::TermQuery; + use KinoSearch::Search::PhraseQuery; + use KinoSearch::Search::ORQuery; + use KinoSearch::Search::NoMatchQuery; + use PrefixQuery; + use Parse::RecDescent; + use Carp; our %rd_parser; @@ -260,11 +267,18 @@ ... sub parse { - my ( $self, $query_string ) = @_; - my $tree = $rd_parser{$$self}->tree($query_string); - return $self->expand($tree); + my ( $self, $query_string ) = @_; + my $tree = $self->tree($query_string); + return $tree ? $self->expand($tree) : + KinoSearch::Search::NoMatchQuery->new; } + sub tree { + my ( $self, $query_string ) = @_; + return $rd_parser{$$self}->tree($query_string); + } + + The magic happens in KinoSearch::QueryParser's expand() method, which walks the ORQuery object we supply to it looking for LeafQuery objects, and calls expand_leaf() for each one it finds. expand_leaf() performs field-specific Modified: trunk/perl/sample/FlatQueryParser.pm =================================================================== --- trunk/perl/sample/FlatQueryParser.pm 2008-09-11 05:32:12 UTC (rev 3871) +++ trunk/perl/sample/FlatQueryParser.pm 2008-09-11 05:33:15 UTC (rev 3872) @@ -3,54 +3,34 @@ package FlatQueryParser; use base qw( KinoSearch::QueryParser ); +use KinoSearch::Search::TermQuery; +use KinoSearch::Search::PhraseQuery; +use KinoSearch::Search::ORQuery; use KinoSearch::Search::NoMatchQuery; +use PrefixQuery; +use Parse::RecDescent; +use Carp; our %rd_parser; -sub new { - my $either = shift; - my $self = $either->SUPER::new(@_); - $rd_parser{$$self} = FlatQueryParser::RecDescent->new; - return $self; -} - -sub parse { - my ( $self, $query_string ) = @_; - my $tree = $self->tree($query_string); - return $tree ? 
$self->expand($tree) : - KinoSearch::Search::NoMatchQuery->new; -} - -sub tree { - my ( $self, $query_string ) = @_; - return $rd_parser{$$self}->tree($query_string); -} - -sub DESTROY { - my $self = shift; - delete $rd_parser{$$self}; - $self->SUPER::DESTROY; -} - -package FlatQueryParser::RecDescent; -use base qw( Parse::RecDescent ); -use KinoSearch::Search::LeafQuery; -use KinoSearch::Search::ORQuery; - - my $grammar = <<'END_GRAMMAR'; tree: - leaf_query(s) + leaf_queries { $return = KinoSearch::Search::ORQuery->new; $return->add_child($_) for @{ $item[1] }; } +leaf_queries: + leaf_query(s?) + { $item{'leaf_query(s?)'} } + leaf_query: phrase_query + | prefix_query | term_query - + term_query: /(\S+)/ { KinoSearch::Search::LeafQuery->new( text => $1 ) } @@ -58,11 +38,57 @@ phrase_query: /("[^"]*(?:"|$))/ # terminated by either quote or end of string { KinoSearch::Search::LeafQuery->new( text => $1 ) } + +prefix_query: + /(\w+\*)/ + { KinoSearch::Search::LeafQuery->new( text => $1 ) } END_GRAMMAR -sub new { shift->SUPER::new($grammar) } +sub new { + my $class = shift; + my $self = $class->SUPER::new(@_); + $rd_parser{$$self} = Parse::RecDescent->new($grammar); + return $self; +} +sub DESTROY { + my $self = shift; + delete $rd_parser{$$self}; + $self->SUPER::DESTROY; +} + +sub parse { + my ( $self, $query_string ) = @_; + my $tree = $self->tree($query_string); + return $tree ? 
$self->expand($tree) : + KinoSearch::Search::NoMatchQuery->new; +} + +sub tree { + my ( $self, $query_string ) = @_; + return $rd_parser{$$self}->tree($query_string); +} + +sub expand_leaf { + my ( $self, $leaf_query ) = @_; + my $text = $leaf_query->get_text; + if ( $text =~ /\*$/ ) { + my $or_query = KinoSearch::Search::ORQuery->new; + for my $field ( @{ $self->get_fields } ) { + my $prefix_query = PrefixQuery->new( + field => $field, + query_string => $text, + ); + $or_query->add_child($prefix_query); + } + return $or_query; + } + else { + return $self->SUPER::expand_leaf($leaf_query); + } +} + 1; __END__ @@ -83,11 +109,7 @@ =head1 DESCRIPTION -FlatQueryParser is a subclass of KinoSearch::QueryParser which supports a more -limited syntax: words and quoted phrases, but no parenthetical groupings, -boolean operators C<AND OR NOT>, or prepended plus/minus. It's intended use -is to serve as sample code for people who want to write their own -grammar-based parsers. +See L<KinoSearch::Docs::Cookbook::CustomQueryParser>. =head1 COPYRIGHT Added: trunk/perl/sample/PrefixQuery.pm =================================================================== --- trunk/perl/sample/PrefixQuery.pm (rev 0) +++ trunk/perl/sample/PrefixQuery.pm 2008-09-11 05:33:15 UTC (rev 3872) @@ -0,0 +1,170 @@ +use strict; +use warnings; + +package PrefixQuery; +use base qw( KinoSearch::Search::Query ); +use Carp; + +# Inside-out member vars and hand-rolled accessors. 
+my %query_string; +my %field; +sub get_query_string { my $self = shift; return $query_string{$$self} } +sub get_field { my $self = shift; return $field{$$self} } + +sub new { + my ( $class, %args ) = @_; + my $query_string = delete $args{query_string}; + my $field = delete $args{field}; + my $self = $class->SUPER::new(%args); + confess("'query_string' param is required") + unless defined $query_string; + confess("Invalid query_string: '$query_string'") + unless $query_string =~ /\*\s*$/; + confess("'field' param is required") + unless defined $field; + $query_string{$$self} = $query_string; + $field{$$self} = $field; + return $self; +} + +sub DESTROY { + my $self = shift; + delete $query_string{$$self}; + delete $field{$$self}; + $self->SUPER::DESTROY; +} + +sub make_compiler { + my $self = shift; + return PrefixCompiler->new( @_, parent => $self ); +} + +sub to_string { + my $self = shift; + return "$field{$$self}:$query_string{$$self}"; +} + +package PrefixCompiler; +use base qw( KinoSearch::Search::Compiler ); + +sub make_scorer { + my ( $self, $index_reader ) = @_; + + # Acquire a Lexicon and seek it to our query string. + my $substring = $self->get_parent->get_query_string; + $substring =~ s/\*.\s*$//; + my $field = $self->get_parent->get_field; + my $lexicon = $index_reader->lexicon( field => $field ); + return unless $lexicon; + $lexicon->seek($substring); + + # Accumulate PostingLists for each matching term. + my @posting_lists; + while ( defined( my $term = $lexicon->get_term ) ) { + warn "$term $substring"; + last unless $term =~ /^$substring/; + my $posting_list = $index_reader->posting_list( + field => $field, + term => $term, + ); + if ($posting_list) { + push @posting_lists, $posting_list; + } + last unless $lexicon->next; + } + return unless @posting_lists; + + return PrefixScorer->new( posting_lists => \@posting_lists ); +} + +package PrefixScorer; +use base qw( KinoSearch::Search::Scorer ); + +# Inside-out member vars. 
+my %doc_nums; +my %tally; +my %tick; + +sub new { + my ( $class, %args ) = @_; + my $posting_lists = delete $args{posting_lists}; + my $self = $class->SUPER::new(%args); + + # Cheesy but simple way of interleaving PostingList doc sets. + my %all_doc_nums; + for my $posting_list (@$posting_lists) { + while ( my $doc_num = $posting_list->next ) { + $all_doc_nums{$doc_num} = undef; + } + } + my @doc_nums = sort { $a <=> $b } keys %all_doc_nums; + $doc_nums{$$self} = \@doc_nums; + + $tick{$$self} = -1; + $tally{$$self} = KinoSearch::Search::Tally->new; + $tally{$$self}->set_score(1.0); # fixed score of 1.0 + + return $self; +} + +sub DESTROY { + my $self = shift; + delete $doc_nums{$$self}; + delete $tick{$$self}; + delete $tally{$$self}; + $self->SUPER::DESTROY; +} + +sub next { + my $self = shift; + my $doc_nums = $doc_nums{$$self}; + my $tick = ++$tick{$$self}; + return 0 if $tick >= scalar @$doc_nums; + return $doc_nums->[$tick]; +} + +sub get_doc_num { + my $self = shift; + my $tick = $tick{$$self}; + my $doc_nums = $doc_nums{$$self}; + return $tick < scalar @$doc_nums ? $doc_nums->[$tick] : 0; +} + +sub tally { + my $self = shift; + return $tally{$$self}; +} + +1; + +__END__ + +__POD__ + +=head1 NAME + +PrefixQuery - Sample subclass of KinoSearch::Query, supporting trailing +wildcards. + +=head1 SYNOPSIS + + my $prefix_query = PrefixQuery->new( + field => 'content', + query_string => 'foo*', + ); + my $hits = $searcher->search( query => $prefix_query ); + +=head1 DESCRIPTION + +See L<KinoSearch::Docs::Cookbook::CustomQuery>. + +=head1 COPYRIGHT + +Copyright 2008 Marvin Humphrey + +=head1 LICENSE, DISCLAIMER, BUGS, etc. + +See L<KinoSearch> version 0.20. + +=cut + _______________________________________________ kinosearch-commits mailing list kinosearch-commits [at] rectangular http://www.rectangular.com/mailman/listinfo/kinosearch-commits
|