
uwe at thetaphi
Oct 12, 2009, 11:11 AM
Post #14 of 14
(1001 views)
Permalink
|
The source code attachment got somehow lost: import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.search.*; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; public class TestNRQWithQueryParser extends LuceneTestCase { public void test() throws Exception { RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); for (int l=-5000; l<=5000; l++) { Document doc = new Document(); doc.add(new Field("text", "the big brown", Field.Store.NO, Field.Index.ANALYZED)); doc.add(new NumericField("trie", Field.Store.NO, true).setIntValue(l)); writer.addDocument(doc); } writer.close(); Searcher searcher=new IndexSearcher(directory, true); QueryParser parser = new QueryParser("text", new WhitespaceAnalyzer()) { @Override protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { if ("trie".equals(field)) { return NumericRangeQuery.newIntRange(field, Integer.parseInt(part1), Integer.parseInt(part2), inclusive, inclusive); } else { return super.newRangeQuery(field, part1, part2, inclusive); } } @Override protected Query newTermQuery(Term term) { if("trie".equals(term.field())) { return new TermQuery(new Term(term.field(), NumericUtils.intToPrefixCoded(Integer.parseInt(term.text())))); } else { return super.newTermQuery(term); } } }; TopDocs td; td = searcher.search(parser.parse("+trie:[20 TO 30]"), 5000); assertEquals(11, td.totalHits); td = searcher.search(parser.parse("+trie:[-4999 TO -4000]"), 5000); assertEquals(1000, td.totalHits); td = searcher.search(parser.parse("the 
big brown +trie:[-4999 TO -4000]"), 5000); assertEquals(1000, td.totalHits); td = searcher.search(parser.parse("+trie:77"), 5000); assertEquals(1, td.totalHits); td = searcher.search(parser.parse("+trie:5001"), 5000); assertEquals(0, td.totalHits); td = searcher.search(parser.parse("the big brown +trie:\"-2\""), 5000); assertEquals(1, td.totalHits); td = searcher.search(parser.parse("+trie:\"-5001\""), 5000); assertEquals(0, td.totalHits); searcher.close(); directory.close(); } } ----- Uwe Schindler H.-H.-Meier-Allee 63, D-28213 Bremen http://www.thetaphi.de eMail: uwe [at] thetaphi > -----Original Message----- > From: Uwe Schindler [mailto:uwe [at] thetaphi] > Sent: Monday, October 12, 2009 8:10 PM > To: java-user [at] lucene > Subject: RE: How do you properly use NumericField > > Hallo Paul, > > I implemented what you wanted in the attached test case. It works without > problems. Your error was that, when creating the TermQuery, you passed a > precisionStep as the shift value parameter, which is incorrect. > > By the way: Lucene 2.9.1 and Lucene 3.0 will be optimized for ranges like > [1 > TO 1], because this is now as fast as a TermQuery, but you can use > NumericRangeQuery for it (and do not need to encode the terms). Just > replace > the TermQuery with NumericUtils in the newTermQuery method by a > NumericRangeQuery with upper and lower bound equal (and not exclusive). > > Please note: negative numbers in the query parser may lead to problems; > because of this they need to be placed in "" ("-" is the sign for > exclusion terms). The test may fail with other Analyzers that corrupt your > numbers. > > Uwe > > > -----Original Message----- > > From: Uwe Schindler [mailto:uwe [at] thetaphi] > > Sent: Monday, October 12, 2009 5:49 PM > > To: java-user [at] lucene; paul_t100 [at] fastmail > > Subject: RE: How do you properly use NumericField > > > > Can you print the upper and lower term or the term you received in > > newRangeQuery and newTermQuery also to System.out? 
Maybe it is converted > > somehow by your Analyzer that is used for parsing the query. > > > > ----- > > Uwe Schindler > > H.-H.-Meier-Allee 63, D-28213 Bremen > > http://www.thetaphi.de > > eMail: uwe [at] thetaphi > > > > > > > -----Original Message----- > > > From: Paul Taylor [mailto:paul_t100 [at] fastmail] > > > Sent: Monday, October 12, 2009 1:00 PM > > > To: java-user [at] lucene > > > Subject: Re: How do you properly use NumericField > > > > > > Uwe Schindler wrote: > > > > I forgot: The format of numeric fields is also not plain text; > because > > > of > > > > this a simple TermQuery as generated by your query parser will not > > work > > > > either. > > > > > > > > If you want to hit numeric values without a NumericRangeQuery with > > lower > > > and > > > > upper bound equal, you have to use NumericUtils to translate the > term > > > text, > > > > e.g. new TermQuery(new Term("field", > > > > NumericUtils.intToPrefixCoded(value,precstep))) > > > > > > > > If you want support for this in QueryParser, you have to override > > > > QueryParser.newTermQuery as explained before for newRangeQuery. By > the > > > way, > > > > Solr does it in exactly that way. > > > > > > > > Uwe > > > > > > > > > > OK, I'm trying my best here but still cannot get range or single term > > > query searching to work. 
> > > > > > package org.musicbrainz.search.servlet; > > > > > > import junit.framework.TestCase; > > > import org.apache.lucene.analysis.Analyzer; > > > import org.apache.lucene.document.Document; > > > import org.apache.lucene.document.NumericField; > > > import org.apache.lucene.index.IndexWriter; > > > import org.apache.lucene.index.Term; > > > import org.apache.lucene.queryParser.QueryParser; > > > import org.apache.lucene.search.*; > > > import org.apache.lucene.store.RAMDirectory; > > > import org.apache.lucene.util.NumericUtils; > > > import org.musicbrainz.search.index.TrackAnalyzer; > > > > > > public class NumericFieldTest extends TestCase { > > > > > > public void testNumericFields() throws Exception { > > > Analyzer analyzer = new TrackAnalyzer(); > > > RAMDirectory dir = new RAMDirectory(); > > > IndexWriter writer = new IndexWriter(dir, analyzer, true, > > > IndexWriter.MaxFieldLength.LIMITED); > > > Document doc = new Document(); > > > NumericField nf = new NumericField("dur"); > > > nf.setIntValue(123); > > > writer.addDocument(doc); > > > writer.close(); > > > > > > IndexSearcher searcher = new IndexSearcher(dir,true); > > > { > > > > > > Query q = new > > > MusicbrainzQueryParser("dur",analyzer).parse("[12 TO 124]"); > > > assertEquals(1, searcher.search(q,10).totalHits); > > > > > > > > > q = new > MusicbrainzQueryParser("dur",analyzer).parse("123"); > > > assertEquals(1, searcher.search(q,10).totalHits); > > > > > > > > > } > > > } > > > > > > static class MusicbrainzQueryParser extends QueryParser { > > > > > > public MusicbrainzQueryParser(String field, Analyzer a) { > > > super(field, a); > > > System.out.println("init parser"); > > > } > > > > > > public Query newRangeQuery(String field, > > > String part1, > > > String part2, > > > boolean inclusive) > > > { > > > System.out.println("RangeQuery"); > > > TermRangeQuery query = (TermRangeQuery) > > > super.newRangeQuery(field, part1, part2, > > > inclusive); > > > > > > if 
("dur".equals(field)) { > > > System.out.println("durRangeQuery"); > > > > > > return NumericRangeQuery.newIntRange( > > > "dur", > > > Integer.parseInt(query.getLowerTerm()), > > > Integer.parseInt(query.getUpperTerm()), > > > query.includesLower(), > > > query.includesUpper()); > > > } else { > > > return query; > > > } > > > } > > > > > > protected Query newTermQuery(Term term) > > > { > > > System.out.println("newTermQuery"); > > > if(term.field().equals("dur")) { > > > System.out.println("dur,newTermQuery"); > > > TermQuery tq = new TermQuery(new Term("field", > > > > > > > > > NumericUtils.intToPrefixCoded(Integer.parseInt(term.text()),NumericUtils.P > > > RECISION_STEP_DEFAULT))); > > > return tq; > > > } > > > else { > > > return super.newTermQuery(term); > > > } > > > } > > > } > > > > > > } > > > > > > --------------------------------------------------------------------- > > > To unsubscribe, e-mail: java-user-unsubscribe [at] lucene > > > For additional commands, e-mail: java-user-help [at] lucene > > > > > > > > --------------------------------------------------------------------- > > To unsubscribe, e-mail: java-user-unsubscribe [at] lucene > > For additional commands, e-mail: java-user-help [at] lucene >
|