Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Lucene: Java-User

Lucene 2.9.0 / BooleanQuery problem

 

 

Lucene java-user RSS feed   Index | Next | Previous | View Threaded


akaris at gmail

Oct 28, 2009, 7:12 PM

Post #1 of 4 (485 views)
Permalink
Lucene 2.9.0 / BooleanQuery problem

Hi !

I spent all night trying to get a simple BooleanQuery working and I really
can't figure out what is my problem. See this very simple program :

/**
 * Minimal reproduction program: writes five small documents to an index under
 * d:/Java/index/, then runs the same two-clause query twice -- once parsed from
 * a query string by QueryParser, once hand-built as a BooleanQuery of
 * TermQuery clauses -- and prints every hit to stdout.
 */
public class test {

/**
 * Builds the index, opens a searcher on it, and runs both query variants.
 *
 * @param args unused
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws ParseException,
CorruptIndexException, LockObtainFailedException, IOException
{
// Open index directory
File idx = new File("d:/Java/index/");

// Create IndexWriter
// NOTE(review): the third argument (true) is presumably the "create a new
// index" flag -- confirm against the IndexWriter javadoc.
IndexWriter writer = new IndexWriter(idx, new
StandardAnalyzer(Version.LUCENE_CURRENT), true,
IndexWriter.MaxFieldLength.LIMITED);

//
// Add some documents to the index
//
// Every document carries the same six fields (firstname, lastname, phone,
// sector, group, content), all declared with Field.Store.YES and
// Field.Index.ANALYZED.
//

// Document 1: sector "IT", group "group"
Document doc = new Document();
doc.add(new Field("firstname", "Mike", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("phone", "111-222-3333", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("sector", "IT", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("group", "group", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("content", "blue this is some content",
Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);

// Document 2: sector "Accounting", group "othergroup"
doc = new Document();
doc.add(new Field("firstname", "Pascale", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("lastname", "Lavoie", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("phone", "333-222-1111", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("sector", "Accounting", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("group", "othergroup", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("content", "red this is some content",
Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);

// Document 3: sector "IT", group "group"
doc = new Document();
doc.add(new Field("firstname", "Kaven", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("lastname", "Rouseau", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("phone", "222-333-1111", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("sector", "IT", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("group", "group", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("content", "red this is some content",
Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);

// Document 4: sector "IT", group "othergroup"
doc = new Document();
doc.add(new Field("firstname", "Mike", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("lastname", "Peters", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("phone", "444-444-4444", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("sector", "IT", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("group", "othergroup", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("content", "this is some content",
Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);

// Document 5: sector "Accounting", group "group"
doc = new Document();
doc.add(new Field("firstname", "Marie-Pier", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("phone", "123-123-1234", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("sector", "Accounting", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("group", "group", Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("content", "this is some content",
Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);

// Optimize + close index
writer.optimize();
writer.close();

// Open IndexReader/Searcher
// NOTE(review): the second argument (true) presumably requests a read-only
// reader -- confirm against the IndexReader.open javadoc.
IndexReader reader = IndexReader.open(idx, true);
Searcher searcher = new IndexSearcher(reader);

//
// Test normal query
//
// The query string is parsed by a QueryParser whose default field is
// "content" and which is given a StandardAnalyzer of the same version as the
// one passed to the IndexWriter above.
String m_field = "content";
String m_query = "sector:IT AND group:group";
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
QueryParser parser = new QueryParser(m_field, analyzer);
Query query = parser.parse(m_query);

// Search!
System.out.println("Normal query: " + m_query + "\n");
doStreamingSearch(searcher, query);

System.out.println("===========================================\n");

//
// Test BooleanQuery
//
// Hand-built equivalent with two Occur.MUST clauses. The term texts "IT" and
// "group" are handed to Term exactly as written; no analyzer is involved in
// building this query, unlike the parsed query above.
BooleanQuery query2 = new BooleanQuery();
query2.add(new TermQuery(new Term("sector", "IT")), Occur.MUST);
query2.add(new TermQuery(new Term("group", "group")), Occur.MUST);

// Search!
System.out.println("BooleanQuery:\n");
doStreamingSearch(searcher, query2);
System.out.println("Done!");
}

/**
 * Runs the query and prints the firstname, lastname, phone, sector, group and
 * content fields of every collected document to stdout, followed by an empty
 * line per document.
 *
 * @param searcher searcher used both to execute the query and to load each
 *                 collected document
 * @param query    query to execute
 * @throws IOException declared by the searcher and collector calls used here
 */
public static void doStreamingSearch(final Searcher searcher, Query
query) throws IOException
{
Collector streamingHitCollector = new Collector()
{
// Both fields are assigned by the callbacks below but never read.
private Scorer scorer;
private int docBase;

// NOTE(review): the id is passed to searcher.doc() without adding docBase.
// If collect() receives ids relative to the reader announced by
// setNextReader, this needs docBase + doc -- confirm against the Collector
// javadoc.
public void collect(int doc) throws IOException
{
Document theDoc = searcher.doc(doc);

System.out.println("+ First Name =
"+theDoc.get("firstname"));
System.out.println("+ Last Name =
"+theDoc.get("lastname"));
System.out.println("+ Phone Number = "+theDoc.get("phone"));
System.out.println("+ Sector =
"+theDoc.get("sector"));
System.out.println("+ Group = "+theDoc.get("group"));
System.out.println("+ Content =
"+theDoc.get("content"));
System.out.println("");
}

// Always reports that out-of-order collection is acceptable.
public boolean acceptsDocsOutOfOrder()
{
return true;
}

// Only records docBase; the reader argument is ignored.
public void setNextReader(IndexReader reader, int docBase)
throws IOException
{
this.docBase = docBase;
}

// Only records the scorer; collect() never consults it.
public void setScorer(Scorer scorer) throws IOException
{
this.scorer = scorer;
}
};
searcher.search(query, streamingHitCollector);
}

}

And here's the output of the program :

Normal query: *sector:IT AND group:group*

+ First Name = Mike
+ Last Name = Nadeau
+ Phone Number = 111-222-3333
+ Sector = IT
+ Group = group
+ Content = blue this is some content

+ First Name = Kaven
+ Last Name = Rouseau
+ Phone Number = 222-333-1111
+ Sector = IT
+ Group = group
+ Content = red this is some content

+ First Name = Marie-Pier
+ Last Name = Nadeau
+ Phone Number = 123-123-1234
*+ Sector = Accounting*
+ Group = group
+ Content = this is some content

===========================================

BooleanQuery:

Done!

So I have 2 important problems:

(1) the normal query (sector:IT AND group:group) - how can it return
"Marie-Pier Nadeau", which has "Accounting" as sector?

(2) the BooleanQuery - why isn't it returning anything?

Thanks for your help!

- Mike
akaris [at] gmail


jake.mannix at gmail

Oct 28, 2009, 8:57 PM

Post #2 of 4 (442 views)
Permalink
Re: Lucene 2.9.0 / BooleanQuery problem [In reply to]

Hi Michel,

I don't have time to look in too much detail right now, but I'll bet ya $5
it's because
your query is for "sector:IT" - 'IT' lowercases to 'it' which is in the
default stopword
list, and if you're not careful about how you query with this, you'll end up
with TermQuery
instances which hit nothing, because this term may be stop-listed out of
your index!

Can you run the test again with no stop words in your query, and see what
it
gives?

-jake

On Wed, Oct 28, 2009 at 7:12 PM, Michel Nadeau <akaris [at] gmail> wrote:

> Hi !
>
> I spent all night trying to get a simple BooleanQuery working and I really
> can't figure out what is my problem. See this very simple program :
>
> public class test {
>
> @SuppressWarnings("deprecation")
> public static void main(String[] args) throws ParseException,
> CorruptIndexException, LockObtainFailedException, IOException
> {
> // Open index directory
> File idx = new File("d:/Java/index/");
>
> // Create IndexWriter
> IndexWriter writer = new IndexWriter(idx, new
> StandardAnalyzer(Version.LUCENE_CURRENT), true,
> IndexWriter.MaxFieldLength.LIMITED);
>
> //
> // Add some documents to the index
> //
>
> Document doc = new Document();
> doc.add(new Field("firstname", "Mike", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("phone", "111-222-3333", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("sector", "IT", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("group", "group", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("content", "blue this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
> writer.addDocument(doc);
>
> doc = new Document();
> doc.add(new Field("firstname", "Pascale", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("lastname", "Lavoie", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("phone", "333-222-1111", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("sector", "Accounting", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("group", "othergroup", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("content", "red this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
> writer.addDocument(doc);
>
> doc = new Document();
> doc.add(new Field("firstname", "Kaven", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("lastname", "Rouseau", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("phone", "222-333-1111", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("sector", "IT", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("group", "group", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("content", "red this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
> writer.addDocument(doc);
>
> doc = new Document();
> doc.add(new Field("firstname", "Mike", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("lastname", "Peters", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("phone", "444-444-4444", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("sector", "IT", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("group", "othergroup", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("content", "this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
> writer.addDocument(doc);
>
> doc = new Document();
> doc.add(new Field("firstname", "Marie-Pier", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("phone", "123-123-1234", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("sector", "Accounting", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("group", "group", Field.Store.YES,
> Field.Index.ANALYZED));
> doc.add(new Field("content", "this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
> writer.addDocument(doc);
>
> // Optimize + close index
> writer.optimize();
> writer.close();
>
> // Open IndexReader/Searcher
> IndexReader reader = IndexReader.open(idx, true);
> Searcher searcher = new IndexSearcher(reader);
>
> //
> // Test normal query
> //
> String m_field = "content";
> String m_query = "sector:IT AND group:group";
> Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
> QueryParser parser = new QueryParser(m_field, analyzer);
> Query query = parser.parse(m_query);
>
> // Search!
> System.out.println("Normal query: " + m_query + "\n");
> doStreamingSearch(searcher, query);
>
> System.out.println("===========================================\n");
>
> //
> // Test BooleanQuery
> //
> BooleanQuery query2 = new BooleanQuery();
> query2.add(new TermQuery(new Term("sector", "IT")), Occur.MUST);
> query2.add(new TermQuery(new Term("group", "group")), Occur.MUST);
>
> // Search!
> System.out.println("BooleanQuery:\n");
> doStreamingSearch(searcher, query2);
> System.out.println("Done!");
> }
>
> public static void doStreamingSearch(final Searcher searcher, Query
> query) throws IOException
> {
> Collector streamingHitCollector = new Collector()
> {
> private Scorer scorer;
> private int docBase;
>
> public void collect(int doc) throws IOException
> {
> Document theDoc = searcher.doc(doc);
>
> System.out.println("+ First Name =
> "+theDoc.get("firstname"));
> System.out.println("+ Last Name =
> "+theDoc.get("lastname"));
> System.out.println("+ Phone Number = "+theDoc.get("phone"));
> System.out.println("+ Sector =
> "+theDoc.get("sector"));
> System.out.println("+ Group = "+theDoc.get("group"));
> System.out.println("+ Content =
> "+theDoc.get("content"));
> System.out.println("");
> }
>
> public boolean acceptsDocsOutOfOrder()
> {
> return true;
> }
>
> public void setNextReader(IndexReader reader, int docBase)
> throws IOException
> {
> this.docBase = docBase;
> }
>
> public void setScorer(Scorer scorer) throws IOException
> {
> this.scorer = scorer;
> }
> };
> searcher.search(query, streamingHitCollector);
> }
>
> }
>
> And here's the output of the program :
>
> Normal query: *sector:IT AND group:group*
>
> + First Name = Mike
> + Last Name = Nadeau
> + Phone Number = 111-222-3333
> + Sector = IT
> + Group = group
> + Content = blue this is some content
>
> + First Name = Kaven
> + Last Name = Rouseau
> + Phone Number = 222-333-1111
> + Sector = IT
> + Group = group
> + Content = red this is some content
>
> + First Name = Marie-Pier
> + Last Name = Nadeau
> + Phone Number = 123-123-1234
> *+ Sector = Accounting*
> + Group = group
> + Content = this is some content
>
> ===========================================
>
> BooleanQuery:
>
> Done!
>
> So I have 2 important problems:
>
> (1) the normal query (sector:IT AND group:group) - how can it return
> "Marie-Pier Nadeau", which has "Accounting" as group?
>
> (2) the BooleanQuery - why isn't it returning anything?
>
> Thanks for your help!
>
> - Mike
> akaris [at] gmail
>


akaris at gmail

Oct 28, 2009, 9:07 PM

Post #3 of 4 (441 views)
Permalink
Re: Lucene 2.9.0 / BooleanQuery problem [In reply to]

OMG, it's SO OBVIOUS!!!!! For the normal search (sector:IT AND group:group)
the problem was indeed that IT is "it", a stopword. Thanks, I was so not
seeing it!

But what about the BooleanQuery? It should work fine too now...

//
// Test BooleanQuery (second attempt)
//
// Same two Occur.MUST clauses as in the original program, but the sector term
// text is now "Computing" instead of "IT". Fragment of main(): it relies on
// the searcher variable and the doStreamingSearch helper from that program.
//
BooleanQuery query2 = new BooleanQuery();
query2.add(new TermQuery(new Term("sector", "Computing")),
Occur.MUST);
query2.add(new TermQuery(new Term("group", "group")), Occur.MUST);

// Search!
System.out.println("BooleanQuery:\n");
doStreamingSearch(searcher, query2);
System.out.println("Done!");

Thanks,

- Mike
akaris [at] gmail


On Wed, Oct 28, 2009 at 11:57 PM, Jake Mannix <jake.mannix [at] gmail> wrote:

> Hi Michel,
>
> I don't have time to look in too much detail right now, but I'll bet ya $5
> it's because
> your query is for "sector:IT" - 'IT' lowercases to 'it' which is in the
> default stopword
> list, and if you're not careful about how you query with this, you'll end
> up
> with TermQuery
> instances which hit nothing, because this term may be stop-listed out of
> your index!
>
> Can you run the test again with no stop words in your query, and see what
> it
> gives?
>
> -jake
>
> On Wed, Oct 28, 2009 at 7:12 PM, Michel Nadeau <akaris [at] gmail> wrote:
>
> > Hi !
> >
> > I spent all night trying to get a simple BooleanQuery working and I
> really
> > can't figure out what is my problem. See this very simple program :
> >
> > public class test {
> >
> > @SuppressWarnings("deprecation")
> > public static void main(String[] args) throws ParseException,
> > CorruptIndexException, LockObtainFailedException, IOException
> > {
> > // Open index directory
> > File idx = new File("d:/Java/index/");
> >
> > // Create IndexWriter
> > IndexWriter writer = new IndexWriter(idx, new
> > StandardAnalyzer(Version.LUCENE_CURRENT), true,
> > IndexWriter.MaxFieldLength.LIMITED);
> >
> > //
> > // Add some documents to the index
> > //
> >
> > Document doc = new Document();
> > doc.add(new Field("firstname", "Mike", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("phone", "111-222-3333", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("sector", "IT", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("group", "group", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("content", "blue this is some content",
> > Field.Store.YES, Field.Index.ANALYZED));
> > writer.addDocument(doc);
> >
> > doc = new Document();
> > doc.add(new Field("firstname", "Pascale", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("lastname", "Lavoie", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("phone", "333-222-1111", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("sector", "Accounting", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("group", "othergroup", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("content", "red this is some content",
> > Field.Store.YES, Field.Index.ANALYZED));
> > writer.addDocument(doc);
> >
> > doc = new Document();
> > doc.add(new Field("firstname", "Kaven", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("lastname", "Rouseau", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("phone", "222-333-1111", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("sector", "IT", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("group", "group", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("content", "red this is some content",
> > Field.Store.YES, Field.Index.ANALYZED));
> > writer.addDocument(doc);
> >
> > doc = new Document();
> > doc.add(new Field("firstname", "Mike", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("lastname", "Peters", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("phone", "444-444-4444", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("sector", "IT", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("group", "othergroup", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("content", "this is some content",
> > Field.Store.YES, Field.Index.ANALYZED));
> > writer.addDocument(doc);
> >
> > doc = new Document();
> > doc.add(new Field("firstname", "Marie-Pier", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("phone", "123-123-1234", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("sector", "Accounting", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("group", "group", Field.Store.YES,
> > Field.Index.ANALYZED));
> > doc.add(new Field("content", "this is some content",
> > Field.Store.YES, Field.Index.ANALYZED));
> > writer.addDocument(doc);
> >
> > // Optimize + close index
> > writer.optimize();
> > writer.close();
> >
> > // Open IndexReader/Searcher
> > IndexReader reader = IndexReader.open(idx, true);
> > Searcher searcher = new IndexSearcher(reader);
> >
> > //
> > // Test normal query
> > //
> > String m_field = "content";
> > String m_query = "sector:IT AND group:group";
> > Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
> > QueryParser parser = new QueryParser(m_field, analyzer);
> > Query query = parser.parse(m_query);
> >
> > // Search!
> > System.out.println("Normal query: " + m_query + "\n");
> > doStreamingSearch(searcher, query);
> >
> >
> System.out.println("===========================================\n");
> >
> > //
> > // Test BooleanQuery
> > //
> > BooleanQuery query2 = new BooleanQuery();
> > query2.add(new TermQuery(new Term("sector", "IT")), Occur.MUST);
> > query2.add(new TermQuery(new Term("group", "group")), Occur.MUST);
> >
> > // Search!
> > System.out.println("BooleanQuery:\n");
> > doStreamingSearch(searcher, query2);
> > System.out.println("Done!");
> > }
> >
> > public static void doStreamingSearch(final Searcher searcher, Query
> > query) throws IOException
> > {
> > Collector streamingHitCollector = new Collector()
> > {
> > private Scorer scorer;
> > private int docBase;
> >
> > public void collect(int doc) throws IOException
> > {
> > Document theDoc = searcher.doc(doc);
> >
> > System.out.println("+ First Name =
> > "+theDoc.get("firstname"));
> > System.out.println("+ Last Name =
> > "+theDoc.get("lastname"));
> > System.out.println("+ Phone Number =
> "+theDoc.get("phone"));
> > System.out.println("+ Sector =
> > "+theDoc.get("sector"));
> > System.out.println("+ Group =
> "+theDoc.get("group"));
> > System.out.println("+ Content =
> > "+theDoc.get("content"));
> > System.out.println("");
> > }
> >
> > public boolean acceptsDocsOutOfOrder()
> > {
> > return true;
> > }
> >
> > public void setNextReader(IndexReader reader, int docBase)
> > throws IOException
> > {
> > this.docBase = docBase;
> > }
> >
> > public void setScorer(Scorer scorer) throws IOException
> > {
> > this.scorer = scorer;
> > }
> > };
> > searcher.search(query, streamingHitCollector);
> > }
> >
> > }
> >
> > And here's the output of the program :
> >
> > Normal query: *sector:IT AND group:group*
> >
> > + First Name = Mike
> > + Last Name = Nadeau
> > + Phone Number = 111-222-3333
> > + Sector = IT
> > + Group = group
> > + Content = blue this is some content
> >
> > + First Name = Kaven
> > + Last Name = Rouseau
> > + Phone Number = 222-333-1111
> > + Sector = IT
> > + Group = group
> > + Content = red this is some content
> >
> > + First Name = Marie-Pier
> > + Last Name = Nadeau
> > + Phone Number = 123-123-1234
> > *+ Sector = Accounting*
> > + Group = group
> > + Content = this is some content
> >
> > ===========================================
> >
> > BooleanQuery:
> >
> > Done!
> >
> > So I have 2 important problems:
> >
> > (1) the normal query (sector:IT AND group:group) - how can it return
> > "Marie-Pier Nadeau", which has "Accounting" as group?
> >
> > (2) the BooleanQuery - why isn't it returning anything?
> >
> > Thanks for your help!
> >
> > - Mike
> > akaris [at] gmail
> >
>


uwe at thetaphi

Oct 29, 2009, 12:28 AM

Post #4 of 4 (447 views)
Permalink
RE: Lucene 2.9.0 / BooleanQuery problem [In reply to]

The BooleanQuery does not work because the "sector" field is analyzed and you
are searching with a simple TermQuery, which is not analyzed. So "Computing"
is not lowercased and will not hit any terms (try Luke and look at the
terms you have indexed). Fields like "sector" should be indexed as
NOT_ANALYZED when they are intended to be hit with case-sensitive
TermQueries.

Uwe

-----
Uwe Schindler
H.-H.-Meier-Allee 63, D-28213 Bremen
http://www.thetaphi.de
eMail: uwe [at] thetaphi

> -----Original Message-----
> From: Michel Nadeau [mailto:akaris [at] gmail]
> Sent: Thursday, October 29, 2009 5:08 AM
> To: java-user [at] lucene
> Subject: Re: Lucene 2.9.0 / BooleanQuery problem
>
> OMG, it's SO OBVIOUS!!!!! For the normal search (sector:IT AND
> group:group)
> the problem was indeed that IT is "it", stopword. Thanks, I was so not
> seeing it!
>
> But what about the BooleanQuery? It should work fine too now...
>
> //
> // Test BooleanQuery
> //
> BooleanQuery query2 = new BooleanQuery();
> query2.add(new TermQuery(new Term("sector", "Computing")),
> Occur.MUST);
> query2.add(new TermQuery(new Term("group", "group")), Occur.MUST);
>
> // Search!
> System.out.println("BooleanQuery:\n");
> doStreamingSearch(searcher, query2);
> System.out.println("Done!");
>
> Thanks,
>
> - Mike
> akaris [at] gmail
>
>
> On Wed, Oct 28, 2009 at 11:57 PM, Jake Mannix <jake.mannix [at] gmail>
> wrote:
>
> > Hi Michel,
> >
> > I don't have time to look in too much detail right now, but I'll bet ya
> $5
> > it's because
> > your query is for "sector:IT" - 'IT' lowercases to 'it' which is in the
> > default stopword
> > list, and if you're not careful about how you query with this, you'll
> end
> > up
> > with TermQuery
> > instances which hit nothing, because this term may be stop-listed out of
> > your index!
> >
> > Can you run the test again with no stop words in your query, and see
> what
> > it
> > gives?
> >
> > -jake
> >
> > On Wed, Oct 28, 2009 at 7:12 PM, Michel Nadeau <akaris [at] gmail> wrote:
> >
> > > Hi !
> > >
> > > I spent all night trying to get a simple BooleanQuery working and I
> > really
> > > can't figure out what is my problem. See this very simple program :
> > >
> > > public class test {
> > >
> > > @SuppressWarnings("deprecation")
> > > public static void main(String[] args) throws ParseException,
> > > CorruptIndexException, LockObtainFailedException, IOException
> > > {
> > > // Open index directory
> > > File idx = new File("d:/Java/index/");
> > >
> > > // Create IndexWriter
> > > IndexWriter writer = new IndexWriter(idx, new
> > > StandardAnalyzer(Version.LUCENE_CURRENT), true,
> > > IndexWriter.MaxFieldLength.LIMITED);
> > >
> > > //
> > > // Add some documents to the index
> > > //
> > >
> > > Document doc = new Document();
> > > doc.add(new Field("firstname", "Mike", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("phone", "111-222-3333", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("sector", "IT", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("group", "group", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("content", "blue this is some content",
> > > Field.Store.YES, Field.Index.ANALYZED));
> > > writer.addDocument(doc);
> > >
> > > doc = new Document();
> > > doc.add(new Field("firstname", "Pascale", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("lastname", "Lavoie", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("phone", "333-222-1111", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("sector", "Accounting", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("group", "othergroup", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("content", "red this is some content",
> > > Field.Store.YES, Field.Index.ANALYZED));
> > > writer.addDocument(doc);
> > >
> > > doc = new Document();
> > > doc.add(new Field("firstname", "Kaven", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("lastname", "Rouseau", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("phone", "222-333-1111", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("sector", "IT", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("group", "group", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("content", "red this is some content",
> > > Field.Store.YES, Field.Index.ANALYZED));
> > > writer.addDocument(doc);
> > >
> > > doc = new Document();
> > > doc.add(new Field("firstname", "Mike", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("lastname", "Peters", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("phone", "444-444-4444", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("sector", "IT", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("group", "othergroup", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("content", "this is some content",
> > > Field.Store.YES, Field.Index.ANALYZED));
> > > writer.addDocument(doc);
> > >
> > > doc = new Document();
> > > doc.add(new Field("firstname", "Marie-Pier", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("phone", "123-123-1234", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("sector", "Accounting", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("group", "group", Field.Store.YES,
> > > Field.Index.ANALYZED));
> > > doc.add(new Field("content", "this is some content",
> > > Field.Store.YES, Field.Index.ANALYZED));
> > > writer.addDocument(doc);
> > >
> > > // Optimize + close index
> > > writer.optimize();
> > > writer.close();
> > >
> > > // Open IndexReader/Searcher
> > > IndexReader reader = IndexReader.open(idx, true);
> > > Searcher searcher = new IndexSearcher(reader);
> > >
> > > //
> > > // Test normal query
> > > //
> > > String m_field = "content";
> > > String m_query = "sector:IT AND group:group";
> > > Analyzer analyzer = new
> StandardAnalyzer(Version.LUCENE_CURRENT);
> > > QueryParser parser = new QueryParser(m_field, analyzer);
> > > Query query = parser.parse(m_query);
> > >
> > > // Search!
> > > System.out.println("Normal query: " + m_query + "\n");
> > > doStreamingSearch(searcher, query);
> > >
> > >
> > System.out.println("===========================================\n");
> > >
> > > //
> > > // Test BooleanQuery
> > > //
> > > BooleanQuery query2 = new BooleanQuery();
> > > query2.add(new TermQuery(new Term("sector", "IT")),
> Occur.MUST);
> > > query2.add(new TermQuery(new Term("group", "group")),
> Occur.MUST);
> > >
> > > // Search!
> > > System.out.println("BooleanQuery:\n");
> > > doStreamingSearch(searcher, query2);
> > > System.out.println("Done!");
> > > }
> > >
> > > public static void doStreamingSearch(final Searcher searcher, Query
> > > query) throws IOException
> > > {
> > > Collector streamingHitCollector = new Collector()
> > > {
> > > private Scorer scorer;
> > > private int docBase;
> > >
> > > public void collect(int doc) throws IOException
> > > {
> > > Document theDoc = searcher.doc(doc);
> > >
> > > System.out.println("+ First Name =
> > > "+theDoc.get("firstname"));
> > > System.out.println("+ Last Name =
> > > "+theDoc.get("lastname"));
> > > System.out.println("+ Phone Number =
> > "+theDoc.get("phone"));
> > > System.out.println("+ Sector =
> > > "+theDoc.get("sector"));
> > > System.out.println("+ Group =
> > "+theDoc.get("group"));
> > > System.out.println("+ Content =
> > > "+theDoc.get("content"));
> > > System.out.println("");
> > > }
> > >
> > > public boolean acceptsDocsOutOfOrder()
> > > {
> > > return true;
> > > }
> > >
> > > public void setNextReader(IndexReader reader, int docBase)
> > > throws IOException
> > > {
> > > this.docBase = docBase;
> > > }
> > >
> > > public void setScorer(Scorer scorer) throws IOException
> > > {
> > > this.scorer = scorer;
> > > }
> > > };
> > > searcher.search(query, streamingHitCollector);
> > > }
> > >
> > > }
> > >
> > > And here's the output of the program :
> > >
> > > Normal query: *sector:IT AND group:group*
> > >
> > > + First Name = Mike
> > > + Last Name = Nadeau
> > > + Phone Number = 111-222-3333
> > > + Sector = IT
> > > + Group = group
> > > + Content = blue this is some content
> > >
> > > + First Name = Kaven
> > > + Last Name = Rouseau
> > > + Phone Number = 222-333-1111
> > > + Sector = IT
> > > + Group = group
> > > + Content = red this is some content
> > >
> > > + First Name = Marie-Pier
> > > + Last Name = Nadeau
> > > + Phone Number = 123-123-1234
> > > *+ Sector = Accounting*
> > > + Group = group
> > > + Content = this is some content
> > >
> > > ===========================================
> > >
> > > BooleanQuery:
> > >
> > > Done!
> > >
> > > So I have 2 important problems:
> > >
> > > (1) the normal query (sector:IT AND group:group) - how can it return
> > > "Marie-Pier Nadeau", which has "Accounting" as group?
> > >
> > > (2) the BooleanQuery - why isn't it returning anything?
> > >
> > > Thanks for your help!
> > >
> > > - Mike
> > > akaris [at] gmail
> > >
> >


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe [at] lucene
For additional commands, e-mail: java-user-help [at] lucene

Lucene java-user RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.