Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Wikipedia: Mediawiki-CVS

SVN: [59327] branches/lucene-search-2.1

 

 

Wikipedia mediawiki-cvs RSS feed   Index | Next | Previous | View Threaded


rainman at svn

Nov 21, 2009, 6:46 PM

Post #1 of 1 (54 views)
Permalink
SVN: [59327] branches/lucene-search-2.1

http://www.mediawiki.org/wiki/Special:Code/MediaWiki/59327

Revision: 59327
Author: rainman
Date: 2009-11-22 02:46:13 +0000 (Sun, 22 Nov 2009)

Log Message:
-----------
Add Yiddish exceptions and some Yiddish tests

Modified Paths:
--------------
branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java
branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java

Modified: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java
===================================================================
--- branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java 2009-11-22 01:24:50 UTC (rev 59326)
+++ branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java 2009-11-22 02:46:13 UTC (rev 59327)
@@ -97,6 +97,13 @@
}
in.close();

+ // add some exceptions requested by users
+ // Yiddish stuff
+ combining[0x05B7] = true;
+ combining[0x05B8] = true;
+ combining[0x05BC] = true;
+ combining[0x05BF] = true;
+
// decomposition table
char[][] table = new char[65536][];

@@ -134,6 +141,29 @@
}
}
}
+
+ // some decomposition exceptions
+ // Yiddish stuff
+ table[0x05F0] = new char[2]; // HEBREW LIGATURE YIDDISH DOUBLE VAV
+ table[0x05F0][0] = 0x05D5;
+ table[0x05F0][1] = 0x05D5;
+
+ table[0x05F1] = new char[2]; // HEBREW LIGATURE YIDDISH VAV YOD
+ table[0x05F1][0] = 0x05D5;
+ table[0x05F1][1] = 0x05D9;
+
+ table[0x05F2] = new char[2]; // HEBREW LIGATURE YIDDISH DOUBLE YOD
+ table[0x05F2][0] = 0x05D9;
+ table[0x05F2][1] = 0x05D9;
+
+ table[0xFB1F] = new char[2]; // HEBREW LIGATURE YIDDISH YOD YOD PATAH
+ table[0xFB1F][0] = 0x05D9;
+ table[0xFB1F][1] = 0x05D9;
+
+ table[0xFB1D] = new char[1]; // HEBREW LETTER YOD WITH HIRIQ
+ table[0xFB1D][0] = 0x05D9;
+
+
// using decomposition table recursively decompose characters
for(int ich = 0; ich <= 0xFFFF; ich++){
if(table[ich]==null)

Modified: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java
===================================================================
--- branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java 2009-11-22 01:24:50 UTC (rev 59326)
+++ branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java 2009-11-22 02:46:13 UTC (rev 59327)
@@ -21,6 +21,8 @@
import junit.framework.TestCase;

public class WikiQueryParserTest extends WikiTestCase {
+
+

public void testEnglish() {
IndexId enwiki = IndexId.get("enwiki");
@@ -136,11 +138,22 @@

tokens = parser.tokenizeForSpellCheck("+incategory:\"Suspension bridges in the United States\"");
assertEquals("[]", tokens.toString());
-

+ /* ================== Unicode decomposition stuff ============ */
+ q = parser.parseRaw("šta");
+ assertEquals("contents:šta contents:sta^0.5",q.toString());
+
+ q = parser.parseRaw("װאנט");
+ assertEquals("contents:װאנט contents:וואנט^0.5",q.toString());
+
+ q = parser.parseRaw("פּאריז");
+ assertEquals("contents:פּאריז contents:פאריז^0.5",q.toString());
+
+
} catch(Exception e){
}
}
+

public void XtestEnglishFull() {
IndexId enwiki = IndexId.get("enwiki");



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS [at] lists
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Wikipedia mediawiki-cvs RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact lists@gossamer-threads.com
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.