
rainman at svn
Nov 21, 2009, 6:46 PM
Post #1 of 1
(54 views)
Permalink
|
|
SVN: [59327] branches/lucene-search-2.1
|
|
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/59327

Revision: 59327
Author: rainman
Date: 2009-11-22 02:46:13 +0000 (Sun, 22 Nov 2009)

Log Message:
-----------
Add yiddish exceptions and some yiddish tests

Modified Paths:
--------------
branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java
branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java

Modified: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java
===================================================================
--- branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java 2009-11-22 01:24:50 UTC (rev 59326)
+++ branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java 2009-11-22 02:46:13 UTC (rev 59327)
@@ -97,6 +97,13 @@
  }
  in.close();
+ // add some exception requested by users
+ // yiddish stuffs
+ combining[0x05B7] = true;
+ combining[0x05B8] = true;
+ combining[0x05BC] = true;
+ combining[0x05BF] = true;
+
  // decomposition table
  char[][] table = new char[65536][];
@@ -134,6 +141,29 @@
  }
  }
  }
+
+ // some decomposition exceptions
+ // yiddish stuffs
+ table[0x05F0] = new char[2]; // HEBREW LIGATURE YIDDISH DOUBLE VAV
+ table[0x05F0][0] = 0x05D5;
+ table[0x05F0][1] = 0x05D5;
+
+ table[0x05F1] = new char[2]; // HEBREW LIGATURE YIDDISH VAV YOD
+ table[0x05F1][0] = 0x05D5;
+ table[0x05F1][1] = 0x05D9;
+
+ table[0x05F2] = new char[2]; // HEBREW LIGATURE YIDDISH DOUBLE YOD
+ table[0x05F2][0] = 0x05D9;
+ table[0x05F2][1] = 0x05D9;
+
+ table[0xFB1F] = new char[2]; // HEBREW LIGATURE YIDDISH YOD YOD PATAH
+ table[0xFB1F][0] = 0x05D9;
+ table[0xFB1F][1] = 0x05D9;
+
+ table[0xFB1D] = new char[1]; // HEBREW LETTER YOD WITH HIRIQ
+ table[0xFB1D][0] = 0x05D9;
+
+
  // using decomposition table recursively decompose characters
  for(int ich = 0; ich <= 0xFFFF; ich++){
  if(table[ich]==null)

Modified: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java
===================================================================
--- branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java 2009-11-22 01:24:50 UTC (rev 59326)
+++ branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java 2009-11-22 02:46:13 UTC (rev 59327)
@@ -21,6 +21,8 @@
  import junit.framework.TestCase;
  public class WikiQueryParserTest extends WikiTestCase {
+
+
  public void testEnglish() {
  IndexId enwiki = IndexId.get("enwiki");
@@ -136,11 +138,22 @@
  tokens = parser.tokenizeForSpellCheck("+incategory:\"Suspension bridges in the United States\"");
  assertEquals("[]", tokens.toString());
-
+ /* ================== unicode decomposition stuffs ============ */
+ q = parser.parseRaw("šta");
+ assertEquals("contents:šta contents:sta^0.5",q.toString());
+
+ q = parser.parseRaw("װאנט");
+ assertEquals("contents:װאנט contents:וואנט^0.5",q.toString());
+
+ q = parser.parseRaw("פּאריז");
+ assertEquals("contents:פּאריז contents:פאריז^0.5",q.toString());
+
+
  } catch(Exception e){
  }
  }
+
  public void XtestEnglishFull() {
  IndexId enwiki = IndexId.get("enwiki");

_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS [at] lists
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
|