
daniel at svn
Nov 24, 2009, 9:12 AM
Post #1 of 1
(122 views)
Permalink
|
|
SVN: [59383] trunk/WikiWord/WikiWord
|
|
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/59383 Revision: 59383 Author: daniel Date: 2009-11-24 17:12:29 +0000 (Tue, 24 Nov 2009) Log Message: ----------- feature vector cache (INCOMPLETE, does not compile) Modified Paths: -------------- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptRelatedness.java trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinkFeatureFetcher.java trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordStoreSchema.java trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/BuildConceptInfo.java trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptInfoStoreBuilder.java trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java Added Paths: ----------- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptFeatures.java Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -19,15 +19,15 @@ import de.brightbyte.wikiword.model.WikiWordRanking; import de.brightbyte.wikiword.store.LocalConceptStore; -public class CoherenceDisambiguator<K> extends AbstractDisambiguator { +public class CoherenceDisambiguator extends AbstractDisambiguator { protected int frequencyThreshold = 2; //FIXME: use complex cutoff specifier! protected double scoreThreshold = 0.002; protected double popularityBias = 0.01; - protected Similarity<LabeledVector<K>> similarityMeasure; - protected FeatureFetcher<K> featureFetcher; + protected Similarity<LabeledVector<Integer>> similarityMeasure; + protected FeatureFetcher<LocalConcept> featureFetcher; - public CoherenceDisambiguator(LocalConceptStore conceptStore, FeatureFetcher<K> featureFetcher, Similarity<LabeledVector<K>> sim) { + public CoherenceDisambiguator(LocalConceptStore conceptStore, FeatureFetcher<LocalConcept> featureFetcher, Similarity<LabeledVector<Integer>> sim) { super(conceptStore); if (sim==null) throw new NullPointerException(); @@ -40,16 +40,16 @@ return featureFetcher; } - public void setFeatureFetcher(FeatureFetcher<K> featureFetcher) { + public void setFeatureFetcher(FeatureFetcher<LocalConcept> featureFetcher) { this.featureFetcher = featureFetcher; } - public Similarity<LabeledVector<K>> getSimilarityMeasure() { + public Similarity<LabeledVector<Integer>> getSimilarityMeasure() { return similarityMeasure; } public void setSimilarityMeasure( - Similarity<LabeledVector<K>> similarityMeasure) { + Similarity<LabeledVector<Integer>> similarityMeasure) { if (similarityMeasure==null) throw new NullPointerException(); this.similarityMeasure = similarityMeasure; } @@ -229,10 +229,10 @@ d = similarities.get(a, b); } else { - LabeledVector<K> fa = featureFetcher.getFeatures(a); - LabeledVector<K> fb = featureFetcher.getFeatures(b); + ConceptFeatures<LocalConcept> fa = featureFetcher.getFeatures(a); + ConceptFeatures<LocalConcept> fb = featureFetcher.getFeatures(b); - d = similarityMeasure.similarity(fa, fb); + d = similarityMeasure.similarity(fa.getFeatureVector(), fb.getFeatureVector()); similarities.set(a, b, d); } } Added: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptFeatures.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptFeatures.java (rev 0) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptFeatures.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -0,0 +1,93 @@ +package de.brightbyte.wikiword.disambig; + +import de.brightbyte.data.LabeledVector; +import de.brightbyte.data.MapLabeledVector; +import de.brightbyte.wikiword.model.WikiWordConcept; +import de.brightbyte.wikiword.model.WikiWordConceptReference; + +public class ConceptFeatures<C extends WikiWordConcept> { + protected LabeledVector<Integer> features; + protected WikiWordConceptReference<C> reference; + + public ConceptFeatures(WikiWordConceptReference<C> reference, LabeledVector<Integer> features) { + this.features = features; + this.reference = reference; + } + + public ConceptFeatures(WikiWordConceptReference<C> reference, byte[] features) { + this(reference, unserializeVector(features)); + } + + public String toString() { + return reference+ ":"+features; + } + public LabeledVector<Integer> getFeatureVector() { + return features; + } + + public WikiWordConceptReference<C> getConceptReference() { + return reference; + } + + public int getConceptId() { + return reference.getId(); + } + + public byte[] getFeatureVectorData() { + return serializeVector(features); + } + + protected static byte[] serializeVector(LabeledVector<Integer> v) { + int c = v.size(); + byte[] data = new byte[c*4 + c*8]; + + int i = 0; + for (Integer k: v.labels()) { + int id = k.intValue(); + double d = v.get(k); + long b = Double.doubleToLongBits(d); + + data[i++] = (byte)(id & 0xFF); + data[i++] = (byte)(id >>> 8 & 0xFF); + data[i++] = (byte)(id >>> 16 & 0xFF); + data[i++] = (byte)(id >>> 24 & 0xFF); + + data[i++] = (byte)(b & 0xFF); + data[i++] = (byte)(b >>> 8 & 0xFF); + data[i++] = (byte)(b >>> 16 & 0xFF); + data[i++] = (byte)(b >>> 24 & 0xFF); + data[i++] = (byte)(b >>> 32 & 0xFF); + data[i++] = (byte)(b >>> 40 & 0xFF); + data[i++] = (byte)(b >>> 48 & 0xFF); + data[i++] = (byte)(b >>> 56 & 0xFF); + } + + return data; + } + + protected static LabeledVector<Integer> unserializeVector(byte[] data) { + LabeledVector<Integer> v = new MapLabeledVector<Integer>(); + + for (int i = 0; i<data.length; ) { + int id = (data[i++] & 0xFF) ; + id |= (data[i++] & 0xFF) << 8; + id |= (data[i++] & 0xFF) << 16; + id |= (data[i++] & 0xFF) << 24; + + long b = (data[i++] & 0xFFL); + b |= (data[i++] & 0xFFL) << 8; + b |= (data[i++] & 0xFFL) << 16; + b |= (data[i++] & 0xFFL) << 24; + b |= (data[i++] & 0xFFL) << 32; + b |= (data[i++] & 0xFFL) << 40; + b |= (data[i++] & 0xFFL) << 48; + b |= (data[i++] & 0xFFL) << 56; + + double d = Double.longBitsToDouble(b); + v.set(id, d); + } + + return v; + } + +} Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptRelatedness.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptRelatedness.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptRelatedness.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -7,14 +7,14 @@ import de.brightbyte.wikiword.model.WikiWordConcept; import de.brightbyte.wikiword.model.WikiWordConceptReference; -public class ConceptRelatedness<K> implements Similarity<WikiWordConcept> { +public class ConceptRelatedness<C extends WikiWordConcept> implements Similarity<C> { - public static class Relatedness { + public static class Relatedness<C extends WikiWordConcept> { public final double relatedness; - public final WikiWordConceptReference a; - public final WikiWordConceptReference b; + public final WikiWordConceptReference<C> a; + public final WikiWordConceptReference<C> b; - public Relatedness(final double relatedness, final WikiWordConceptReference a, final WikiWordConceptReference b) { + public Relatedness(final double relatedness, final WikiWordConceptReference<C> a, final WikiWordConceptReference<C> b) { super(); this.relatedness = relatedness; this.a = a; @@ -27,25 +27,25 @@ } } - protected Similarity<LabeledVector<K>> similarityMeasure; - protected FeatureFetcher<K> featureFetcher; + protected Similarity<LabeledVector<Integer>> similarityMeasure; + protected FeatureFetcher<C> featureFetcher; - public ConceptRelatedness(Similarity<LabeledVector<K>> similarityMeasure, FeatureFetcher<K> featureFetcher) { + public ConceptRelatedness(Similarity<LabeledVector<Integer>> similarityMeasure, FeatureFetcher<C> featureFetcher) { this.similarityMeasure = similarityMeasure; this.featureFetcher = featureFetcher; } - public Relatedness relatedness(WikiWordConcept a, WikiWordConcept b) { + public Relatedness relatedness(C a, C b) { double d = similarity(a, b); - return new Relatedness(d, a.getReference(), b.getReference()); + return new Relatedness<C>(d, a.getReference(), b.getReference()); } - public double similarity(WikiWordConcept a, WikiWordConcept b) { + public double similarity(C a, C b) { try { - LabeledVector<K> fa = featureFetcher.getFeatures(a); - LabeledVector<K> fb = featureFetcher.getFeatures(b); + ConceptFeatures<C> fa = featureFetcher.getFeatures(a); + ConceptFeatures<C> fb = featureFetcher.getFeatures(b); - double d = similarityMeasure.similarity(fa, fb); + double d = similarityMeasure.similarity(fa.getFeatureVector(), fb.getFeatureVector()); return d; } catch (PersistenceException e) { throw new UncheckedPersistenceException(e); Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -1,9 +1,8 @@ package de.brightbyte.wikiword.disambig; -import de.brightbyte.data.LabeledVector; import de.brightbyte.util.PersistenceException; import de.brightbyte.wikiword.model.WikiWordConcept; -public interface FeatureFetcher<K> { - public LabeledVector<K> getFeatures(WikiWordConcept c) throws PersistenceException; +public interface FeatureFetcher<C extends WikiWordConcept> { + public ConceptFeatures<C> getFeatures(C c) throws PersistenceException; } Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinkFeatureFetcher.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinkFeatureFetcher.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinkFeatureFetcher.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -7,7 +7,7 @@ import de.brightbyte.wikiword.model.WikiWordConceptReference; import de.brightbyte.wikiword.store.WikiWordConceptStore; -public class LinkFeatureFetcher implements FeatureFetcher<Integer> { +public class LinkFeatureFetcher<C extends WikiWordConcept> implements FeatureFetcher<C> { protected boolean useRelevance; protected boolean useCardinality; @@ -33,7 +33,7 @@ return x*y; } - public LabeledVector<Integer> getFeatures(WikiWordConcept c) throws PersistenceException { + public ConceptFeatures<C> getFeatures(WikiWordConcept c) throws PersistenceException { LabeledVector<Integer> features = new MapLabeledVector<Integer>(); //XXX: magic numbers! @@ -72,7 +72,7 @@ //XXX: compare cooccurrances (i.e. eval second level cooc) - return features; + return new ConceptFeatures<C>(c.getReference(), features); } private Integer getLabel(WikiWordConceptReference r) { Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -49,6 +49,7 @@ protected EntityTable conceptInfoTable; protected EntityTable conceptDescriptionTable; + protected EntityTable conceptFeaturesTable; private String fields(String... f) { if (f.length==0) return null; @@ -115,7 +116,7 @@ setGroupConcatMaxLen(listBlobSize); //TODO: if it's larger currently, don't shrink! conceptInfoTable = new EntityTable(this, "concept_info", getDefaultTableAttributes()); - conceptInfoTable.addField( new DatabaseField(this, "concept", "INT", "AUTO_INCREMENT", true, KeyType.PRIMARY ) ); + conceptInfoTable.addField( new DatabaseField(this, "concept", "INT", null, true, KeyType.PRIMARY ) ); conceptInfoTable.addField( new DatabaseField(this, "inlinks", getTextType(listBlobSize), null, false, null ) ); conceptInfoTable.addField( new DatabaseField(this, "outlinks", getTextType(listBlobSize), null, false, null ) ); conceptInfoTable.addField( new DatabaseField(this, "narrower", getTextType(listBlobSize), null, false, null ) ); @@ -129,11 +130,17 @@ if (description) { conceptDescriptionTable = new EntityTable(this, "concept_description", getDefaultTableAttributes()); - conceptDescriptionTable.addField( new DatabaseField(this, "concept", "INT", "AUTO_INCREMENT", true, KeyType.PRIMARY ) ); + conceptDescriptionTable.addField( new DatabaseField(this, "concept", "INT", null, true, KeyType.PRIMARY ) ); conceptDescriptionTable.addField( new DatabaseField(this, "terms", getTextType(listBlobSize), null, false, null ) ); conceptDescriptionTable.setAutomaticField(null); addTable(conceptDescriptionTable); } + + conceptFeaturesTable = new EntityTable(this, "concept_features", getDefaultTableAttributes()); + conceptFeaturesTable.addField( new DatabaseField(this, "concept", "INT", null, true, KeyType.PRIMARY ) ); + conceptFeaturesTable.addField( new DatabaseField(this, "features", getFieldType(byte[].class), null, false, null ) ); + conceptFeaturesTable.setAutomaticField(null); + addTable(conceptFeaturesTable); } @Override Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordStoreSchema.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordStoreSchema.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordStoreSchema.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -297,22 +297,34 @@ } - public String encodeSet(ConceptType[] values) { - int[] vv = new int[values.length]; - for (int i=0; i<values.length; i++) vv[i] = values[i].getCode(); - return encodeSet(vv); + public String encodeSet(ConceptType[] values) throws PersistenceException { + try { + int[] vv = new int[values.length]; + for (int i=0; i<values.length; i++) vv[i] = values[i].getCode(); + return encodeSet(vv); + } catch (SQLException e) { + throw new PersistenceException(e); + } } - public String encodeSet(ResourceType[] values) { - int[] vv = new int[values.length]; - for (int i=0; i<values.length; i++) vv[i] = values[i].getCode(); - return encodeSet(vv); + public String encodeSet(ResourceType[] values) throws PersistenceException { + try { + int[] vv = new int[values.length]; + for (int i=0; i<values.length; i++) vv[i] = values[i].getCode(); + return encodeSet(vv); + } catch (SQLException e) { + throw new PersistenceException(e); + } } - public String encodeSet(Namespace[] values) { - int[] vv = new int[values.length]; - for (int i=0; i<values.length; i++) vv[i] = values[i].getNumber(); - return encodeSet(vv); + public String encodeSet(Namespace[] values) throws PersistenceException { + try { + int[] vv = new int[values.length]; + for (int i=0; i<values.length; i++) vv[i] = values[i].getNumber(); + return encodeSet(vv); + } catch (SQLException e) { + throw new PersistenceException(e); + } } Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -376,9 +376,13 @@ public DataSet<C> getConcepts(int[] ids) throws PersistenceException { - String sql = conceptSelect("-1"); - sql += " WHERE C.id IN " + database.encodeSet(ids); - return new QueryDataSet<C>(database, new ConceptFactory(), "getConcepts", sql, false); + try { + String sql = conceptSelect("-1"); + sql += " WHERE C.id IN " + database.encodeSet(ids); + return new QueryDataSet<C>(database, new ConceptFactory(), "getConcepts", sql, false); + } catch (SQLException e) { + throw new PersistenceException(e); + } } protected abstract C newConcept(Map<String, Object> data) throws PersistenceException; Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/BuildConceptInfo.java =================================================================== --- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/BuildConceptInfo.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/BuildConceptInfo.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -2,8 +2,13 @@ import java.io.IOException; +import de.brightbyte.data.cursor.CursorProcessor; +import de.brightbyte.data.cursor.DataCursor; import de.brightbyte.util.PersistenceException; +import de.brightbyte.wikiword.disambig.ConceptFeatures; +import de.brightbyte.wikiword.disambig.FeatureFetcher; import de.brightbyte.wikiword.model.WikiWordConcept; +import de.brightbyte.wikiword.model.WikiWordConceptReference; import de.brightbyte.wikiword.store.builder.ConceptInfoStoreBuilder; import de.brightbyte.wikiword.store.builder.WikiWordConceptStoreBuilder; @@ -12,9 +17,9 @@ * ImportDump can be invoked as a standalone program, use --help as a * command line parameter for usage information. */ -public class BuildConceptInfo extends ImportApp<WikiWordConceptStoreBuilder<? extends WikiWordConcept>> { +public class BuildConceptInfo<C extends WikiWordConcept> extends ImportApp<WikiWordConceptStoreBuilder<C>> { - protected ConceptInfoStoreBuilder infoStore; + protected ConceptInfoStoreBuilder<C> infoStore; public BuildConceptInfo() { super("BuildConceptInfo", true, true); @@ -39,10 +44,38 @@ @Override protected void run() throws Exception { - section("-- build info --------------------------------------------------"); + section("-- build concept property cache --------------------------------------------------"); this.infoStore.buildConceptInfo(); + + section("-- build concept feature vector cache --------------------------------------------------"); + if (agenda.beginTask("buildConceptInfo", "buildConceptFeatureVectors")) { + //TODO: cleanup incomplete run + buildConceptFeatureVectors(); + agenda.endTask("buildConceptInfo", "buildConceptFeatureVectors"); + } } + protected FeatureFetcher<C> featureFetcher; + + private void buildConceptFeatureVectors() { + CursorProcessor<C> p = new CursorProcessor<C>() { + + public void process(DataCursor<C> c) throws Exception { + + C r; + while ((r = c.next())!=null) { + ConceptFeatures<C> features = featureFetcher.getFeatures(r); + infoStore.storeConceptFeatures(features); + } + + infoStore.flush(); + } + + }; + + conceptStore.processConcepts(p); + } + public static void main(String[] argv) throws Exception { BuildConceptInfo app = new BuildConceptInfo(); app.launch(argv); Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptInfoStoreBuilder.java =================================================================== --- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptInfoStoreBuilder.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptInfoStoreBuilder.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -1,8 +1,18 @@ package de.brightbyte.wikiword.store.builder; +import de.brightbyte.data.cursor.CursorProcessor; import de.brightbyte.util.PersistenceException; +import de.brightbyte.wikiword.disambig.ConceptFeatures; import de.brightbyte.wikiword.model.WikiWordConcept; +import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ReferenceListEntrySpec; public interface ConceptInfoStoreBuilder<C extends WikiWordConcept> extends WikiWordStoreBuilder { public void buildConceptInfo() throws PersistenceException; + + public int buildConceptPropertyCache(String targetField, String propertyTable, String propertyConceptField, + ReferenceListEntrySpec spec, String threshold) throws PersistenceException; + + public void storeConceptFeatures(ConceptFeatures<C> features) throws PersistenceException; + + public int processConcepts(CursorProcessor<C> processor) throws PersistenceException; } Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java =================================================================== --- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -1397,6 +1397,23 @@ } } + + protected static DatabaseDataSet.Factory<LocalConceptReference> localConceptReferenceFactory = new DatabaseDataSet.Factory<LocalConceptReference>() { + public LocalConceptReference newInstance(ResultSet row) throws SQLException, PersistenceException { + int id = row.getInt("id"); + String name = asString(row.getObject("name")); + + return new LocalConceptReference(id, name, -1, -1); + } + }; + + public int processConcepts(final CursorProcessor<LocalConceptReference> processor) throws PersistenceException { + String sql = "SELECT * FROM "+conceptTable.getSQLName(); + String where = "type = "+ConceptType.UNKNOWN.getCode(); + + DatabaseAccess.SimpleChunkedQuery query = new DatabaseAccess.SimpleChunkedQuery(getDatabaseAccess(), "processUnknownConcepts", "process", sql, where, null, conceptTable, "id"); + return executeChunkedQuery(query, 1, localConceptReferenceFactory, processor); + } } ////////////////////////////////////////////////////////////////////////////// Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java =================================================================== --- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -3,6 +3,7 @@ import static de.brightbyte.db.DatabaseUtil.asInt; import static de.brightbyte.db.DatabaseUtil.asString; +import java.sql.Blob; import java.sql.ResultSet; import java.sql.SQLException; import java.util.HashMap; @@ -26,6 +27,7 @@ import de.brightbyte.util.Processor; import de.brightbyte.wikiword.ConceptType; import de.brightbyte.wikiword.TweakSet; +import de.brightbyte.wikiword.disambig.ConceptFeatures; import de.brightbyte.wikiword.model.WikiWordConcept; import de.brightbyte.wikiword.model.WikiWordConceptReference; import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema; @@ -624,13 +626,17 @@ } protected void storeStatsEntry(String block, String name, double value) throws PersistenceException { - //TODO: inserter?... - String sql = "REPLACE INTO "+statsTable.getSQLName()+" (block, name, value) VALUES (" - +database.encodeValue(block)+", " - +database.encodeValue(name)+", " - +database.encodeValue(value)+") "; - - executeUpdate("storeStatsEntry", sql); + try { + //TODO: inserter?... + String sql = "REPLACE INTO "+statsTable.getSQLName()+" (block, name, value) VALUES (" + +database.encodeValue(block)+", " + +database.encodeValue(name)+", " + +database.encodeValue(value)+") "; + + executeUpdate("storeStatsEntry", sql); + } catch (SQLException e) { + throw new PersistenceException(e); + } } protected void storeStatsEntries(String block, ResultSet rs, GroupNameTranslator translator) throws PersistenceException { @@ -679,13 +685,17 @@ String name = e.getKey(); double value = e.getValue().doubleValue(); - sql.append( "(" ); - sql.append(database.encodeValue(block)); - sql.append(", "); - sql.append(database.encodeValue(name)); - sql.append(", "); - sql.append(database.encodeValue(value)); - sql.append( ")" ); + try { + sql.append( "(" ); + sql.append(database.encodeValue(block)); + sql.append(", "); + sql.append(database.encodeValue(name)); + sql.append(", "); + sql.append(database.encodeValue(value)); + sql.append( ")" ); + } catch (SQLException e1) { + throw new PersistenceException(e1); + } } executeUpdate("storeStatsEntries", sql.toString()); @@ -776,12 +786,18 @@ protected WikiWordConceptStoreSchema conceptDatabase; protected EntityTable conceptInfoTable; + protected EntityTable conceptFeaturesTable; + protected Inserter conceptFeaturesInserter; protected DatabaseConceptInfoStoreBuilder(ConceptInfoStoreSchema database, TweakSet tweaks, Agenda agenda) throws SQLException { super(database, tweaks, agenda); Inserter conceptInfoInserter = configureTable("concept_info", 64, 1024); conceptInfoTable = (EntityTable)conceptInfoInserter.getTable(); + + conceptFeaturesInserter = configureTable("concept_features", 64, 1024); + conceptFeaturesInserter.setLenient(true); //ignore dupes. //TODO: replace instead! + conceptFeaturesTable = (EntityTable)conceptFeaturesInserter.getTable(); } public void buildConceptInfo() throws PersistenceException { @@ -848,6 +864,11 @@ return executeChunkedUpdate("prepareConceptCache", cacheTable.getName()+"."+conceptIdField, sql, null, t, "id"); } + public int buildConceptPropertyCache(String targetField, String propertyTable, String propertyConceptField, + ReferenceListEntrySpec spec, String threshold) throws PersistenceException { + return buildConceptPropertyCache(conceptInfoTable, "concept", targetField, propertyTable, propertyConceptField, spec, false, threshold, 1); + } + protected int buildConceptPropertyCache( final DatabaseTable cacheTable, final String cacheIdField, final String propertyField, final String realtion, final String relConceptField, @@ -889,5 +910,19 @@ return executeChunkedUpdate(query, chunkFactor); } + /** + * @see de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder#storeRawText(int, java.lang.String) + */ + public void storeConceptFeatures(ConceptFeatures<T> features) throws PersistenceException { + try { + if (conceptFeaturesInserter==null) conceptFeaturesInserter = conceptFeaturesTable.getInserter(); + + conceptFeaturesInserter.updateInt("concept", features.getConceptId()); + conceptFeaturesInserter.updateBlob("features", features.getFeatureVectorData()); + conceptFeaturesInserter.updateRow(); + } catch (SQLException e) { + throw new PersistenceException(e); + } + } } } Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java =================================================================== --- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java 2009-11-24 14:07:57 UTC (rev 59382) +++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java 2009-11-24 17:12:29 UTC (rev 59383) @@ -18,10 +18,12 @@ import de.brightbyte.wikiword.DatasetIdentifier; import de.brightbyte.wikiword.ExtractionRule; import de.brightbyte.wikiword.ResourceType; +import de.brightbyte.wikiword.disambig.ConceptFeatures; import de.brightbyte.wikiword.model.LocalConcept; import de.brightbyte.wikiword.model.LocalConceptReference; import de.brightbyte.wikiword.model.WikiWordConceptReference; import de.brightbyte.wikiword.schema.AliasScope; +import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ReferenceListEntrySpec; import de.brightbyte.wikiword.store.GroupNameTranslator; import de.brightbyte.wikiword.store.WikiWordConceptStore; @@ -420,6 +422,15 @@ public DatasetIdentifier getDatasetIdentifier() { return dataset; } + + public int buildConceptPropertyCache(String targetField, String propertyTable, String propertyConceptField, ReferenceListEntrySpec spec, String threshold) throws PersistenceException { + // TODO Auto-generated method stub + return 0; + } + + public void storeConceptFeatures(ConceptFeatures<LocalConcept> features) throws PersistenceException { + log("+ storeConceptFeatures: concept = "+features.getConceptId()+", features = "+features.getFeatureVector()); + } } _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS [at] lists https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
|