Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Wikipedia: Mediawiki-CVS

SVN: [59383] trunk/WikiWord/WikiWord

 

 

Wikipedia mediawiki-cvs RSS feed   Index | Next | Previous | View Threaded


daniel at svn

Nov 24, 2009, 9:12 AM

Post #1 of 1 (122 views)
Permalink
SVN: [59383] trunk/WikiWord/WikiWord

http://www.mediawiki.org/wiki/Special:Code/MediaWiki/59383

Revision: 59383
Author: daniel
Date: 2009-11-24 17:12:29 +0000 (Tue, 24 Nov 2009)

Log Message:
-----------
feature vector cache (INCOMPLETE, does not compile)

Modified Paths:
--------------
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptRelatedness.java
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinkFeatureFetcher.java
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordStoreSchema.java
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/BuildConceptInfo.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptInfoStoreBuilder.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java

Added Paths:
-----------
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptFeatures.java

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -19,15 +19,15 @@
import de.brightbyte.wikiword.model.WikiWordRanking;
import de.brightbyte.wikiword.store.LocalConceptStore;

-public class CoherenceDisambiguator<K> extends AbstractDisambiguator {
+public class CoherenceDisambiguator extends AbstractDisambiguator {

protected int frequencyThreshold = 2; //FIXME: use complex cutoff specifier!
protected double scoreThreshold = 0.002;
protected double popularityBias = 0.01;
- protected Similarity<LabeledVector<K>> similarityMeasure;
- protected FeatureFetcher<K> featureFetcher;
+ protected Similarity<LabeledVector<Integer>> similarityMeasure;
+ protected FeatureFetcher<LocalConcept> featureFetcher;

- public CoherenceDisambiguator(LocalConceptStore conceptStore, FeatureFetcher<K> featureFetcher, Similarity<LabeledVector<K>> sim) {
+ public CoherenceDisambiguator(LocalConceptStore conceptStore, FeatureFetcher<LocalConcept> featureFetcher, Similarity<LabeledVector<Integer>> sim) {
super(conceptStore);

if (sim==null) throw new NullPointerException();
@@ -40,16 +40,16 @@
return featureFetcher;
}

- public void setFeatureFetcher(FeatureFetcher<K> featureFetcher) {
+ public void setFeatureFetcher(FeatureFetcher<LocalConcept> featureFetcher) {
this.featureFetcher = featureFetcher;
}

- public Similarity<LabeledVector<K>> getSimilarityMeasure() {
+ public Similarity<LabeledVector<Integer>> getSimilarityMeasure() {
return similarityMeasure;
}

public void setSimilarityMeasure(
- Similarity<LabeledVector<K>> similarityMeasure) {
+ Similarity<LabeledVector<Integer>> similarityMeasure) {
if (similarityMeasure==null) throw new NullPointerException();
this.similarityMeasure = similarityMeasure;
}
@@ -229,10 +229,10 @@
d = similarities.get(a, b);
}
else {
- LabeledVector<K> fa = featureFetcher.getFeatures(a);
- LabeledVector<K> fb = featureFetcher.getFeatures(b);
+ ConceptFeatures<LocalConcept> fa = featureFetcher.getFeatures(a);
+ ConceptFeatures<LocalConcept> fb = featureFetcher.getFeatures(b);

- d = similarityMeasure.similarity(fa, fb);
+ d = similarityMeasure.similarity(fa.getFeatureVector(), fb.getFeatureVector());
similarities.set(a, b, d);
}
}

Added: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptFeatures.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptFeatures.java (rev 0)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptFeatures.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -0,0 +1,93 @@
+package de.brightbyte.wikiword.disambig;
+
+import de.brightbyte.data.LabeledVector;
+import de.brightbyte.data.MapLabeledVector;
+import de.brightbyte.wikiword.model.WikiWordConcept;
+import de.brightbyte.wikiword.model.WikiWordConceptReference;
+
+public class ConceptFeatures<C extends WikiWordConcept> {
+ protected LabeledVector<Integer> features;
+ protected WikiWordConceptReference<C> reference;
+
+ public ConceptFeatures(WikiWordConceptReference<C> reference, LabeledVector<Integer> features) {
+ this.features = features;
+ this.reference = reference;
+ }
+
+ public ConceptFeatures(WikiWordConceptReference<C> reference, byte[] features) {
+ this(reference, unserializeVector(features));
+ }
+
+ public String toString() {
+ return reference+ ":"+features;
+ }
+ public LabeledVector<Integer> getFeatureVector() {
+ return features;
+ }
+
+ public WikiWordConceptReference<C> getConceptReference() {
+ return reference;
+ }
+
+ public int getConceptId() {
+ return reference.getId();
+ }
+
+ public byte[] getFeatureVectorData() {
+ return serializeVector(features);
+ }
+
+ protected static byte[] serializeVector(LabeledVector<Integer> v) {
+ int c = v.size();
+ byte[] data = new byte[c*4 + c*8];
+
+ int i = 0;
+ for (Integer k: v.labels()) {
+ int id = k.intValue();
+ double d = v.get(k);
+ long b = Double.doubleToLongBits(d);
+
+ data[i++] = (byte)(id & 0xFF);
+ data[i++] = (byte)(id >>> 8 & 0xFF);
+ data[i++] = (byte)(id >>> 16 & 0xFF);
+ data[i++] = (byte)(id >>> 24 & 0xFF);
+
+ data[i++] = (byte)(b & 0xFF);
+ data[i++] = (byte)(b >>> 8 & 0xFF);
+ data[i++] = (byte)(b >>> 16 & 0xFF);
+ data[i++] = (byte)(b >>> 24 & 0xFF);
+ data[i++] = (byte)(b >>> 32 & 0xFF);
+ data[i++] = (byte)(b >>> 40 & 0xFF);
+ data[i++] = (byte)(b >>> 48 & 0xFF);
+ data[i++] = (byte)(b >>> 56 & 0xFF);
+ }
+
+ return data;
+ }
+
+ protected static LabeledVector<Integer> unserializeVector(byte[] data) {
+ LabeledVector<Integer> v = new MapLabeledVector<Integer>();
+
+ for (int i = 0; i<data.length; ) {
+ int id = (data[i++] & 0xFF) ;
+ id |= (data[i++] & 0xFF) << 8;
+ id |= (data[i++] & 0xFF) << 16;
+ id |= (data[i++] & 0xFF) << 24;
+
+ long b = (data[i++] & 0xFFL);
+ b |= (data[i++] & 0xFFL) << 8;
+ b |= (data[i++] & 0xFFL) << 16;
+ b |= (data[i++] & 0xFFL) << 24;
+ b |= (data[i++] & 0xFFL) << 32;
+ b |= (data[i++] & 0xFFL) << 40;
+ b |= (data[i++] & 0xFFL) << 48;
+ b |= (data[i++] & 0xFFL) << 56;
+
+ double d = Double.longBitsToDouble(b);
+ v.set(id, d);
+ }
+
+ return v;
+ }
+
+}

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptRelatedness.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptRelatedness.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ConceptRelatedness.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -7,14 +7,14 @@
import de.brightbyte.wikiword.model.WikiWordConcept;
import de.brightbyte.wikiword.model.WikiWordConceptReference;

-public class ConceptRelatedness<K> implements Similarity<WikiWordConcept> {
+public class ConceptRelatedness<C extends WikiWordConcept> implements Similarity<C> {

- public static class Relatedness {
+ public static class Relatedness<C extends WikiWordConcept> {
public final double relatedness;
- public final WikiWordConceptReference a;
- public final WikiWordConceptReference b;
+ public final WikiWordConceptReference<C> a;
+ public final WikiWordConceptReference<C> b;

- public Relatedness(final double relatedness, final WikiWordConceptReference a, final WikiWordConceptReference b) {
+ public Relatedness(final double relatedness, final WikiWordConceptReference<C> a, final WikiWordConceptReference<C> b) {
super();
this.relatedness = relatedness;
this.a = a;
@@ -27,25 +27,25 @@
}
}

- protected Similarity<LabeledVector<K>> similarityMeasure;
- protected FeatureFetcher<K> featureFetcher;
+ protected Similarity<LabeledVector<Integer>> similarityMeasure;
+ protected FeatureFetcher<C> featureFetcher;

- public ConceptRelatedness(Similarity<LabeledVector<K>> similarityMeasure, FeatureFetcher<K> featureFetcher) {
+ public ConceptRelatedness(Similarity<LabeledVector<Integer>> similarityMeasure, FeatureFetcher<C> featureFetcher) {
this.similarityMeasure = similarityMeasure;
this.featureFetcher = featureFetcher;
}

- public Relatedness relatedness(WikiWordConcept a, WikiWordConcept b) {
+ public Relatedness relatedness(C a, C b) {
double d = similarity(a, b);
- return new Relatedness(d, a.getReference(), b.getReference());
+ return new Relatedness<C>(d, a.getReference(), b.getReference());
}

- public double similarity(WikiWordConcept a, WikiWordConcept b) {
+ public double similarity(C a, C b) {
try {
- LabeledVector<K> fa = featureFetcher.getFeatures(a);
- LabeledVector<K> fb = featureFetcher.getFeatures(b);
+ ConceptFeatures<C> fa = featureFetcher.getFeatures(a);
+ ConceptFeatures<C> fb = featureFetcher.getFeatures(b);

- double d = similarityMeasure.similarity(fa, fb);
+ double d = similarityMeasure.similarity(fa.getFeatureVector(), fb.getFeatureVector());
return d;
} catch (PersistenceException e) {
throw new UncheckedPersistenceException(e);

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -1,9 +1,8 @@
package de.brightbyte.wikiword.disambig;

-import de.brightbyte.data.LabeledVector;
import de.brightbyte.util.PersistenceException;
import de.brightbyte.wikiword.model.WikiWordConcept;

-public interface FeatureFetcher<K> {
- public LabeledVector<K> getFeatures(WikiWordConcept c) throws PersistenceException;
+public interface FeatureFetcher<C extends WikiWordConcept> {
+ public ConceptFeatures<C> getFeatures(C c) throws PersistenceException;
}

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinkFeatureFetcher.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinkFeatureFetcher.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinkFeatureFetcher.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -7,7 +7,7 @@
import de.brightbyte.wikiword.model.WikiWordConceptReference;
import de.brightbyte.wikiword.store.WikiWordConceptStore;

-public class LinkFeatureFetcher implements FeatureFetcher<Integer> {
+public class LinkFeatureFetcher<C extends WikiWordConcept> implements FeatureFetcher<C> {
protected boolean useRelevance;
protected boolean useCardinality;

@@ -33,7 +33,7 @@
return x*y;
}

- public LabeledVector<Integer> getFeatures(WikiWordConcept c) throws PersistenceException {
+ public ConceptFeatures<C> getFeatures(WikiWordConcept c) throws PersistenceException {
LabeledVector<Integer> features = new MapLabeledVector<Integer>();

//XXX: magic numbers!
@@ -72,7 +72,7 @@

//XXX: compare cooccurrances (i.e. eval second level cooc)

- return features;
+ return new ConceptFeatures<C>(c.getReference(), features);
}

private Integer getLabel(WikiWordConceptReference r) {

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -49,6 +49,7 @@

protected EntityTable conceptInfoTable;
protected EntityTable conceptDescriptionTable;
+ protected EntityTable conceptFeaturesTable;

private String fields(String... f) {
if (f.length==0) return null;
@@ -115,7 +116,7 @@
setGroupConcatMaxLen(listBlobSize); //TODO: if it's larger currently, don't shrink!

conceptInfoTable = new EntityTable(this, "concept_info", getDefaultTableAttributes());
- conceptInfoTable.addField( new DatabaseField(this, "concept", "INT", "AUTO_INCREMENT", true, KeyType.PRIMARY ) );
+ conceptInfoTable.addField( new DatabaseField(this, "concept", "INT", null, true, KeyType.PRIMARY ) );
conceptInfoTable.addField( new DatabaseField(this, "inlinks", getTextType(listBlobSize), null, false, null ) );
conceptInfoTable.addField( new DatabaseField(this, "outlinks", getTextType(listBlobSize), null, false, null ) );
conceptInfoTable.addField( new DatabaseField(this, "narrower", getTextType(listBlobSize), null, false, null ) );
@@ -129,11 +130,17 @@

if (description) {
conceptDescriptionTable = new EntityTable(this, "concept_description", getDefaultTableAttributes());
- conceptDescriptionTable.addField( new DatabaseField(this, "concept", "INT", "AUTO_INCREMENT", true, KeyType.PRIMARY ) );
+ conceptDescriptionTable.addField( new DatabaseField(this, "concept", "INT", null, true, KeyType.PRIMARY ) );
conceptDescriptionTable.addField( new DatabaseField(this, "terms", getTextType(listBlobSize), null, false, null ) );
conceptDescriptionTable.setAutomaticField(null);
addTable(conceptDescriptionTable);
}
+
+ conceptFeaturesTable = new EntityTable(this, "concept_features", getDefaultTableAttributes());
+ conceptFeaturesTable.addField( new DatabaseField(this, "concept", "INT", null, true, KeyType.PRIMARY ) );
+ conceptFeaturesTable.addField( new DatabaseField(this, "features", getFieldType(byte[].class), null, false, null ) );
+ conceptFeaturesTable.setAutomaticField(null);
+ addTable(conceptFeaturesTable);
}

@Override

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordStoreSchema.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordStoreSchema.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/WikiWordStoreSchema.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -297,22 +297,34 @@
}


- public String encodeSet(ConceptType[] values) {
- int[] vv = new int[values.length];
- for (int i=0; i<values.length; i++) vv[i] = values[i].getCode();
- return encodeSet(vv);
+ public String encodeSet(ConceptType[] values) throws PersistenceException {
+ try {
+ int[] vv = new int[values.length];
+ for (int i=0; i<values.length; i++) vv[i] = values[i].getCode();
+ return encodeSet(vv);
+ } catch (SQLException e) {
+ throw new PersistenceException(e);
+ }
}

- public String encodeSet(ResourceType[] values) {
- int[] vv = new int[values.length];
- for (int i=0; i<values.length; i++) vv[i] = values[i].getCode();
- return encodeSet(vv);
+ public String encodeSet(ResourceType[] values) throws PersistenceException {
+ try {
+ int[] vv = new int[values.length];
+ for (int i=0; i<values.length; i++) vv[i] = values[i].getCode();
+ return encodeSet(vv);
+ } catch (SQLException e) {
+ throw new PersistenceException(e);
+ }
}

- public String encodeSet(Namespace[] values) {
- int[] vv = new int[values.length];
- for (int i=0; i<values.length; i++) vv[i] = values[i].getNumber();
- return encodeSet(vv);
+ public String encodeSet(Namespace[] values) throws PersistenceException {
+ try {
+ int[] vv = new int[values.length];
+ for (int i=0; i<values.length; i++) vv[i] = values[i].getNumber();
+ return encodeSet(vv);
+ } catch (SQLException e) {
+ throw new PersistenceException(e);
+ }
}



Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -376,9 +376,13 @@
public DataSet<C> getConcepts(int[] ids)
throws PersistenceException {

- String sql = conceptSelect("-1");
- sql += " WHERE C.id IN " + database.encodeSet(ids);
- return new QueryDataSet<C>(database, new ConceptFactory(), "getConcepts", sql, false);
+ try {
+ String sql = conceptSelect("-1");
+ sql += " WHERE C.id IN " + database.encodeSet(ids);
+ return new QueryDataSet<C>(database, new ConceptFactory(), "getConcepts", sql, false);
+ } catch (SQLException e) {
+ throw new PersistenceException(e);
+ }
}

protected abstract C newConcept(Map<String, Object> data) throws PersistenceException;

Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/BuildConceptInfo.java
===================================================================
--- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/BuildConceptInfo.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/BuildConceptInfo.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -2,8 +2,13 @@

import java.io.IOException;

+import de.brightbyte.data.cursor.CursorProcessor;
+import de.brightbyte.data.cursor.DataCursor;
import de.brightbyte.util.PersistenceException;
+import de.brightbyte.wikiword.disambig.ConceptFeatures;
+import de.brightbyte.wikiword.disambig.FeatureFetcher;
import de.brightbyte.wikiword.model.WikiWordConcept;
+import de.brightbyte.wikiword.model.WikiWordConceptReference;
import de.brightbyte.wikiword.store.builder.ConceptInfoStoreBuilder;
import de.brightbyte.wikiword.store.builder.WikiWordConceptStoreBuilder;

@@ -12,9 +17,9 @@
* ImportDump can be invoked as a standalone program, use --help as a
* command line parameter for usage information.
*/
-public class BuildConceptInfo extends ImportApp<WikiWordConceptStoreBuilder<? extends WikiWordConcept>> {
+public class BuildConceptInfo<C extends WikiWordConcept> extends ImportApp<WikiWordConceptStoreBuilder<C>> {

- protected ConceptInfoStoreBuilder infoStore;
+ protected ConceptInfoStoreBuilder<C> infoStore;

public BuildConceptInfo() {
super("BuildConceptInfo", true, true);
@@ -39,10 +44,38 @@

@Override
protected void run() throws Exception {
- section("-- build info --------------------------------------------------");
+ section("-- build concept property cache --------------------------------------------------");
this.infoStore.buildConceptInfo();
+
+ section("-- build concept feature vector cache --------------------------------------------------");
+ if (agenda.beginTask("buildConceptInfo", "buildConceptFeatureVectors")) {
+ //TODO: cleanup incomplete run
+ buildConceptFeatureVectors();
+ agenda.endTask("buildConceptInfo", "buildConceptFeatureVectors");
+ }
}

+ protected FeatureFetcher<C> featureFetcher;
+
+ private void buildConceptFeatureVectors() {
+ CursorProcessor<C> p = new CursorProcessor<C>() {
+
+ public void process(DataCursor<C> c) throws Exception {
+
+ C r;
+ while ((r = c.next())!=null) {
+ ConceptFeatures<C> features = featureFetcher.getFeatures(r);
+ infoStore.storeConceptFeatures(features);
+ }
+
+ infoStore.flush();
+ }
+
+ };
+
+ conceptStore.processConcepts(p);
+ }
+
public static void main(String[] argv) throws Exception {
BuildConceptInfo app = new BuildConceptInfo();
app.launch(argv);

Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptInfoStoreBuilder.java
===================================================================
--- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptInfoStoreBuilder.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptInfoStoreBuilder.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -1,8 +1,18 @@
package de.brightbyte.wikiword.store.builder;

+import de.brightbyte.data.cursor.CursorProcessor;
import de.brightbyte.util.PersistenceException;
+import de.brightbyte.wikiword.disambig.ConceptFeatures;
import de.brightbyte.wikiword.model.WikiWordConcept;
+import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ReferenceListEntrySpec;

public interface ConceptInfoStoreBuilder<C extends WikiWordConcept> extends WikiWordStoreBuilder {
public void buildConceptInfo() throws PersistenceException;
+
+ public int buildConceptPropertyCache(String targetField, String propertyTable, String propertyConceptField,
+ ReferenceListEntrySpec spec, String threshold) throws PersistenceException;
+
+ public void storeConceptFeatures(ConceptFeatures<C> features) throws PersistenceException;
+
+ public int processConcepts(CursorProcessor<C> processor) throws PersistenceException;
}

Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java
===================================================================
--- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -1397,6 +1397,23 @@
}
}

+
+ protected static DatabaseDataSet.Factory<LocalConceptReference> localConceptReferenceFactory = new DatabaseDataSet.Factory<LocalConceptReference>() {
+ public LocalConceptReference newInstance(ResultSet row) throws SQLException, PersistenceException {
+ int id = row.getInt("id");
+ String name = asString(row.getObject("name"));
+
+ return new LocalConceptReference(id, name, -1, -1);
+ }
+ };
+
+ public int processConcepts(final CursorProcessor<LocalConceptReference> processor) throws PersistenceException {
+ String sql = "SELECT * FROM "+conceptTable.getSQLName();
+ String where = "type = "+ConceptType.UNKNOWN.getCode();
+
+ DatabaseAccess.SimpleChunkedQuery query = new DatabaseAccess.SimpleChunkedQuery(getDatabaseAccess(), "processUnknownConcepts", "process", sql, where, null, conceptTable, "id");
+ return executeChunkedQuery(query, 1, localConceptReferenceFactory, processor);
+ }
}

//////////////////////////////////////////////////////////////////////////////

Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java
===================================================================
--- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -3,6 +3,7 @@
import static de.brightbyte.db.DatabaseUtil.asInt;
import static de.brightbyte.db.DatabaseUtil.asString;

+import java.sql.Blob;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
@@ -26,6 +27,7 @@
import de.brightbyte.util.Processor;
import de.brightbyte.wikiword.ConceptType;
import de.brightbyte.wikiword.TweakSet;
+import de.brightbyte.wikiword.disambig.ConceptFeatures;
import de.brightbyte.wikiword.model.WikiWordConcept;
import de.brightbyte.wikiword.model.WikiWordConceptReference;
import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema;
@@ -624,13 +626,17 @@
}

protected void storeStatsEntry(String block, String name, double value) throws PersistenceException {
- //TODO: inserter?...
- String sql = "REPLACE INTO "+statsTable.getSQLName()+" (block, name, value) VALUES ("
- +database.encodeValue(block)+", "
- +database.encodeValue(name)+", "
- +database.encodeValue(value)+") ";
-
- executeUpdate("storeStatsEntry", sql);
+ try {
+ //TODO: inserter?...
+ String sql = "REPLACE INTO "+statsTable.getSQLName()+" (block, name, value) VALUES ("
+ +database.encodeValue(block)+", "
+ +database.encodeValue(name)+", "
+ +database.encodeValue(value)+") ";
+
+ executeUpdate("storeStatsEntry", sql);
+ } catch (SQLException e) {
+ throw new PersistenceException(e);
+ }
}

protected void storeStatsEntries(String block, ResultSet rs, GroupNameTranslator translator) throws PersistenceException {
@@ -679,13 +685,17 @@
String name = e.getKey();
double value = e.getValue().doubleValue();

- sql.append( "(" );
- sql.append(database.encodeValue(block));
- sql.append(", ");
- sql.append(database.encodeValue(name));
- sql.append(", ");
- sql.append(database.encodeValue(value));
- sql.append( ")" );
+ try {
+ sql.append( "(" );
+ sql.append(database.encodeValue(block));
+ sql.append(", ");
+ sql.append(database.encodeValue(name));
+ sql.append(", ");
+ sql.append(database.encodeValue(value));
+ sql.append( ")" );
+ } catch (SQLException e1) {
+ throw new PersistenceException(e1);
+ }
}

executeUpdate("storeStatsEntries", sql.toString());
@@ -776,12 +786,18 @@
protected WikiWordConceptStoreSchema conceptDatabase;

protected EntityTable conceptInfoTable;
+ protected EntityTable conceptFeaturesTable;
+ protected Inserter conceptFeaturesInserter;

protected DatabaseConceptInfoStoreBuilder(ConceptInfoStoreSchema database, TweakSet tweaks, Agenda agenda) throws SQLException {
super(database, tweaks, agenda);

Inserter conceptInfoInserter = configureTable("concept_info", 64, 1024);
conceptInfoTable = (EntityTable)conceptInfoInserter.getTable();
+
+ conceptFeaturesInserter = configureTable("concept_features", 64, 1024);
+ conceptFeaturesInserter.setLenient(true); //ignore dupes. //TODO: replace instead!
+ conceptFeaturesTable = (EntityTable)conceptFeaturesInserter.getTable();
}

public void buildConceptInfo() throws PersistenceException {
@@ -848,6 +864,11 @@
return executeChunkedUpdate("prepareConceptCache", cacheTable.getName()+"."+conceptIdField, sql, null, t, "id");
}

+ public int buildConceptPropertyCache(String targetField, String propertyTable, String propertyConceptField,
+ ReferenceListEntrySpec spec, String threshold) throws PersistenceException {
+ return buildConceptPropertyCache(conceptInfoTable, "concept", targetField, propertyTable, propertyConceptField, spec, false, threshold, 1);
+ }
+
protected int buildConceptPropertyCache(
final DatabaseTable cacheTable, final String cacheIdField,
final String propertyField, final String realtion, final String relConceptField,
@@ -889,5 +910,19 @@
return executeChunkedUpdate(query, chunkFactor);
}

+ /**
+ * @see de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder#storeRawText(int, java.lang.String)
+ */
+ public void storeConceptFeatures(ConceptFeatures<T> features) throws PersistenceException {
+ try {
+ if (conceptFeaturesInserter==null) conceptFeaturesInserter = conceptFeaturesTable.getInserter();
+
+ conceptFeaturesInserter.updateInt("concept", features.getConceptId());
+ conceptFeaturesInserter.updateBlob("features", features.getFeatureVectorData());
+ conceptFeaturesInserter.updateRow();
+ } catch (SQLException e) {
+ throw new PersistenceException(e);
+ }
+ }
}
}

Modified: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java
===================================================================
--- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java 2009-11-24 14:07:57 UTC (rev 59382)
+++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java 2009-11-24 17:12:29 UTC (rev 59383)
@@ -18,10 +18,12 @@
import de.brightbyte.wikiword.DatasetIdentifier;
import de.brightbyte.wikiword.ExtractionRule;
import de.brightbyte.wikiword.ResourceType;
+import de.brightbyte.wikiword.disambig.ConceptFeatures;
import de.brightbyte.wikiword.model.LocalConcept;
import de.brightbyte.wikiword.model.LocalConceptReference;
import de.brightbyte.wikiword.model.WikiWordConceptReference;
import de.brightbyte.wikiword.schema.AliasScope;
+import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ReferenceListEntrySpec;
import de.brightbyte.wikiword.store.GroupNameTranslator;
import de.brightbyte.wikiword.store.WikiWordConceptStore;

@@ -420,6 +422,15 @@
public DatasetIdentifier getDatasetIdentifier() {
return dataset;
}
+
+ public int buildConceptPropertyCache(String targetField, String propertyTable, String propertyConceptField, ReferenceListEntrySpec spec, String threshold) throws PersistenceException {
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+ public void storeConceptFeatures(ConceptFeatures<LocalConcept> features) throws PersistenceException {
+ log("+ storeConceptFeatures: concept = "+features.getConceptId()+", features = "+features.getFeatureVector());
+ }

}




_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS [at] lists
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Wikipedia mediawiki-cvs RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.