Skip to content

Commit

Permalink
Moved the filtering of markings for the D2KB and Entity Typing class …
Browse files Browse the repository at this point in the history
…into an EvaluationDecorator to handle the problem not only for experiment tasks but for experiment sub tasks in the same way.
  • Loading branch information
MichaelRoeder committed Nov 9, 2015
1 parent eead8c5 commit 2f70646
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 104 deletions.
31 changes: 19 additions & 12 deletions src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@
import org.aksw.gerbil.evaluate.impl.SpanMergingEvaluatorDecorator;
import org.aksw.gerbil.evaluate.impl.SubTaskAverageCalculator;
import org.aksw.gerbil.evaluate.impl.filter.MarkingFilteringEvaluatorDecorator;
import org.aksw.gerbil.evaluate.impl.filter.SearcherBasedNotMatchingMarkingFilter;
import org.aksw.gerbil.matching.Matching;
import org.aksw.gerbil.matching.MatchingsSearcher;
import org.aksw.gerbil.matching.MatchingsSearcherFactory;
import org.aksw.gerbil.matching.impl.CompoundMatchingsCounter;
import org.aksw.gerbil.matching.impl.HierarchicalMatchingsCounter;
import org.aksw.gerbil.matching.impl.MatchingsCounterImpl;
import org.aksw.gerbil.matching.impl.MeaningMatchingsSearcher;
import org.aksw.gerbil.matching.impl.StrongSpanMatchingsSearcher;
import org.aksw.gerbil.semantic.kb.ExactWhiteListBasedUriKBClassifier;
import org.aksw.gerbil.semantic.kb.SimpleWhiteListBasedUriKBClassifier;
import org.aksw.gerbil.semantic.kb.UriKBClassifier;
Expand Down Expand Up @@ -142,20 +144,25 @@ protected Evaluator createEvaluator(ExperimentType type, ExperimentTaskConfigura
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator());
}
case D2KB: {
return new ConfidenceScoreEvaluatorDecorator<NamedEntity>(
new InKBClassBasedFMeasureCalculator<NamedEntity>(new CompoundMatchingsCounter<NamedEntity>(
(MatchingsSearcher<NamedEntity>) MatchingsSearcherFactory
.createSpanMatchingsSearcher(configuration.matching),
new MeaningMatchingsSearcher<NamedEntity>(globalClassifier)), globalClassifier),
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator());
return new SearcherBasedNotMatchingMarkingFilter<NamedEntity>(
new StrongSpanMatchingsSearcher<NamedEntity>(),
new ConfidenceScoreEvaluatorDecorator<NamedEntity>(
new InKBClassBasedFMeasureCalculator<NamedEntity>(
new CompoundMatchingsCounter<NamedEntity>(
(MatchingsSearcher<NamedEntity>) MatchingsSearcherFactory
.createSpanMatchingsSearcher(configuration.matching),
new MeaningMatchingsSearcher<NamedEntity>(globalClassifier)),
globalClassifier),
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()));
}
case ETyping: {
return new ConfidenceScoreEvaluatorDecorator<TypedSpan>(
new HierarchicalFMeasureCalculator<TypedSpan>(new HierarchicalMatchingsCounter<TypedSpan>(
(MatchingsSearcher<TypedSpan>) MatchingsSearcherFactory
.createSpanMatchingsSearcher(configuration.matching),
globalClassifier, inferencer)),
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator());
return new SearcherBasedNotMatchingMarkingFilter<TypedSpan>(new StrongSpanMatchingsSearcher<TypedSpan>(),
new ConfidenceScoreEvaluatorDecorator<TypedSpan>(
new HierarchicalFMeasureCalculator<TypedSpan>(new HierarchicalMatchingsCounter<TypedSpan>(
(MatchingsSearcher<TypedSpan>) MatchingsSearcherFactory
.createSpanMatchingsSearcher(configuration.matching),
globalClassifier, inferencer)),
FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()));
}
case OKE_Task1: {
ExperimentTaskConfiguration subTaskConfig;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.aksw.gerbil.evaluate.impl.filter;

import java.util.ArrayList;
import java.util.List;

import org.aksw.gerbil.evaluate.AbstractEvaluatorDecorator;
import org.aksw.gerbil.evaluate.EvaluationResultContainer;
import org.aksw.gerbil.evaluate.Evaluator;
import org.aksw.gerbil.matching.MatchingsSearcher;
import org.aksw.gerbil.transfer.nif.Marking;

import com.carrotsearch.hppc.BitSet;

/**
* This evaluator decorator removes every marking from the given list that does
* not match the given gold standard list based on a given
* {@link MatchingsSearcher} instance.
*
* @author Michael R&ouml;der ([email protected])
*
*/
public class SearcherBasedNotMatchingMarkingFilter<T extends Marking> extends AbstractEvaluatorDecorator<T> {

    /**
     * Searcher used to decide whether a marking has a counterpart inside the
     * gold standard of the same document.
     */
    protected MatchingsSearcher<T> searcher;

    /**
     * Creates a filtering decorator around the given evaluator.
     *
     * @param searcher
     *            the searcher used to look up matching gold standard markings
     * @param evaluator
     *            the decorated evaluator which receives the filtered markings
     */
    public SearcherBasedNotMatchingMarkingFilter(MatchingsSearcher<T> searcher, Evaluator<T> evaluator) {
        super(evaluator);
        this.searcher = searcher;
    }

    /**
     * Filters the markings document by document. The i-th list of markings is
     * filtered against the i-th list of the gold standard.
     *
     * @param markings
     *            the lists of markings (one list per document) to be filtered
     * @param goldStandard
     *            the gold standard lists, accessed with the same index as the
     *            lists of markings
     * @return a new list containing one filtered list per given list of
     *         markings
     */
    protected List<List<T>> filterListOfMarkings(List<List<T>> markings, List<List<T>> goldStandard) {
        List<List<T>> result = new ArrayList<List<T>>(markings.size());
        int documentId = 0;
        for (List<T> documentMarkings : markings) {
            result.add(filterMarkings(documentMarkings, goldStandard.get(documentId)));
            ++documentId;
        }
        return result;
    }

    /**
     * Keeps only those markings for which the searcher finds at least one
     * matching in the given gold standard.
     *
     * @param markings
     *            the markings of a single document
     * @param goldStandard
     *            the gold standard markings of the same document
     * @return a new list containing the markings that have been kept, in
     *         their original order
     */
    protected List<T> filterMarkings(List<T> markings, List<T> goldStandard) {
        // Bits that have already been consumed by an earlier marking. The set
        // is handed to the searcher with every request (presumably to make
        // sure that a gold standard element is not matched twice).
        BitSet consumed = new BitSet(goldStandard.size());
        List<T> kept = new ArrayList<T>(markings.size());
        BitSet hits;
        for (T candidate : markings) {
            hits = searcher.findMatchings(candidate, goldStandard, consumed);
            if (hits.isEmpty()) {
                // nothing matches this marking --> it is dropped
                continue;
            }
            kept.add(candidate);
            // only the first reported matching is marked as used
            int firstHit = hits.nextSetBit(0);
            consumed.set(firstHit);
        }
        return kept;
    }

    /**
     * Removes the annotator results that do not match the gold standard and
     * forwards the remaining results to the decorated evaluator. The gold
     * standard is passed on unchanged.
     */
    @Override
    public void evaluate(List<List<T>> annotatorResults, List<List<T>> goldStandard,
            EvaluationResultContainer results) {
        List<List<T>> matchingResults = filterListOfMarkings(annotatorResults, goldStandard);
        evaluator.evaluate(matchingResults, goldStandard, results);
    }

}
27 changes: 4 additions & 23 deletions src/main/java/org/aksw/gerbil/execute/ExperimentTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@
import org.aksw.gerbil.evaluate.SubTaskResult;
import org.aksw.gerbil.evaluate.impl.FMeasureCalculator;
import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.matching.filter.SearcherBasedNotMatchingMarkingFilter;
import org.aksw.gerbil.matching.impl.StrongSpanMatchingsSearcher;
import org.aksw.gerbil.semantic.sameas.DatasetBasedSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.MultipleSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.SameAsRetriever;
Expand Down Expand Up @@ -307,19 +305,11 @@ protected EvaluationResult runExperiment(Dataset dataset, Annotator annotator,
List<List<MeaningSpan>> results = new ArrayList<List<MeaningSpan>>(dataset.size());
List<List<MeaningSpan>> goldStandard = new ArrayList<List<MeaningSpan>>(dataset.size());
D2KBAnnotator linker = ((D2KBAnnotator) annotator);
// For D2KB we have to filter the results to get those results
// that are matching the positions
SearcherBasedNotMatchingMarkingFilter<MeaningSpan> filter = new SearcherBasedNotMatchingMarkingFilter<MeaningSpan>(
new StrongSpanMatchingsSearcher<MeaningSpan>());
List<MeaningSpan> documentGS;

for (Document document : dataset.getInstances()) {
documentGS = document.getMarkings(MeaningSpan.class);
// reduce the document to a text and a list of Spans
results.add(filter.filterMarkings(
linker.performD2KBTask(DocumentInformationReducer.reduceToTextAndSpans(document)),
documentGS));
goldStandard.add(documentGS);
results.add(linker.performD2KBTask(DocumentInformationReducer.reduceToTextAndSpans(document)));
goldStandard.add(document.getMarkings(MeaningSpan.class));
taskState.increaseExperimentStepCount();
}
if (annotatorOutputWriter != null) {
Expand Down Expand Up @@ -413,20 +403,11 @@ protected EvaluationResult runExperiment(Dataset dataset, Annotator annotator,
List<List<TypedSpan>> results = new ArrayList<List<TypedSpan>>(dataset.size());
List<List<TypedSpan>> goldStandard = new ArrayList<List<TypedSpan>>(dataset.size());
EntityTyper typer = ((EntityTyper) annotator);
// For ETyping we have to filter the results to get those
// results
// that are matching the positions
SearcherBasedNotMatchingMarkingFilter<TypedSpan> filter = new SearcherBasedNotMatchingMarkingFilter<TypedSpan>(
new StrongSpanMatchingsSearcher<TypedSpan>());
List<TypedSpan> documentGS;

for (Document document : dataset.getInstances()) {
documentGS = document.getMarkings(TypedSpan.class);
// reduce the document to a text and a list of Spans
results.add(filter.filterMarkings(
typer.performTyping(DocumentInformationReducer.reduceToTextAndSpans(document)),
documentGS));
goldStandard.add(documentGS);
results.add(typer.performTyping(DocumentInformationReducer.reduceToTextAndSpans(document)));
goldStandard.add(document.getMarkings(TypedSpan.class));
taskState.increaseExperimentStepCount();
}
if (annotatorOutputWriter != null) {
Expand Down

This file was deleted.

This file was deleted.

12 changes: 12 additions & 0 deletions src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,20 @@

public interface MarkingFilter<T extends Marking> {

/**
* Returns true if the marking is good and does not have to be filtered out.
*
* @param marking
* @return
*/
public boolean isMarkingGood(T marking);

/**
* Returns a filtered list based on the given list.
*
* @param markings
* @return
*/
public List<T> filterList(List<T> markings);

public List<List<T>> filterListOfLists(List<List<T>> markings);
Expand Down
6 changes: 3 additions & 3 deletions src/test/java/org/aksw/gerbil/SingleRunTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ public class SingleRunTest implements TaskObserver {

private static final Logger LOGGER = LoggerFactory.getLogger(SingleRunTest.class);

private static final String ANNOTATOR_NAME = "FOX";
private static final String DATASET_NAME = "N3-Reuters-128";
private static final String ANNOTATOR_NAME = "TagMe 2";
private static final String DATASET_NAME = "ACE2004";
private static final ExperimentType EXPERIMENT_TYPE = ExperimentType.D2KB;
private static final Matching MATCHING = Matching.WEAK_ANNOTATION_MATCH;
private static final Matching MATCHING = Matching.STRONG_ENTITY_MATCH;

public static void main(String[] args) throws Exception {
SingleRunTest test = new SingleRunTest();
Expand Down

0 comments on commit 2f70646

Please sign in to comment.