-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of github.com:dwslab/melt
- Loading branch information
Showing
6 changed files
with
169 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
62 changes: 62 additions & 0 deletions
62
...uni_mannheim/informatik/dws/melt/matching_jena_matchers/external/statistics/Coverage.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.statistics; | ||
|
||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.LabelToConceptLinker; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
/** | ||
* This class allows to analyze the concept coverage given a data source. | ||
*/ | ||
public class Coverage { | ||
|
||
|
||
/** | ||
* Calculate the partial coverage - that is also allow for matching only concept parts (sub-strings). | ||
* @param linker Linker to be used. | ||
* @param entities Entities to be matched. | ||
* @return A coverage result instance. | ||
*/ | ||
@NotNull | ||
public static CoverageResult getCoveragePartialLabel(LabelToConceptLinker linker, Set<String> entities){ | ||
Map<String, Set<String>> conceptsFound = new HashMap<>(); | ||
Set<String> conceptsNotFound = new HashSet<>(); | ||
for (String concept : entities){ | ||
Set<String> links = linker.linkToPotentiallyMultipleConcepts(concept); | ||
if(links == null || links.size() == 0){ | ||
conceptsNotFound.add(concept); | ||
} else { | ||
conceptsFound.put(concept, links); | ||
} | ||
} | ||
float coverageScore = (float) conceptsFound.size() / entities.size(); | ||
return new CoverageResult(coverageScore, conceptsFound, conceptsNotFound); | ||
} | ||
|
||
/** | ||
* Calculate the coverage given a linker and a set of concepts that are to be linked. | ||
* @param linker The linker to be used. | ||
* @param entities The entities to be looked up. | ||
* @return A coverage result instance. | ||
*/ | ||
@NotNull | ||
public static CoverageResult getCoverageFullLabel(LabelToConceptLinker linker, Set<String> entities){ | ||
Map<String, Set<String>> conceptsFound = new HashMap<>(); | ||
Set<String> conceptsNotFound = new HashSet<>(); | ||
for (String concept : entities){ | ||
Set<String> links = new HashSet<>(); | ||
String link = linker.linkToSingleConcept(concept); | ||
if(link == null || link.equals("")){ | ||
conceptsNotFound.add(concept); | ||
} else { | ||
links.add(link); | ||
conceptsFound.put(concept, links); | ||
} | ||
} | ||
float coverageScore = (float) conceptsFound.size() / entities.size(); | ||
return new CoverageResult(coverageScore, conceptsFound, conceptsNotFound); | ||
} | ||
} |
51 changes: 51 additions & 0 deletions
51
...nnheim/informatik/dws/melt/matching_jena_matchers/external/statistics/CoverageResult.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.statistics; | ||
|
||
|
||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
/**
 * A CoverageResult is immutable: the collections handed to the constructor are copied, and the
 * collections returned by the getters cannot be modified.
 */
public class CoverageResult {

    /**
     * The share of linked strings.
     */
    private final float coverageScore;

    /**
     * The concepts that were linked (unmodifiable copy).
     */
    private final Map<String, Set<String>> linkedConcepts;

    /**
     * The concepts that were not linked (unmodifiable copy).
     */
    private final Set<String> nonLinkedConcepts;

    /**
     * Constructor. Both collections are copied, so later changes to the arguments do not affect
     * this instance. A {@code null} collection is treated as an empty one.
     *
     * @param coverageScore The coverage score.
     * @param linkedConcepts The linked concepts.
     * @param nonLinkedConcepts The concepts not linked.
     */
    public CoverageResult(float coverageScore, Map<String, Set<String>> linkedConcepts, Set<String> nonLinkedConcepts) {
        this.coverageScore = coverageScore;
        this.linkedConcepts = copyLinkedConcepts(linkedConcepts);
        this.nonLinkedConcepts = copyLabels(nonLinkedConcepts);
    }

    /**
     * Create an unmodifiable deep copy of the label-to-links map, keeping its iteration order.
     */
    private static Map<String, Set<String>> copyLinkedConcepts(Map<String, Set<String>> source) {
        Map<String, Set<String>> copy = new java.util.LinkedHashMap<>();
        if (source != null) {
            for (Map.Entry<String, Set<String>> entry : source.entrySet()) {
                // The inner sets are copied as well; otherwise the caller could still change them.
                Set<String> links = entry.getValue();
                copy.put(entry.getKey(), links == null ? null : copyLabels(links));
            }
        }
        return java.util.Collections.unmodifiableMap(copy);
    }

    /**
     * Create an unmodifiable copy of a set of strings, keeping its iteration order.
     */
    private static Set<String> copyLabels(Set<String> source) {
        Set<String> copy = new java.util.LinkedHashSet<>();
        if (source != null) {
            copy.addAll(source);
        }
        return java.util.Collections.unmodifiableSet(copy);
    }

    /**
     * @return The share of linked strings.
     */
    public float getCoverageScore() {
        return coverageScore;
    }

    /**
     * @return An unmodifiable map from each linked concept to its links.
     */
    public Map<String, Set<String>> getLinkedConcepts() {
        return linkedConcepts;
    }

    /**
     * @return An unmodifiable set of the concepts that were not linked.
     */
    public Set<String> getNonLinkedConcepts() {
        return nonLinkedConcepts;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
...mannheim/informatik/dws/melt/matching_jena_matchers/external/statistics/CoverageTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.statistics; | ||
|
||
|
||
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.webIsAlod.classic.WebIsAlodClassicLinker; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import java.util.HashSet; | ||
import java.util.Set; | ||
|
||
import static org.junit.jupiter.api.Assertions.*; | ||
|
||
class CoverageTest { | ||
|
||
|
||
@Test | ||
void getCoveragePartialLabel(){ | ||
WebIsAlodClassicLinker linker = new WebIsAlodClassicLinker(); | ||
Set<String> entities = new HashSet<>(); | ||
entities.add("europe"); | ||
entities.add("EU"); | ||
entities.add("XKRZY335_NOT_EXISTING car"); | ||
|
||
CoverageResult result = Coverage.getCoveragePartialLabel(linker, entities); | ||
assertEquals((float) 1, result.getCoverageScore()); | ||
assertTrue(result.getLinkedConcepts().containsKey("europe")); | ||
assertTrue(result.getLinkedConcepts().containsKey("EU")); | ||
assertTrue(result.getLinkedConcepts().containsKey("XKRZY335_NOT_EXISTING car")); | ||
assertEquals(0, result.getNonLinkedConcepts().size()); | ||
} | ||
|
||
@Test | ||
void getCoverageFullLabel() { | ||
WebIsAlodClassicLinker linker = new WebIsAlodClassicLinker(); | ||
Set<String> entities = new HashSet<>(); | ||
entities.add("europe"); | ||
entities.add("EU"); | ||
entities.add("XKRZY335_NOT_EXISTING"); | ||
|
||
CoverageResult result = Coverage.getCoverageFullLabel(linker, entities); | ||
assertEquals((float) 2 / 3, result.getCoverageScore()); | ||
assertTrue(result.getLinkedConcepts().containsKey("europe")); | ||
assertTrue(result.getLinkedConcepts().containsKey("EU")); | ||
assertFalse(result.getLinkedConcepts().containsKey("XKRZY335_NOT_EXISTING")); | ||
assertTrue(result.getNonLinkedConcepts().contains("XKRZY335_NOT_EXISTING")); | ||
assertFalse(result.getNonLinkedConcepts().contains("europe")); | ||
assertFalse(result.getNonLinkedConcepts().contains("EU")); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters