Skip to content

Commit

Permalink
Merge branch 'master' of github.com:dwslab/melt
Browse files Browse the repository at this point in the history
  • Loading branch information
sven-h committed Feb 17, 2021
2 parents a9d71c4 + cb09b09 commit c78f5d2
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 2 deletions.
6 changes: 6 additions & 0 deletions matching-jena-matchers/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,12 @@
<version>7.2.0</version>
</dependency>

<!-- Annotations for better code quality -->
<dependency>
<groupId>org.jetbrains</groupId>
<artifactId>annotations</artifactId>
<version>20.1.0</version>
</dependency>

</dependencies>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.statistics;

import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.LabelToConceptLinker;
import org.jetbrains.annotations.NotNull;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * This class allows to analyze the concept coverage given a data source.
 * <p>
 * Coverage is the share of input labels for which the supplied
 * {@link LabelToConceptLinker} returns at least one link.
 */
public class Coverage {


    /**
     * Calculate the partial coverage - that is also allow for matching only concept parts (sub-strings).
     * A label counts as covered when {@code linker.linkToPotentiallyMultipleConcepts(label)} returns a
     * non-null, non-empty set; that set is stored as the label's links.
     *
     * @param linker Linker to be used.
     * @param entities Entities to be matched. Must not be null; may be empty.
     * @return A coverage result instance. The coverage score is 0 if {@code entities} is empty.
     */
    @NotNull
    public static CoverageResult getCoveragePartialLabel(LabelToConceptLinker linker, Set<String> entities){
        Map<String, Set<String>> conceptsFound = new HashMap<>();
        Set<String> conceptsNotFound = new HashSet<>();
        for (String concept : entities){
            Set<String> links = linker.linkToPotentiallyMultipleConcepts(concept);
            if(links == null || links.isEmpty()){
                conceptsNotFound.add(concept);
            } else {
                conceptsFound.put(concept, links);
            }
        }
        float coverageScore = calculateCoverageScore(conceptsFound.size(), entities.size());
        return new CoverageResult(coverageScore, conceptsFound, conceptsNotFound);
    }

    /**
     * Calculate the coverage given a linker and a set of concepts that are to be linked.
     * A label counts as covered when {@code linker.linkToSingleConcept(label)} returns a
     * non-null, non-empty string; that string becomes the single element of the label's link set.
     *
     * @param linker The linker to be used.
     * @param entities The entities to be looked up. Must not be null; may be empty.
     * @return A coverage result instance. The coverage score is 0 if {@code entities} is empty.
     */
    @NotNull
    public static CoverageResult getCoverageFullLabel(LabelToConceptLinker linker, Set<String> entities){
        Map<String, Set<String>> conceptsFound = new HashMap<>();
        Set<String> conceptsNotFound = new HashSet<>();
        for (String concept : entities){
            String link = linker.linkToSingleConcept(concept);
            if(link == null || link.isEmpty()){
                conceptsNotFound.add(concept);
            } else {
                Set<String> links = new HashSet<>();
                links.add(link);
                conceptsFound.put(concept, links);
            }
        }
        float coverageScore = calculateCoverageScore(conceptsFound.size(), entities.size());
        return new CoverageResult(coverageScore, conceptsFound, conceptsNotFound);
    }

    /**
     * Compute the share of linked labels.
     *
     * @param numberOfLinked Number of labels for which a link was found.
     * @param numberOfEntities Total number of labels that were looked up.
     * @return {@code numberOfLinked / numberOfEntities}, or 0 if no labels were looked up.
     */
    private static float calculateCoverageScore(int numberOfLinked, int numberOfEntities){
        // Guard against 0/0: float division would silently yield NaN for an empty input set.
        if(numberOfEntities == 0){
            return 0f;
        }
        return (float) numberOfLinked / numberOfEntities;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.statistics;


import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

/**
 * A CoverageResult is immutable.
 * <p>
 * The collections passed to the constructor are copied, and the getters return
 * unmodifiable views. Later changes to the caller's collections therefore do not
 * affect an instance, and attempts to modify a returned collection throw
 * {@link UnsupportedOperationException}.
 */
public class CoverageResult {


    /**
     * The share of linked strings.
     */
    private final float coverageScore;

    /**
     * The concepts that were linked (unmodifiable copy, including the value sets).
     */
    private final Map<String, Set<String>> linkedConcepts;

    /**
     * The concepts that were not linked (unmodifiable copy).
     */
    private final Set<String> nonLinkedConcepts;

    /**
     * Constructor
     * @param coverageScore The coverage score.
     * @param linkedConcepts The linked concepts. Copied; {@code null} is treated as an empty map.
     * @param nonLinkedConcepts The concepts not linked. Copied; {@code null} is treated as an empty set.
     */
    public CoverageResult(float coverageScore, Map<String, Set<String>> linkedConcepts, Set<String> nonLinkedConcepts){
        this.coverageScore = coverageScore;
        this.linkedConcepts = copyOfLinkedConcepts(linkedConcepts);
        this.nonLinkedConcepts = copyOfSet(nonLinkedConcepts);
    }

    /**
     * Create an unmodifiable deep copy of the given map; the value sets are copied as well
     * so that the result cannot be changed through references the caller still holds.
     * @param source The map to copy, may be null.
     * @return An unmodifiable copy that keeps the iteration order of {@code source}.
     */
    private static Map<String, Set<String>> copyOfLinkedConcepts(Map<String, Set<String>> source){
        if(source == null){
            return Collections.<String, Set<String>>emptyMap();
        }
        Map<String, Set<String>> copy = new LinkedHashMap<>();
        for(Map.Entry<String, Set<String>> entry : source.entrySet()){
            copy.put(entry.getKey(), copyOfSet(entry.getValue()));
        }
        return Collections.unmodifiableMap(copy);
    }

    /**
     * Create an unmodifiable copy of the given set.
     * @param source The set to copy, may be null.
     * @return An unmodifiable copy that keeps the iteration order of {@code source}.
     */
    private static Set<String> copyOfSet(Set<String> source){
        if(source == null){
            return Collections.<String>emptySet();
        }
        return Collections.unmodifiableSet(new LinkedHashSet<>(source));
    }

    /**
     * @return The share of linked strings.
     */
    public float getCoverageScore() {
        return coverageScore;
    }

    /**
     * @return Unmodifiable map from each linked concept to its links.
     */
    public Map<String, Set<String>> getLinkedConcepts() {
        return linkedConcepts;
    }

    /**
     * @return Unmodifiable set of the concepts that were not linked.
     */
    public Set<String> getNonLinkedConcepts() {
        return nonLinkedConcepts;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public static String normalizeForAlodXLLookupWithoutTokenization(String lookupSt
/**
* This method will strip the URL part from the URI.
* @param uri URI that shall be stripped.
* @return unstripped URI
* @return Unstripped URI
*/
public static String unstripUriXl(String uri){
uri = uri.replaceAll("%20", " ");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.statistics;


import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.webIsAlod.classic.WebIsAlodClassicLinker;
import org.junit.jupiter.api.Test;

import java.util.HashSet;
import java.util.Set;

import static org.junit.jupiter.api.Assertions.*;

class CoverageTest {


    /**
     * Builds a mutable set containing the given labels.
     * @param labels The labels to put into the set.
     * @return A new set holding all labels.
     */
    private static Set<String> labelSet(String... labels) {
        Set<String> result = new HashSet<>();
        for (String label : labels) {
            result.add(label);
        }
        return result;
    }

    /**
     * With partial-label linking all three labels are expected to be linked,
     * including the one that starts with a non-existing token
     * (presumably via its "car" part - confirm against the linker).
     */
    @Test
    void getCoveragePartialLabel(){
        WebIsAlodClassicLinker alodLinker = new WebIsAlodClassicLinker();
        Set<String> labels = labelSet("europe", "EU", "XKRZY335_NOT_EXISTING car");

        CoverageResult coverage = Coverage.getCoveragePartialLabel(alodLinker, labels);

        assertEquals(1.0f, coverage.getCoverageScore());
        for (String expectedLinked : new String[]{"europe", "EU", "XKRZY335_NOT_EXISTING car"}) {
            assertTrue(coverage.getLinkedConcepts().containsKey(expectedLinked));
        }
        assertEquals(0, coverage.getNonLinkedConcepts().size());
    }

    /**
     * With full-label linking two of the three labels are expected to be linked;
     * the non-existing label must be reported as not linked.
     */
    @Test
    void getCoverageFullLabel() {
        WebIsAlodClassicLinker alodLinker = new WebIsAlodClassicLinker();
        Set<String> labels = labelSet("europe", "EU", "XKRZY335_NOT_EXISTING");

        CoverageResult coverage = Coverage.getCoverageFullLabel(alodLinker, labels);

        assertEquals(2.0f / 3, coverage.getCoverageScore());

        // Linked side: the two known labels are present, the unknown one is not.
        assertTrue(coverage.getLinkedConcepts().containsKey("europe"));
        assertTrue(coverage.getLinkedConcepts().containsKey("EU"));
        assertFalse(coverage.getLinkedConcepts().containsKey("XKRZY335_NOT_EXISTING"));

        // Non-linked side: exactly the mirror image.
        assertTrue(coverage.getNonLinkedConcepts().contains("XKRZY335_NOT_EXISTING"));
        assertFalse(coverage.getNonLinkedConcepts().contains("europe"));
        assertFalse(coverage.getNonLinkedConcepts().contains("EU"));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
*/
public class PythonServer {


/**
* Default logger
*/
Expand All @@ -53,7 +54,6 @@ public class PythonServer {
* ObjectMapper from jackson to generate JSON.
*/
private static final ObjectMapper JSON_MAPPER = new ObjectMapper();


/**
* Constructor
Expand Down

0 comments on commit c78f5d2

Please sign in to comment.