Skip to content

Commit

Permalink
Merge pull request #48 from DiSSCo/feature/add-wikidata
Browse files Browse the repository at this point in the history
Support wikidata identifier + fix efg identifications
  • Loading branch information
samleeflang authored Dec 1, 2023
2 parents b9ef9ea + be8c364 commit 284678c
Show file tree
Hide file tree
Showing 17 changed files with 216 additions and 113 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package eu.dissco.core.translator.component;

import com.fasterxml.jackson.databind.JsonNode;
import eu.dissco.core.translator.exception.OrganisationException;
import java.util.concurrent.ExecutionException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Component;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.scheduler.Schedulers;

@Slf4j
@Component
@RequiredArgsConstructor
public class InstitutionNameComponent {

private final WebClient webClient;

@Cacheable("ror")
public String getRorName(String ror) throws OrganisationException {
log.info("Requesting organisation details for organisation: {} with ror", ror);
String url = "https://api.ror.org/organizations/" + ror;
var response = webClient.get().uri(url).retrieve().bodyToMono(JsonNode.class)
.publishOn(Schedulers.boundedElastic());
try {
var json = response.toFuture().get();
if (json != null) {
var name = json.get("name");
if (name != null) {
return name.asText();
}
}
} catch (InterruptedException e) {
log.error("Failed to make request to RoR service", e);
Thread.currentThread().interrupt();
} catch (ExecutionException e) {
log.error("Failed to make request to RoR service", e);
}
log.warn("Could not match name to a ROR id for: {}", url);
throw new OrganisationException("Unable to retrieve organisationName");
}

@Cacheable("wikidata")
public String getWikiDataName(String wikidata) throws OrganisationException {
log.info("Requesting organisation details for organisation: {} with wikidata", wikidata);
String url = "https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/" + wikidata + "/labels/en";
var response = webClient.get().uri(url).retrieve().bodyToMono(JsonNode.class)
.publishOn(Schedulers.boundedElastic());
try {
var name = response.toFuture().get();
if (name != null && name.isTextual()) {
return name.asText();
}
} catch (InterruptedException e) {
log.error("Failed to make request to wikidata service", e);
Thread.currentThread().interrupt();
} catch (ExecutionException e) {
log.error("Failed to make request to wikidata service", e);
}
log.warn("Could not match to an English (en) label to a wikidata id for: {}", url);
throw new OrganisationException("Unable to retrieve organisationName");
}
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package eu.dissco.core.translator.exception;

/**
 * Raised when an organisation identifier is not a supported ROR or Wikidata identifier, or when
 * the institution name belonging to it could not be retrieved.
 */
public class OrganisationException extends DiSSCoDataException {

  /**
   * Creates the exception with a description of what went wrong.
   *
   * @param message the detail message, passed on unchanged to the parent exception
   */
  public OrganisationException(String message) {
    super(message);
  }
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import eu.dissco.core.translator.domain.DigitalSpecimenWrapper;
import eu.dissco.core.translator.domain.Enrichment;
import eu.dissco.core.translator.exception.DiSSCoDataException;
import eu.dissco.core.translator.exception.OrganisationNotRorId;
import eu.dissco.core.translator.exception.OrganisationException;
import eu.dissco.core.translator.properties.DwcaProperties;
import eu.dissco.core.translator.properties.EnrichmentProperties;
import eu.dissco.core.translator.properties.FdoProperties;
Expand Down Expand Up @@ -228,7 +228,7 @@ private String retrieveLicense(XMLEventReader xmlEventReader) throws XMLStreamEx
}

private List<DigitalMediaObjectEvent> processMedia(String recordId, JsonNode fullDigitalSpecimen,
String organisationId) throws OrganisationNotRorId {
String organisationId) throws OrganisationException {
var extensions = fullDigitalSpecimen.get(EXTENSIONS);
if (extensions != null) {
if (extensions.get(AC_MULTIMEDIA) != null) {
Expand Down Expand Up @@ -277,7 +277,7 @@ private List<DigitalMediaObjectEvent> extractMultiMedia(String recordId, JsonNod

private List<DigitalMediaObjectEvent> publishAssociatedMedia(String recordId,
String associatedMedia,
String organisationId) throws OrganisationNotRorId {
String organisationId) throws OrganisationException {
log.debug("Digital Specimen: {}, has associatedMedia {}", recordId,
associatedMedia);
String[] mediaUrls = associatedMedia.split("\\|");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dissco.core.translator.component.RorComponent;
import eu.dissco.core.translator.exception.OrganisationNotRorId;
import eu.dissco.core.translator.component.InstitutionNameComponent;
import eu.dissco.core.translator.exception.OrganisationException;
import eu.dissco.core.translator.exception.UnknownPhysicalSpecimenIdType;
import eu.dissco.core.translator.properties.FdoProperties;
import eu.dissco.core.translator.properties.WebClientProperties;
Expand Down Expand Up @@ -33,9 +33,11 @@
import eu.dissco.core.translator.terms.specimen.AccessRights;
import eu.dissco.core.translator.terms.specimen.BasisOfRecord;
import eu.dissco.core.translator.terms.specimen.CollectionId;
import eu.dissco.core.translator.terms.specimen.DataGeneralizations;
import eu.dissco.core.translator.terms.specimen.DatasetName;
import eu.dissco.core.translator.terms.specimen.Disposition;
import eu.dissco.core.translator.terms.specimen.HasMedia;
import eu.dissco.core.translator.terms.specimen.InformationWithheld;
import eu.dissco.core.translator.terms.specimen.LivingOrPreserved;
import eu.dissco.core.translator.terms.specimen.MarkedAsType;
import eu.dissco.core.translator.terms.specimen.Modified;
Expand Down Expand Up @@ -82,9 +84,9 @@
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.ScientificName;
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.ScientificNameAuthorship;
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.SpecificEpithet;
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.Subtribe;
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.Subfamily;
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.Subgenus;
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.Subtribe;
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.Superfamily;
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.TaxonId;
import eu.dissco.core.translator.terms.specimen.identification.taxonomy.TaxonRank;
Expand Down Expand Up @@ -128,7 +130,6 @@
import eu.dissco.core.translator.terms.specimen.location.georeference.PointRadiusSpatialFit;
import eu.dissco.core.translator.terms.specimen.occurence.Behavior;
import eu.dissco.core.translator.terms.specimen.occurence.Caste;
import eu.dissco.core.translator.terms.specimen.DataGeneralizations;
import eu.dissco.core.translator.terms.specimen.occurence.DegreeOfEstablishment;
import eu.dissco.core.translator.terms.specimen.occurence.EstablishmentMeans;
import eu.dissco.core.translator.terms.specimen.occurence.EventDate;
Expand All @@ -137,7 +138,6 @@
import eu.dissco.core.translator.terms.specimen.occurence.FieldNumber;
import eu.dissco.core.translator.terms.specimen.occurence.GeoreferenceVerificationStatus;
import eu.dissco.core.translator.terms.specimen.occurence.Habitat;
import eu.dissco.core.translator.terms.specimen.InformationWithheld;
import eu.dissco.core.translator.terms.specimen.occurence.LifeStage;
import eu.dissco.core.translator.terms.specimen.occurence.OccurrenceAssertions;
import eu.dissco.core.translator.terms.specimen.occurence.OccurrenceRemarks;
Expand Down Expand Up @@ -181,13 +181,13 @@ public abstract class BaseDigitalObjectDirector {

protected final ObjectMapper mapper;
protected final TermMapper termMapper;
private final RorComponent rorComponent;
private final InstitutionNameComponent institutionNameComponent;
private final WebClientProperties webClientProperties;
private final FdoProperties fdoProperties;
private final List<String> identifierTerms;

public DigitalSpecimen assembleDigitalSpecimenTerm(JsonNode data, boolean dwc)
throws OrganisationNotRorId, UnknownPhysicalSpecimenIdType {
throws OrganisationException, UnknownPhysicalSpecimenIdType {
var ds = assembleDigitalSpecimenTerms(data, dwc);
ds.withOccurrences(assembleOccurrenceTerms(data, dwc));
ds.withDwcIdentification(assembleIdentifications(data, dwc));
Expand Down Expand Up @@ -231,7 +231,7 @@ protected Citations createCitation(JsonNode data,
}

private DigitalSpecimen assembleDigitalSpecimenTerms(JsonNode data, boolean dwc)
throws OrganisationNotRorId, UnknownPhysicalSpecimenIdType {
throws OrganisationException, UnknownPhysicalSpecimenIdType {
var physicalSpecimenIdTypeHarmonised = convertToPhysicalSpecimenIdTypeEnum(
termMapper.retrieveTerm(new PhysicalSpecimenIdType(), data, dwc));
var organisationId = termMapper.retrieveTerm(new OrganisationId(), data, dwc);
Expand All @@ -254,7 +254,7 @@ private DigitalSpecimen assembleDigitalSpecimenTerms(JsonNode data, boolean dwc)
.withDwcCollectionId(termMapper.retrieveTerm(new CollectionId(), data, dwc))
.withDctermsModified(termMapper.retrieveTerm(new Modified(), data, dwc))
.withDwcInstitutionName(
rorComponent.getRorName(minifyOrganisationId(organisationId)))
getInstitutionName(organisationId))
.withDwcRecordedBy(termMapper.retrieveTerm(new RecordedBy(), data, dwc))
.withDwcBasisOfRecord(termMapper.retrieveTerm(new BasisOfRecord(), data, dwc))
.withDctermsAccessRights(termMapper.retrieveTerm(new AccessRights(), data, dwc))
Expand All @@ -265,6 +265,18 @@ private DigitalSpecimen assembleDigitalSpecimenTerms(JsonNode data, boolean dwc)
.withDwcDataGeneralizations(termMapper.retrieveTerm(new DataGeneralizations(), data, dwc));
}

/**
 * Resolves the institution name for an organisation identifier given as a full ROR or Wikidata
 * URL.
 *
 * <p>The identifier must start with {@code https://ror.org/} or
 * {@code https://www.wikidata.org/wiki/} and carry a non-empty id after that prefix. The prefix
 * that is tested is the same one that is stripped, so an unsupported Wikidata URL form is rejected
 * here instead of being sent on as an id.
 *
 * @param organisationId the full organisation URL; may be {@code null}
 * @return the name returned by the matching lookup on {@code institutionNameComponent}
 * @throws OrganisationException when the identifier is {@code null}, has neither supported prefix,
 *                               has nothing after the prefix, or when the lookup itself fails
 */
private String getInstitutionName(String organisationId) throws OrganisationException {
  final String rorPrefix = "https://ror.org/";
  final String wikidataPrefix = "https://www.wikidata.org/wiki/";
  if (organisationId != null) {
    // The length checks reject a bare prefix, which would otherwise be looked up with an empty id.
    if (organisationId.startsWith(rorPrefix) && organisationId.length() > rorPrefix.length()) {
      return institutionNameComponent.getRorName(organisationId.substring(rorPrefix.length()));
    }
    if (organisationId.startsWith(wikidataPrefix)
        && organisationId.length() > wikidataPrefix.length()) {
      return institutionNameComponent.getWikiDataName(
          organisationId.substring(wikidataPrefix.length()));
    }
  }
  throw new OrganisationException(organisationId + " is not a valid ror or wikidata identifier");
}

private List<EntityRelationships> assembleDigitalSpecimenEntityRelationships(
DigitalSpecimen ds) {
var relationships = new ArrayList<EntityRelationships>();
Expand Down Expand Up @@ -342,7 +354,8 @@ protected Identifications createIdentification(JsonNode data, boolean dwc) {
.withDwcTribe(termMapper.retrieveTerm(new Tribe(), data, dwc));
return new Identifications()
.withDwcIdentificationID(termMapper.retrieveTerm(new IdentificationId(), data, dwc))
.withDwcIdentificationVerificationStatus(parseToBoolean(new IdentificationVerificationStatus(), data, dwc))
.withDwcIdentificationVerificationStatus(
parseToBoolean(new IdentificationVerificationStatus(), data, dwc))
.withDwcTypeStatus(termMapper.retrieveTerm(new TypeStatus(), data, dwc))
.withDwcDateIdentified(termMapper.retrieveTerm(new DateIdentified(), data, dwc))
.withDwcIdentifiedBy(termMapper.retrieveTerm(new IdentifiedBy(), data, dwc))
Expand Down Expand Up @@ -513,19 +526,11 @@ private Boolean parseToBoolean(Term term, JsonNode data, boolean dwc) {
return null;
}

private String minifyOrganisationId(String organisationId) throws OrganisationNotRorId {
if (organisationId.startsWith("https://ror.org")) {
return organisationId.replace("https://ror.org/", "");
} else {
throw new OrganisationNotRorId(organisationId + " is not a valid ror");
}
}

public DigitalEntity assembleDigitalMediaObjects(boolean dwc,
JsonNode mediaRecord, String organisationId) throws OrganisationNotRorId {
JsonNode mediaRecord, String organisationId) throws OrganisationException {
var digitalMedioObject = new DigitalEntity()
.withDwcInstitutionId(organisationId)
.withDwcInstitutionName(rorComponent.getRorName(minifyOrganisationId(organisationId)))
.withDwcInstitutionName(getInstitutionName(organisationId))
.withAcAccessUri(termMapper.retrieveTerm(new AccessUri(), mediaRecord, dwc))
.withDctermsLicense(termMapper.retrieveTerm(new License(), mediaRecord, dwc))
.withDctermsFormat(termMapper.retrieveTerm(new Format(), mediaRecord, dwc))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dissco.core.translator.Profiles;
import eu.dissco.core.translator.component.RorComponent;
import eu.dissco.core.translator.component.InstitutionNameComponent;
import eu.dissco.core.translator.properties.FdoProperties;
import eu.dissco.core.translator.properties.WebClientProperties;
import eu.dissco.core.translator.schema.Citations;
Expand All @@ -22,7 +22,7 @@
public class BiocaseDigitalObjectDirector extends BaseDigitalObjectDirector {

public BiocaseDigitalObjectDirector(ObjectMapper mapper, TermMapper termMapper,
RorComponent rorComponent, WebClientProperties webClientProperties,
InstitutionNameComponent rorComponent, WebClientProperties webClientProperties,
FdoProperties fdoProperties) {
super(mapper, termMapper, rorComponent, webClientProperties, fdoProperties, identifierTerms());
}
Expand All @@ -35,8 +35,10 @@ private static List<String> identifierTerms() {
list.add("abcd:unitGUID");
list.add("abcd:recordURI");
// For now just look at the two first accession numbers as a shortcut
list.add("abcd:specimenUnit/accessions/accessionDateAndAccessionCatalogueAndAccessionNumber/0/value");
list.add("abcd:specimenUnit/accessions/accessionDateAndAccessionCatalogueAndAccessionNumber/1/value");
list.add(
"abcd:specimenUnit/accessions/accessionDateAndAccessionCatalogueAndAccessionNumber/0/value");
list.add(
"abcd:specimenUnit/accessions/accessionDateAndAccessionCatalogueAndAccessionNumber/1/value");
return list;
}

Expand All @@ -61,7 +63,9 @@ protected List<Identifications> assembleIdentifications(JsonNode data, boolean d
var iterateOverElements = true;
var count = 0;
while (iterateOverElements) {
var identificationNode = getSubJsonAbcd(data, count, "abcd:identifications/identification/");
var identificationNode = getSubJsonAbcd(data, count,
List.of("abcd:identifications/identification/",
"abcd-efg:identifications/identification/"));
if (!identificationNode.isEmpty()) {
identifications.add(createIdentification(identificationNode, dwc));
count++;
Expand All @@ -77,7 +81,7 @@ private ArrayList<Citations> getCitations(JsonNode data, boolean dwc, String sub
var iterateOverElements = true;
var count = 0;
while (iterateOverElements) {
var citationNode = getSubJsonAbcd(data, count, subPath);
var citationNode = getSubJsonAbcd(data, count, List.of(subPath));
if (!citationNode.isEmpty()) {
citations.add(super.createCitation(citationNode, dwc));
count++;
Expand All @@ -88,15 +92,15 @@ private ArrayList<Citations> getCitations(JsonNode data, boolean dwc, String sub
return citations;
}

private JsonNode getSubJsonAbcd(JsonNode data, int count, String path) {
private JsonNode getSubJsonAbcd(JsonNode data, int count, List<String> paths) {
var subNode = mapper.createObjectNode();
data.fields().forEachRemaining(field -> {
paths.forEach(path -> data.fields().forEachRemaining(field -> {
if (field.getKey().startsWith(path + count)) {
subNode.set(
field.getKey().replace(path + count + "/", ""),
field.getValue());
}
});
}));
return subNode;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dissco.core.translator.Profiles;
import eu.dissco.core.translator.component.RorComponent;
import eu.dissco.core.translator.component.InstitutionNameComponent;
import eu.dissco.core.translator.properties.FdoProperties;
import eu.dissco.core.translator.properties.WebClientProperties;
import eu.dissco.core.translator.schema.Citations;
Expand All @@ -23,7 +23,7 @@ public class DwcaDigitalObjectDirector extends BaseDigitalObjectDirector {
private static final String EXTENSION = "extensions";

public DwcaDigitalObjectDirector(ObjectMapper mapper, TermMapper termMapper,
RorComponent rorComponent, WebClientProperties webClientProperties,
InstitutionNameComponent rorComponent, WebClientProperties webClientProperties,
FdoProperties fdoProperties) {
super(mapper, termMapper, rorComponent, webClientProperties, fdoProperties, identifierTerms());
}
Expand Down
Loading

0 comments on commit 284678c

Please sign in to comment.