From a9ea14e5dbece9a17f374a6c3d6f80886991d3e0 Mon Sep 17 00:00:00 2001 From: Sam Leeflang Date: Tue, 19 Nov 2024 14:40:05 +0100 Subject: [PATCH 1/4] Add identifier metadata --- .../terms/BaseDigitalObjectDirector.java | 7 +- .../translator/terms/utils/AgentsUtils.java | 31 +++-- .../terms/utils/EntityRelationshipUtils.java | 3 +- .../terms/utils/IdentifierUtils.java | 108 ++++++++++++++++++ .../terms/utils/AgentUtilsTest.java | 3 +- .../utils/EntityRelationshipUtilsTest.java | 3 +- .../terms/utils/IdentifierUtilsTest.java | 76 ++++++++++++ 7 files changed, 210 insertions(+), 21 deletions(-) create mode 100644 src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java create mode 100644 src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java diff --git a/src/main/java/eu/dissco/core/translator/terms/BaseDigitalObjectDirector.java b/src/main/java/eu/dissco/core/translator/terms/BaseDigitalObjectDirector.java index e7a0c7a..117cf8b 100644 --- a/src/main/java/eu/dissco/core/translator/terms/BaseDigitalObjectDirector.java +++ b/src/main/java/eu/dissco/core/translator/terms/BaseDigitalObjectDirector.java @@ -16,6 +16,7 @@ import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_PERSON; import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent; import static eu.dissco.core.translator.terms.utils.EntityRelationshipUtils.addEntityRelationship; +import static eu.dissco.core.translator.terms.utils.IdentifierUtils.addIdentifier; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -389,11 +390,7 @@ private List assembleIdentifiers(Js var identifiers = new ArrayList(); for (String identifierTerm : identifierTerms) { if (data.get(identifierTerm) != null) { - var identifier = new Identifier() - .withId(data.get(identifierTerm).asText()) - .withType("ods:Identifier") - .withDctermsTitle(identifierTerm) - .withDctermsIdentifier(data.get(identifierTerm).asText()); + var identifier = addIdentifier(data.get(identifierTerm).asText(), identifierTerm); identifiers.add(identifier); } } diff --git a/src/main/java/eu/dissco/core/translator/terms/utils/AgentsUtils.java b/src/main/java/eu/dissco/core/translator/terms/utils/AgentsUtils.java index b89e42e..a6c4666 100644 --- a/src/main/java/eu/dissco/core/translator/terms/utils/AgentsUtils.java +++ b/src/main/java/eu/dissco/core/translator/terms/utils/AgentsUtils.java @@ -1,8 +1,12 @@ package eu.dissco.core.translator.terms.utils; +import static eu.dissco.core.translator.terms.utils.IdentifierUtils.addIdentifier; + import eu.dissco.core.translator.domain.AgentRoleType; import eu.dissco.core.translator.schema.Agent; import eu.dissco.core.translator.schema.Agent.Type; +import eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus; +import eu.dissco.core.translator.schema.OdsHasRole; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -17,6 +21,11 @@ private AgentsUtils() { public static List addAgent(List currentAgents, String agentValue, String agentId, AgentRoleType role, Type type) { + return addAgent(currentAgents, agentValue, agentId, role, type, null); + } + + public static List addAgent(List currentAgents, String agentValue, String agentId, + AgentRoleType role, Type type, OdsIdentifierStatus identifierStatus) { var agents = new ArrayList(); if (currentAgents != null) { agents.addAll(currentAgents); @@ -24,16 +33,16 @@ public static List addAgent(List currentAgents, String agentValue, if (agentValue != null || agentId != null) { if ((agentValue != null && (agentValue.contains("&") || agentValue.contains("|"))) || ( agentId != null && (agentId.contains("&") || agentId.contains("|")))) { - handleMultipleAgents(agents, agentValue, agentId, role, type); + handleMultipleAgents(agents, agentValue, agentId, role, type, identifierStatus); } else { - constructAgent(agents, agentValue, agentId, role, type); + constructAgent(agents, agentValue, agentId, role, type, identifierStatus); } } return agents; } private static void constructAgent(List agents, String agentValue, String agentId, - AgentRoleType role, Type type) { + AgentRoleType role, Type type, OdsIdentifierStatus identifierStatus) { String agentName = agentValue; if (agentValue != null && agentValue.contains("http") && agentId == null) { agentId = agentValue; @@ -50,19 +59,17 @@ private static void constructAgent(List agents, String agentValue, String .withSchemaName(agentName) .withSchemaIdentifier(agentId) .withOdsHasRoles( - List.of(new eu.dissco.core.translator.schema.OdsHasRole().withType("schema:Role") + List.of(new OdsHasRole().withType("schema:Role") .withSchemaRoleName(role.getName()))); if (agentId != null) { - agent.withOdsHasIdentifiers(List.of( - new eu.dissco.core.translator.schema.Identifier().withId(agentId) - .withType("ods:Identifier") - .withDctermsIdentifier(agentId))); + agent.withOdsHasIdentifiers(List.of(addIdentifier(agentId, null, identifierStatus))); } agents.add(agent); } private static void handleMultipleAgents( - List agents, String agentValue, String agentId, AgentRoleType role, Type type) { + List agents, String agentValue, String agentId, AgentRoleType role, Type type, + OdsIdentifierStatus identifierStatus) { var ids = new String[0]; var agentValues = new String[0]; if (checkIfNeedsParsing(agentValue)) { @@ -74,7 +81,7 @@ private static void handleMultipleAgents( } if (agentValues.length == ids.length) { for (int i = 0; i < agentValues.length; i++) { - constructAgent(agents, agentValues[i], ids[i], role, type); + constructAgent(agents, agentValues[i], ids[i], role, type, identifierStatus); } } else if (agentValues.length > ids.length) { if (ids.length != 0) { @@ -83,7 +90,7 @@ private static void handleMultipleAgents( agentValue, agentId); } for (String agent : agentValues) { - constructAgent(agents, agent, null, role, type); + constructAgent(agents, agent, null, role, type, identifierStatus); } } else { if (agentValues.length != 0) { @@ -92,7 +99,7 @@ private static void handleMultipleAgents( agentValue, agentId); } for (String idValue : ids) { - constructAgent(agents, null, idValue, role, type); + constructAgent(agents, null, idValue, role, type, identifierStatus); } } } diff --git a/src/main/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtils.java b/src/main/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtils.java index 0cb5d9a..8ad9a4b 100644 --- a/src/main/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtils.java +++ b/src/main/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtils.java @@ -2,6 +2,7 @@ import static eu.dissco.core.translator.domain.AgentRoleType.DATA_TRANSLATOR; import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_SOFTWARE_APPLICATION; +import static eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus.PREFERRED; import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent; import eu.dissco.core.translator.domain.RelationshipType; @@ -33,7 +34,7 @@ public static EntityRelationship addEntityRelationship( .withDwcRelationshipEstablishedDate(Date.from(Instant.now())); entityRelationship.setOdsHasAgents( addAgent(entityRelationship.getOdsHasAgents(), agentName, agentId, DATA_TRANSLATOR, - SCHEMA_SOFTWARE_APPLICATION)); + SCHEMA_SOFTWARE_APPLICATION, PREFERRED)); if (relatedResource.startsWith("http")) { try { entityRelationship.setOdsRelatedResourceURI(new URI(relatedResource)); diff --git a/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java new file mode 100644 index 0000000..cb2e9af --- /dev/null +++ b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java @@ -0,0 +1,108 @@ +package eu.dissco.core.translator.terms.utils; + +import static eu.dissco.core.translator.schema.Identifier.DctermsType.DOI; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.HANDLE; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.URL; +import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE; +import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE; +import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT; + +import eu.dissco.core.translator.schema.Identifier; +import eu.dissco.core.translator.schema.Identifier.DctermsType; +import eu.dissco.core.translator.schema.Identifier.OdsGupriLevel; +import eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.tuple.Triple; + +@Slf4j +public class IdentifierUtils { + + private static final Map, Triple> map = getPrefixMap(); + + private IdentifierUtils() { + // This is a Utility class + } + + private static Map, Triple> getPrefixMap() { + var linkedMap = new LinkedHashMap, Triple>(); + linkedMap.put(List.of("https://doi.org"), + Triple.of(DOI, "DOI", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT)); + linkedMap.put(List.of("https://hdl.handle.net"), + Triple.of(HANDLE, "Handle", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT)); + linkedMap.put(List.of("http://www.wikidata.org", "https://www.wikidata.org"), + Triple.of(URL, "Wikidata", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); + linkedMap.put(List.of("http://orcid.org", "https://orcid.org"), + Triple.of(URL, "ORCID", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); + linkedMap.put(List.of("http", "https"), + Triple.of(URL, "URL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); + linkedMap.put(List.of("urn:uuid"), + Triple.of(DctermsType.UUID, "UUID", GLOBALLY_UNIQUE_STABLE)); + return linkedMap; + } + + public static Identifier addIdentifier(String identifierString) { + return addIdentifier(identifierString, null, null); + } + + public static Identifier addIdentifier(String identifierString, String identifierName) { + return addIdentifier(identifierString, identifierName, null); + } + + public static Identifier addIdentifier(String identifierString, String identifierName, + OdsIdentifierStatus identifierStatus) { + if (identifierString == null) { + return null; + } + var identifier = new Identifier() + .withId(identifierString) + .withType("ods:Identifier") + .withDctermsIdentifier(identifierString) + .withOdsIdentifierStatus(identifierStatus); + for (var entry : map.entrySet()) { + for (var prefix : entry.getKey()) { + if (identifierString.startsWith(prefix)) { + identifier.setDctermsType(entry.getValue().getLeft()); + identifier.setDctermsTitle(getDcTermsTitle(identifierName, entry.getValue().getMiddle())); + identifier.setOdsGupriLevel(entry.getValue().getRight()); + return identifier; + } + } + } + if (isValidUUID(identifierString)) { + identifier.setDctermsType(DctermsType.UUID); + identifier.setDctermsTitle(getDcTermsTitle(identifierName, "UUID")); + identifier.setOdsGupriLevel(GLOBALLY_UNIQUE_STABLE); + return identifier; + } else { + log.debug( + "Unable to recognise the type of identifier: {}. Assuming locally unique identifier", + identifierString); + identifier.setDctermsType(DctermsType.LOCALLY_UNIQUE_IDENTIFIER); + identifier.setDctermsTitle(identifierName); + identifier.setOdsGupriLevel(OdsGupriLevel.LOCALLY_UNIQUE_STABLE); + return identifier; + } + } + + private static String getDcTermsTitle(String identifierName, String defaultValue) { + if (identifierName != null) { + return identifierName; + } else { + return defaultValue; + } + } + + private static boolean isValidUUID(String identifierString) { + try { + UUID.fromString(identifierString); + return true; + } catch (IllegalArgumentException e) { + log.debug("Identifier {} is not a valid UUID", identifierString); + return false; + } + } +} diff --git a/src/test/java/eu/dissco/core/translator/terms/utils/AgentUtilsTest.java b/src/test/java/eu/dissco/core/translator/terms/utils/AgentUtilsTest.java index 63a21fb..f6acb35 100644 --- a/src/test/java/eu/dissco/core/translator/terms/utils/AgentUtilsTest.java +++ b/src/test/java/eu/dissco/core/translator/terms/utils/AgentUtilsTest.java @@ -72,8 +72,7 @@ private static Agent createAgent(String name, String id) { .withSchemaRoleName(CREATOR.getName()))); if (id != null) { agent.withOdsHasIdentifiers( - List.of(new eu.dissco.core.translator.schema.Identifier().withType("ods:Identifier") - .withId(id).withDctermsIdentifier(id))); + List.of(IdentifierUtils.addIdentifier(id))); } return agent; } diff --git a/src/test/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtilsTest.java b/src/test/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtilsTest.java index a270e1a..1fec948 100644 --- a/src/test/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtilsTest.java +++ b/src/test/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtilsTest.java @@ -4,6 +4,7 @@ import static eu.dissco.core.translator.domain.RelationshipType.HAS_FDO_TYPE; import static eu.dissco.core.translator.domain.RelationshipType.HAS_ORGANISATION_ID; import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_SOFTWARE_APPLICATION; +import static eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus.PREFERRED; import static org.assertj.core.api.Assertions.assertThat; import eu.dissco.core.translator.domain.RelationshipType; @@ -74,7 +75,7 @@ private EntityRelationship createEntityRelationship(RelationshipType relationshi .withOdsRelatedResourceURI(relatedResourceURI) .withDwcRelationshipEstablishedDate(Date.from(Instant.now())) .withOdsHasAgents(AgentsUtils.addAgent(List.of(), APP_NAME, APP_PID, - DATA_TRANSLATOR, SCHEMA_SOFTWARE_APPLICATION)); + DATA_TRANSLATOR, SCHEMA_SOFTWARE_APPLICATION, PREFERRED)); } } diff --git a/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java b/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java new file mode 100644 index 0000000..5b819c2 --- /dev/null +++ b/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java @@ -0,0 +1,76 @@ +package eu.dissco.core.translator.terms.utils; + +import static eu.dissco.core.translator.schema.Identifier.DctermsType.HANDLE; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.LOCALLY_UNIQUE_IDENTIFIER; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.URL; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.UUID; +import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE; +import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE; +import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT; +import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.LOCALLY_UNIQUE_STABLE; +import static eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus.PREFERRED; +import static eu.dissco.core.translator.terms.utils.IdentifierUtils.addIdentifier; +import static org.assertj.core.api.Assertions.assertThat; + +import eu.dissco.core.translator.schema.Identifier; +import eu.dissco.core.translator.schema.Identifier.DctermsType; +import eu.dissco.core.translator.schema.Identifier.OdsGupriLevel; +import eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus; +import java.util.stream.Stream; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.junit.jupiter.MockitoExtension; + +@ExtendWith(MockitoExtension.class) +class IdentifierUtilsTest { + + + public static Stream identifierProvider() { + return Stream.of( + Arguments.of("https://www.wikidata.org/wiki/Q66581882", null, null, + createIdentifier("https://www.wikidata.org/wiki/Q66581882", URL, "Wikidata", + GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, null)), + Arguments.of("https://hdl.handle.net/XXX-XXX-XXX", null, PREFERRED, + createIdentifier("https://hdl.handle.net/XXX-XXX-XXX", HANDLE, "Handle", + GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT, PREFERRED)), + Arguments.of("88e320d5-c47a-4288-b265-fa3c93c57440", "dwc:catalogueNumber", null, + createIdentifier("88e320d5-c47a-4288-b265-fa3c93c57440", UUID, "dwc:catalogueNumber", + GLOBALLY_UNIQUE_STABLE, null)), + Arguments.of("urn:uuid:541fd754-17e8-43c8-ba4e-b413a1bf3a2f", "dwca:ID", null, + createIdentifier("urn:uuid:541fd754-17e8-43c8-ba4e-b413a1bf3a2f", UUID, "dwca:ID", + GLOBALLY_UNIQUE_STABLE, null)), + Arguments.of("https://geocollections.info/specimen/126758", "abcd:unitGUID", null, + createIdentifier("https://geocollections.info/specimen/126758", URL, "abcd:unitGUID", + GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, null)), + Arguments.of("AVES-071259", "dwc:occurrenceID", null, + createIdentifier("AVES-071259", LOCALLY_UNIQUE_IDENTIFIER, "dwc:occurrenceID", + LOCALLY_UNIQUE_STABLE, null)), + Arguments.of(null, null, null, null) + ); + } + + private static Identifier createIdentifier(String id, DctermsType type, String title, + OdsGupriLevel gupriLevel, OdsIdentifierStatus status) { + return new Identifier() + .withId(id) + .withType("ods:Identifier") + .withDctermsIdentifier(id) + .withDctermsTitle(title) + .withDctermsType(type) + .withOdsGupriLevel(gupriLevel) + .withOdsIdentifierStatus(status); + } + + @ParameterizedTest + @MethodSource("identifierProvider") + void testAddIdentifier(String identifier, String identifierName, OdsIdentifierStatus status, + Identifier expected) { + // When + var result = addIdentifier(identifier, identifierName, status); + + // Then + assertThat(result).isEqualTo(expected); + } +} From d695ab1c1ee3a7aa7f2e649dbc56e53f76b34cdb Mon Sep 17 00:00:00 2001 From: Sam Leeflang Date: Tue, 19 Nov 2024 15:41:31 +0100 Subject: [PATCH 2/4] Switch to Pattern match --- .../terms/utils/IdentifierUtils.java | 30 ++++++++++++------- .../terms/utils/IdentifierUtilsTest.java | 8 +++++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java index cb2e9af..d5797a0 100644 --- a/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java +++ b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java @@ -1,11 +1,14 @@ package eu.dissco.core.translator.terms.utils; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.ARK; import static eu.dissco.core.translator.schema.Identifier.DctermsType.DOI; import static eu.dissco.core.translator.schema.Identifier.DctermsType.HANDLE; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.PURL; import static eu.dissco.core.translator.schema.Identifier.DctermsType.URL; import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE; import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE; import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT; +import static java.util.regex.Pattern.compile; import eu.dissco.core.translator.schema.Identifier; import eu.dissco.core.translator.schema.Identifier.DctermsType; @@ -15,31 +18,38 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.regex.Pattern; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.Triple; @Slf4j public class IdentifierUtils { - private static final Map, Triple> map = getPrefixMap(); + private static final Map, Triple> map = getPrefixMap(); private IdentifierUtils() { // This is a Utility class } - private static Map, Triple> getPrefixMap() { - var linkedMap = new LinkedHashMap, Triple>(); - linkedMap.put(List.of("https://doi.org"), + private static Map, Triple> getPrefixMap() { + var linkedMap = new LinkedHashMap, Triple>(); + linkedMap.put(List.of(compile("^https?://doi.org")), Triple.of(DOI, "DOI", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT)); - linkedMap.put(List.of("https://hdl.handle.net"), + linkedMap.put(List.of(compile("^https?://hdl.handle.net")), Triple.of(HANDLE, "Handle", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT)); - linkedMap.put(List.of("http://www.wikidata.org", "https://www.wikidata.org"), + linkedMap.put(List.of(compile("^https?://www.wikidata.org")), Triple.of(URL, "Wikidata", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); - linkedMap.put(List.of("http://orcid.org", "https://orcid.org"), + linkedMap.put(List.of(compile("^https?://orcid.org")), Triple.of(URL, "ORCID", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); - linkedMap.put(List.of("http", "https"), + linkedMap.put(List.of(compile("^https?://orcid.org")), + Triple.of(URL, "ORCID", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); + linkedMap.put(List.of(compile("^https?://\\w+.\\w+/ark:/\\w+/\\w+")), + Triple.of(ARK, "ARK", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); + linkedMap.put(List.of(compile("https?://purl.org")), + Triple.of(PURL, "PURL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); + linkedMap.put(List.of(compile("^https?")), Triple.of(URL, "URL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); - linkedMap.put(List.of("urn:uuid"), + linkedMap.put(List.of(compile("^urn:uuid")), Triple.of(DctermsType.UUID, "UUID", GLOBALLY_UNIQUE_STABLE)); return linkedMap; } @@ -64,7 +74,7 @@ public static Identifier addIdentifier(String identifierString, String identifie .withOdsIdentifierStatus(identifierStatus); for (var entry : map.entrySet()) { for (var prefix : entry.getKey()) { - if (identifierString.startsWith(prefix)) { + if (prefix.matcher(identifierString).find()) { identifier.setDctermsType(entry.getValue().getLeft()); identifier.setDctermsTitle(getDcTermsTitle(identifierName, entry.getValue().getMiddle())); identifier.setOdsGupriLevel(entry.getValue().getRight()); diff --git a/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java b/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java index 5b819c2..ce4b3f8 100644 --- a/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java +++ b/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java @@ -1,7 +1,9 @@ package eu.dissco.core.translator.terms.utils; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.ARK; import static eu.dissco.core.translator.schema.Identifier.DctermsType.HANDLE; import static eu.dissco.core.translator.schema.Identifier.DctermsType.LOCALLY_UNIQUE_IDENTIFIER; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.PURL; import static eu.dissco.core.translator.schema.Identifier.DctermsType.URL; import static eu.dissco.core.translator.schema.Identifier.DctermsType.UUID; import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE; @@ -44,6 +46,12 @@ public static Stream identifierProvider() { Arguments.of("https://geocollections.info/specimen/126758", "abcd:unitGUID", null, createIdentifier("https://geocollections.info/specimen/126758", URL, "abcd:unitGUID", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, null)), + Arguments.of("http://n2t.net/ark:/65665/3173bef93-f5c6-4534-bd31-42289606938b", "dwc:catalogueNumber", null, + createIdentifier("http://n2t.net/ark:/65665/3173bef93-f5c6-4534-bd31-42289606938b", ARK, "dwc:catalogueNumber", + GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, null)), + Arguments.of("http://purl.org/dc/terms/accessRights", null, null, + createIdentifier("http://purl.org/dc/terms/accessRights", PURL, "PURL", + GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, null)), Arguments.of("AVES-071259", "dwc:occurrenceID", null, createIdentifier("AVES-071259", LOCALLY_UNIQUE_IDENTIFIER, "dwc:occurrenceID", LOCALLY_UNIQUE_STABLE, null)), From ed287b443fb46f8e21e82302c35bcb8282cabfec Mon Sep 17 00:00:00 2001 From: Sam Leeflang Date: Tue, 19 Nov 2024 15:57:59 +0100 Subject: [PATCH 3/4] Add pattern match for UUID --- .../terms/utils/IdentifierUtils.java | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java index d5797a0..d2cd6b1 100644 --- a/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java +++ b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java @@ -25,13 +25,13 @@ @Slf4j public class IdentifierUtils { - private static final Map, Triple> map = getPrefixMap(); + private static final Map, Triple> PATTERN_MAP = patternMap(); private IdentifierUtils() { // This is a Utility class } - private static Map, Triple> getPrefixMap() { + private static Map, Triple> patternMap() { var linkedMap = new LinkedHashMap, Triple>(); linkedMap.put(List.of(compile("^https?://doi.org")), Triple.of(DOI, "DOI", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT)); @@ -41,15 +41,14 @@ private static Map, Triple> ge Triple.of(URL, "Wikidata", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); linkedMap.put(List.of(compile("^https?://orcid.org")), Triple.of(URL, "ORCID", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); - linkedMap.put(List.of(compile("^https?://orcid.org")), - Triple.of(URL, "ORCID", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); linkedMap.put(List.of(compile("^https?://\\w+.\\w+/ark:/\\w+/\\w+")), Triple.of(ARK, "ARK", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); linkedMap.put(List.of(compile("https?://purl.org")), Triple.of(PURL, "PURL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); linkedMap.put(List.of(compile("^https?")), Triple.of(URL, "URL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); - linkedMap.put(List.of(compile("^urn:uuid")), + linkedMap.put(List.of(compile( + "(uuid:)*[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}")), Triple.of(DctermsType.UUID, "UUID", GLOBALLY_UNIQUE_STABLE)); return linkedMap; } @@ -72,7 +71,7 @@ public static Identifier addIdentifier(String identifierString, String identifie .withType("ods:Identifier") .withDctermsIdentifier(identifierString) .withOdsIdentifierStatus(identifierStatus); - for (var entry : map.entrySet()) { + for (var entry : PATTERN_MAP.entrySet()) { for (var prefix : entry.getKey()) { if (prefix.matcher(identifierString).find()) { identifier.setDctermsType(entry.getValue().getLeft()); @@ -82,20 +81,13 @@ public static Identifier addIdentifier(String identifierString, String identifie } } } - if (isValidUUID(identifierString)) { - identifier.setDctermsType(DctermsType.UUID); - identifier.setDctermsTitle(getDcTermsTitle(identifierName, "UUID")); - identifier.setOdsGupriLevel(GLOBALLY_UNIQUE_STABLE); - return identifier; - } else { - log.debug( - "Unable to recognise the type of identifier: {}. Assuming locally unique identifier", - identifierString); - identifier.setDctermsType(DctermsType.LOCALLY_UNIQUE_IDENTIFIER); - identifier.setDctermsTitle(identifierName); - identifier.setOdsGupriLevel(OdsGupriLevel.LOCALLY_UNIQUE_STABLE); - return identifier; - } + log.debug( + "Unable to recognise the type of identifier: {}. Assuming locally unique identifier", + identifierString); + identifier.setDctermsType(DctermsType.LOCALLY_UNIQUE_IDENTIFIER); + identifier.setDctermsTitle(identifierName); + identifier.setOdsGupriLevel(OdsGupriLevel.LOCALLY_UNIQUE_STABLE); + return identifier; } private static String getDcTermsTitle(String identifierName, String defaultValue) { From a4722f4149175705eebb9719709f18fbaaa2e582 Mon Sep 17 00:00:00 2001 From: Sam Leeflang Date: Tue, 19 Nov 2024 16:03:34 +0100 Subject: [PATCH 4/4] Remove unused method --- .../terms/utils/IdentifierUtils.java | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java index d2cd6b1..f69489b 100644 --- a/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java +++ b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java @@ -1,10 +1,6 @@ package eu.dissco.core.translator.terms.utils; -import static eu.dissco.core.translator.schema.Identifier.DctermsType.ARK; -import static eu.dissco.core.translator.schema.Identifier.DctermsType.DOI; -import static eu.dissco.core.translator.schema.Identifier.DctermsType.HANDLE; -import static eu.dissco.core.translator.schema.Identifier.DctermsType.PURL; -import static eu.dissco.core.translator.schema.Identifier.DctermsType.URL; +import static eu.dissco.core.translator.schema.Identifier.DctermsType.*; import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE; import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE; import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT; @@ -17,7 +13,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.UUID; import java.util.regex.Pattern; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.Triple; @@ -49,7 +44,7 @@ private static Map, Triple> pa Triple.of(URL, "URL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE)); linkedMap.put(List.of(compile( "(uuid:)*[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}")), - Triple.of(DctermsType.UUID, "UUID", GLOBALLY_UNIQUE_STABLE)); + Triple.of(UUID, "UUID", GLOBALLY_UNIQUE_STABLE)); return linkedMap; } @@ -97,14 +92,4 @@ private static String getDcTermsTitle(String identifierName, String defaultValue return defaultValue; } } - - private static boolean isValidUUID(String identifierString) { - try { - UUID.fromString(identifierString); - return true; - } catch (IllegalArgumentException e) { - log.debug("Identifier {} is not a valid UUID", identifierString); - return false; - } - } }