diff --git a/.github/workflows/deploy_beta_testing.yml b/.github/workflows/deploy_beta_testing.yml index 4cec08564a4..eca8416732a 100644 --- a/.github/workflows/deploy_beta_testing.yml +++ b/.github/workflows/deploy_beta_testing.yml @@ -5,6 +5,10 @@ on: branches: - develop +concurrency: + group: deploy-beta-testing + cancel-in-progress: false + jobs: build: runs-on: ubuntu-latest diff --git a/doc/release-notes/11049-oai-identifiers-as-pids.md b/doc/release-notes/11049-oai-identifiers-as-pids.md new file mode 100644 index 00000000000..8b53a461a70 --- /dev/null +++ b/doc/release-notes/11049-oai-identifiers-as-pids.md @@ -0,0 +1,5 @@ +## When harvesting, Dataverse can now use the identifier from the OAI-PMH record header as the persistent id for the harvested dataset. + +This will allow harvesting from sources that do not include a persistent id in their oai_dc metadata records, but use valid dois or handles as the OAI-PMH record header identifiers. + +It is also possible to optionally configure a harvesting client to use this OAI-PMH identifier as the **preferred** choice for the persistent id. See the [Harvesting Clients API](https://guides.dataverse.org/en/6.5/api/native-api.html#create-a-harvesting-client) section of the Guides, #11049 and #10982 for more information. \ No newline at end of file diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 9ab06f1a87a..1606eafbb48 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5254,6 +5254,7 @@ Shows a Harvesting Client with a defined nickname:: "dataverseAlias": "fooData", "nickName": "myClient", "set": "fooSet", + "useOaiIdentifiersAsPids": false, "schedule": "none", "status": "inActive", "lastHarvest": "Thu Oct 13 14:48:57 EDT 2022", @@ -5288,6 +5289,7 @@ The following optional fields are supported: - style: Defaults to "default" - a generic OAI archive. 
(Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). - customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. - allowHarvestingMissingCVV: Flag to allow datasets to be harvested with Controlled Vocabulary Values that existed in the originating Dataverse Project but are not in the harvesting Dataverse Project. (Default is false). Currently only settable using API. +- useOaiIdentifiersAsPids: Defaults to false; if set to true, the harvester will attempt to use the identifier from the OAI-PMH record header as the **first choice** for the persistent id of the harvested dataset. When set to false, Dataverse will still attempt to use this identifier, but only if none of the `<dc:identifier>` entries in the OAI_DC record contain a valid persistent id (this is new as of v6.5). Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. 
diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index f7843b8bf40..df9b6704209 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -22,14 +22,14 @@ Supported Metadata Detailed below are what metadata schemas we support for Citation and Domain Specific Metadata in the Dataverse Project: -- `Citation Metadata `__ (`see .tsv version `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core's `DCMI Metadata Terms `__ . Language field uses `ISO 639-1 `__ controlled vocabulary. -- `Geospatial Metadata `__ (`see .tsv version `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. -- `Social Science & Humanities Metadata `__ (`see .tsv version `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, and Dublin Core. -- `Astronomy and Astrophysics Metadata `__ (`see .tsv version `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) +- Citation Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core's `DCMI Metadata Terms `__ . Language field uses `ISO 639-1 `__ controlled vocabulary. +- Geospatial Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. +- Social Science & Humanities Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, and Dublin Core. +- Astronomy and Astrophysics Metadata (`see .tsv `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) `VOResource Schema format `__ and is based on `Virtual Observatory (VO) Discovery and Provenance Metadata `__. 
-- `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. -- `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. +- Life Sciences Metadata (`see .tsv `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. +- Journal Metadata (`see .tsv `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. Experimental Metadata ~~~~~~~~~~~~~~~~~~~~~ @@ -37,7 +37,7 @@ Experimental Metadata Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! - `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) -- `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. +- Computational Workflow Metadata (`see .tsv `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. Please note: these custom metadata schemas are not included in the Solr schema for indexing by default, you will need to add them as necessary for your custom metadata blocks. See "Update the Solr Schema" in :doc:`../admin/metadatacustomization`. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 3977023fc4b..02fb59751fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -792,6 +792,7 @@ private GlobalId getPIDFrom(DatasetVersion dsv, DvObject dv) { if (!dsv.getDataset().isHarvested() || HarvestingClient.HARVEST_STYLE_VDC.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle()) || HarvestingClient.HARVEST_STYLE_ICPSR.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle()) + || HarvestingClient.HARVEST_STYLE_DEFAULT.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle()) || HarvestingClient.HARVEST_STYLE_DATAVERSE .equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle())) { if(!isDirect()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index 41a57665010..aa5b25e3967 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -150,12 +150,16 @@ public DatasetDTO processXML( XMLStreamReader xmlr, ForeignMetadataFormatMapping } - // Helper method for importing harvested Dublin Core xml. + // Helper methods for importing harvested Dublin Core xml. // Dublin Core is considered a mandatory, built in metadata format mapping. // It is distributed as required content, in reference_data.sql. // Note that arbitrary formatting tags are supported for the outer xml // wrapper. -- L.A. 
4.5 public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException { + return processOAIDCxml(DcXmlToParse, null, false); + } + + public DatasetDTO processOAIDCxml(String DcXmlToParse, String oaiIdentifier, boolean preferSuppliedIdentifier) throws XMLStreamException { // look up DC metadata mapping: ForeignMetadataFormatMapping dublinCoreMapping = findFormatMappingByName(DCTERMS); @@ -185,18 +189,37 @@ public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException datasetDTO.getDatasetVersion().setVersionState(DatasetVersion.VersionState.RELEASED); - // Our DC import handles the contents of the dc:identifier field - // as an "other id". In the context of OAI harvesting, we expect - // the identifier to be a global id, so we need to rearrange that: + // In some cases, the identifier that we want to use for the dataset is + // already supplied to the method explicitly. For example, in some + // harvesting cases we'll want to use the OAI identifier (the identifier + // from the <header> section of the OAI record) for that purpose, without + // expecting to find a valid persistent id in the body of the DC record: - String identifier = getOtherIdFromDTO(datasetDTO.getDatasetVersion()); - logger.fine("Imported identifier: "+identifier); + String globalIdentifier; - String globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO); - logger.fine("Detected global identifier: "+globalIdentifier); + if (oaiIdentifier != null) { + logger.fine("Attempting to use " + oaiIdentifier + " as the persistentId of the imported dataset"); + + globalIdentifier = reassignIdentifierAsGlobalId(oaiIdentifier, datasetDTO); + } else { + // Our DC import handles the contents of the dc:identifier field + // as an "other id". Unless we are using an externally supplied + // global id, we will be using the first such "other id" that we + // can parse and recognize as the global id for the imported dataset + // (note that this is the default behavior during harvesting), + // so we need to reassign it accordingly: + String identifier = selectIdentifier(datasetDTO.getDatasetVersion(), oaiIdentifier, preferSuppliedIdentifier); + logger.fine("Imported identifier: " + identifier); + + globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO); + logger.fine("Detected global identifier: " + globalIdentifier); + } if (globalIdentifier == null) { - throw new EJBException("Failed to find a global identifier in the OAI_DC XML record."); + String exceptionMsg = oaiIdentifier == null ? + "Failed to find a global identifier in the OAI_DC XML record." 
: + "Failed to parse the supplied identifier as a valid Persistent Id"; + throw new EJBException(exceptionMsg); } return datasetDTO; @@ -344,8 +367,20 @@ private FieldDTO makeDTO(DatasetFieldType dataverseFieldType, FieldDTO value, St return value; } - private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { + public String selectIdentifier(DatasetVersionDTO datasetVersionDTO, String suppliedIdentifier) { + return selectIdentifier(datasetVersionDTO, suppliedIdentifier, false); + } + + private String selectIdentifier(DatasetVersionDTO datasetVersionDTO, String suppliedIdentifier, boolean preferSuppliedIdentifier) { List<String> otherIds = new ArrayList<>(); + + if (suppliedIdentifier != null && preferSuppliedIdentifier) { + // This supplied identifier (in practice, this is likely the OAI-PMH + // identifier from the <header> section) will be our first + // choice candidate for the pid of the imported dataset: + otherIds.add(suppliedIdentifier); + } + for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); @@ -363,6 +398,16 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { } } } + + if (suppliedIdentifier != null && !preferSuppliedIdentifier) { + // Unless specifically instructed to prefer this extra identifier + // (in practice, this is likely the OAI-PMH identifier from the + // <header> section), we will try to use it as the *last* + // possible candidate for the pid, so, adding it to the end of the + // list: + otherIds.add(suppliedIdentifier); + } + if (!otherIds.isEmpty()) { // We prefer doi or hdl identifiers like "doi:10.7910/DVN/1HE30F" for (String otherId : otherIds) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index ee4609a7c56..7dc2aed799e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -208,7 +208,13 @@ public JsonObjectBuilder handleFile(DataverseRequest dataverseRequest, Dataverse } @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, Date oaiDateStamp, PrintWriter cleanupLog) throws ImportException, IOException { + public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, + HarvestingClient harvestingClient, + String harvestIdentifier, + String metadataFormat, + File metadataFile, + Date oaiDateStamp, + PrintWriter cleanupLog) throws ImportException, IOException { if (harvestingClient == null || harvestingClient.getDataverse() == null) { throw new ImportException("importHarvestedDataset called with a null harvestingClient, or an invalid harvestingClient."); } @@ -244,8 +250,8 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve } else if ("dc".equalsIgnoreCase(metadataFormat) || "oai_dc".equals(metadataFormat)) { logger.fine("importing DC "+metadataFile.getAbsolutePath()); try { - String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath())); - dsDTO = importGenericService.processOAIDCxml(xmlToParse); + String xmlToParse = new 
String(Files.readAllBytes(metadataFile.toPath())); + dsDTO = importGenericService.processOAIDCxml(xmlToParse, harvestIdentifier, harvestingClient.isUseOaiIdentifiersAsPids()); } catch (IOException | XMLStreamException e) { throw new ImportException("Failed to process Dublin Core XML record: "+ e.getClass() + " (" + e.getMessage() + ")"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index 0667f5594ce..7280b6af129 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -252,8 +252,16 @@ public void setAllowHarvestingMissingCVV(boolean allowHarvestingMissingCVV) { this.allowHarvestingMissingCVV = allowHarvestingMissingCVV; } - // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 - // TODO: should it be @OrderBy("startTime")? -- L.A. 4.4 + private boolean useOaiIdAsPid; + + public boolean isUseOaiIdentifiersAsPids() { + return useOaiIdAsPid; + } + + public void setUseOaiIdentifiersAsPids(boolean useOaiIdAsPid) { + this.useOaiIdAsPid = useOaiIdAsPid; + } + @OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) @OrderBy("id") private List harvestHistory; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 3f60317655a..232b7431a24 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -1052,6 +1052,7 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC harvestingClient.setHarvestingSet(obj.getString("set",null)); harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null)); 
harvestingClient.setAllowHarvestingMissingCVV(obj.getBoolean("allowHarvestingMissingCVV", false)); + harvestingClient.setUseOaiIdentifiersAsPids(obj.getBoolean("useOaiIdentifiersAsPids", false)); return dataverseAlias; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index f884d313d64..91af13c79a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -1013,6 +1013,7 @@ public static JsonObjectBuilder json(HarvestingClient harvestingClient) { add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive"). add("customHeaders", harvestingClient.getCustomHttpHeaders()). add("allowHarvestingMissingCVV", harvestingClient.getAllowHarvestingMissingCVV()). + add("useOaiIdentifiersAsPids", harvestingClient.isUseOaiIdentifiersAsPids()). add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? null : harvestingClient.getLastHarvestTime().toString()). add("lastResult", harvestingClient.getLastResult()). add("lastSuccessful", harvestingClient.getLastSuccessfulHarvestTime() == null ? null : harvestingClient.getLastSuccessfulHarvestTime().toString()). diff --git a/src/main/resources/db/migration/V6.4.0.3.sql b/src/main/resources/db/migration/V6.4.0.3.sql new file mode 100644 index 00000000000..307d8ed206c --- /dev/null +++ b/src/main/resources/db/migration/V6.4.0.3.sql @@ -0,0 +1,2 @@ +-- Add this boolean flag to accommodate a new harvesting client feature +ALTER TABLE harvestingclient ADD COLUMN IF NOT EXISTS useOaiIdAsPid BOOLEAN DEFAULT FALSE; diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index 505fe681363..fc224443a8e 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -582,7 +582,7 @@ - +
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java index 44739f3f62a..acf5d970358 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java @@ -1,6 +1,13 @@ package edu.harvard.iq.dataverse.api.imports; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; +import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; + +import org.apache.commons.io.FileUtils; +import com.google.gson.Gson; +import java.io.File; +import java.io.IOException; + import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; @@ -8,6 +15,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; +import java.nio.charset.StandardCharsets; + @ExtendWith(MockitoExtension.class) public class ImportGenericServiceBeanTest { @@ -15,7 +24,47 @@ public class ImportGenericServiceBeanTest { private ImportGenericServiceBean importGenericService; @Test - public void testReassignIdentifierAsGlobalId() { + void testIdentifierHarvestableWithOtherID() throws IOException { + // "otherIdValue" containing the value : doi:10.7910/DVN/TJCLKP + File file = new File("src/test/resources/json/importGenericWithOtherId.json"); + String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); + DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); + + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://doi.org/10.7910/DVN/TJCLKP")); + // junk or null + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "junk")); + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, null)); + assertEquals("doi:10.7910/DVN/TJCLKP", 
importGenericService.selectIdentifier(dto, "http://www.example.com")); + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://dataverse.org")); + } + + @Test + void testIdentifierHarvestableWithoutOtherID() throws IOException { + // Does not contain data of type "otherIdValue" + File file = new File("src/test/resources/json/importGenericWithoutOtherId.json"); + String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); + DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); + + // non-URL + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "doi:10.7910/DVN/TJCLKP")); + assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "hdl:10.7910/DVN/TJCLKP")); + // HTTPS + assertEquals("https://doi.org/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://doi.org/10.7910/DVN/TJCLKP")); + assertEquals("https://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://dx.doi.org/10.7910/DVN/TJCLKP")); + assertEquals("https://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://hdl.handle.net/10.7910/DVN/TJCLKP")); + // HTTP (no S) + assertEquals("http://doi.org/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "http://doi.org/10.7910/DVN/TJCLKP")); + assertEquals("http://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "http://dx.doi.org/10.7910/DVN/TJCLKP")); + assertEquals("http://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "http://hdl.handle.net/10.7910/DVN/TJCLKP")); + // junk or null + assertNull(importGenericService.selectIdentifier(dto, "junk")); + assertNull(importGenericService.selectIdentifier(dto, null)); + assertNull(importGenericService.selectIdentifier(dto, "http://www.example.com")); + assertNull(importGenericService.selectIdentifier(dto, "https://dataverse.org")); + } + + @Test + 
void testReassignIdentifierAsGlobalId() { // non-URL assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.reassignIdentifierAsGlobalId("doi:10.7910/DVN/TJCLKP", new DatasetDTO())); assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.reassignIdentifierAsGlobalId("hdl:10.7910/DVN/TJCLKP", new DatasetDTO())); @@ -29,6 +78,8 @@ public void testReassignIdentifierAsGlobalId() { assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.reassignIdentifierAsGlobalId("http://hdl.handle.net/10.7910/DVN/TJCLKP", new DatasetDTO())); // junk assertNull(importGenericService.reassignIdentifierAsGlobalId("junk", new DatasetDTO())); + assertNull(importGenericService.reassignIdentifierAsGlobalId("http://www.example.com", new DatasetDTO())); + assertNull(importGenericService.reassignIdentifierAsGlobalId("https://dataverse.org", new DatasetDTO())); } } diff --git a/src/test/resources/json/importGenericWithOtherId.json b/src/test/resources/json/importGenericWithOtherId.json new file mode 100644 index 00000000000..af9241393e9 --- /dev/null +++ b/src/test/resources/json/importGenericWithOtherId.json @@ -0,0 +1,307 @@ +{ + "UNF": "UNF", + "createTime": "2014-11-12 12:17:55 -05", + "distributionDate": "Distribution Date", + "id": 2, + "lastUpdateTime": "2014-11-12 12:20:32 -05", + "metadataBlocks": { + "astrophysics": { + "displayName": "Astronomy and Astrophysics Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "astroType", + "value": [ + "Image", + "Mosaic", + "EventList" + ] + } + ] + }, + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "multiple": false, + "typeClass": "primitive", + "typeName": "title", + "value": "My Dataset" + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "author", + "value": [ + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Top" + }, + "authorIdentifier": { + "multiple": 
false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "ellenid" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "ORCID" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Privileged, Pete" + } + }, + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Bottom" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "audreyId" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "DAISY" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Awesome, Audrey" + } + } + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "datasetContact", + "value": [ + "pete@malinator.com" + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescription", + "value": "Here is my description" + }, + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "subject", + "value": [ + "Arts and Humanities", + "Astronomy and Astrophysics", + "Business and Management" + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "keyword", + "value": [ + "keyword1", + "keyword2" + ] + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "otherId", + "value": [ + { + "otherIdAgency": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdAgency", + "value": "my agency" + }, + "otherIdValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdValue", + "value": "otherId" + } + }, + { + "otherIdAgency": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdAgency", + "value": 
"another agency" + }, + "otherIdValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdValue", + "value": "otherId2" + } + }, + { + "otherIdAgency": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdAgency", + "value": "another agency" + }, + "otherIdValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdValue", + "value": "doi:10.7910/DVN/TJCLKP" + } + } + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "depositor", + "value": "Ellen K" + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dateOfDeposit", + "value": "2014-11-12" + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "geographicCoverage", + "value": [ + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "Arlington" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "United States" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "MA" + } + }, + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "beachcity" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "Aruba" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "beach" + } + } + ] + }, + { + "multiple": false, + "typeClass": "compound", + "typeName": "geographicBoundingBox", + "value": + { + "eastLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "eastLongitude", + "value": "23" + }, + "northLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "northLatitude", + "value": "786" + }, + "southLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": 
"southLatitude", + "value": "34" + }, + "westLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "westLongitude", + "value": "45" + } + } + + } + ] + }, + "socialscience": { + "displayName": "Social Science and Humanities Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "software", + "value": [ + { + "softwareName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareName", + "value": "softwareName" + }, + "softwareVersion": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareVersion", + "value": "software version" + } + } + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "unitOfAnalysis", + "value": "unit of analysis" + } + ] + } + }, + "productionDate": "Production Date", + "versionState": "DRAFT" + } diff --git a/src/test/resources/json/importGenericWithoutOtherId.json b/src/test/resources/json/importGenericWithoutOtherId.json new file mode 100644 index 00000000000..ceb2263c2cf --- /dev/null +++ b/src/test/resources/json/importGenericWithoutOtherId.json @@ -0,0 +1,258 @@ +{ + "UNF": "UNF", + "createTime": "2014-11-12 12:17:55 -05", + "distributionDate": "Distribution Date", + "id": 2, + "lastUpdateTime": "2014-11-12 12:20:32 -05", + "metadataBlocks": { + "astrophysics": { + "displayName": "Astronomy and Astrophysics Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "astroType", + "value": [ + "Image", + "Mosaic", + "EventList" + ] + } + ] + }, + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "multiple": false, + "typeClass": "primitive", + "typeName": "title", + "value": "My Dataset" + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "author", + "value": [ + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Top" + }, + "authorIdentifier": { + "multiple": false, + 
"typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "ellenid" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "ORCID" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Privileged, Pete" + } + }, + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Bottom" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "audreyId" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "DAISY" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Awesome, Audrey" + } + } + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "datasetContact", + "value": [ + "pete@malinator.com" + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescription", + "value": "Here is my description" + }, + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "subject", + "value": [ + "Arts and Humanities", + "Astronomy and Astrophysics", + "Business and Management" + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "keyword", + "value": [ + "keyword1", + "keyword2" + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "depositor", + "value": "Ellen K" + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dateOfDeposit", + "value": "2014-11-12" + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "geographicCoverage", + "value": [ + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": 
"city", + "value": "Arlington" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "United States" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "MA" + } + }, + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "beachcity" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "Aruba" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "beach" + } + } + ] + }, + { + "multiple": false, + "typeClass": "compound", + "typeName": "geographicBoundingBox", + "value": + { + "eastLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "eastLongitude", + "value": "23" + }, + "northLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "northLatitude", + "value": "786" + }, + "southLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "southLatitude", + "value": "34" + }, + "westLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "westLongitude", + "value": "45" + } + } + + } + ] + }, + "socialscience": { + "displayName": "Social Science and Humanities Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "software", + "value": [ + { + "softwareName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareName", + "value": "softwareName" + }, + "softwareVersion": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareVersion", + "value": "software version" + } + } + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "unitOfAnalysis", + "value": "unit of analysis" + } + ] + } + }, + "productionDate": "Production Date", + "versionState": "DRAFT" + }