diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index b54dfffd1d2..1d4387671f6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -7,15 +7,8 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.io.*; +import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import javax.ejb.EJB; @@ -23,6 +16,16 @@ import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.persistence.TypedQuery; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.text.StringEscapeUtils; @@ -30,6 +33,8 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; /** * @@ -561,9 +566,69 @@ public String generateXML(DvObject dvObject) { xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); + + xmlMetadata = this.addFundingReferences(dvObject, xmlMetadata); + return xmlMetadata; } + private String addFundingReferences(DvObject dvObject, String xmlMetadata) { + try { + if (dvObject.isInstanceofDataset()) { + Dataset dataset = (Dataset) dvObject; + List> grantNumberChildValues = this.extractGrantNumberValues(dataset); + if (!grantNumberChildValues.isEmpty()) { + org.w3c.dom.Document xmlDocument = DataCiteMetadataUtil.parseXml(xmlMetadata); + xmlDocument = this.appendFundingReferences(grantNumberChildValues, xmlDocument); + xmlMetadata = DataCiteMetadataUtil.prettyPrintXML(xmlDocument, 4); + } + } + } catch(Exception e) { + logger.log(Level.SEVERE, "Error adding fundingReferences to the DataCite Metadata: {0}", e.getMessage()); + } + return xmlMetadata; + } + + private List> extractGrantNumberValues(Dataset dataset) { + List> grantNumberChildValues = new ArrayList<>(); + List grantNumberDatasetFields = DataCiteMetadataUtil.searchForFirstLevelDatasetFields(dataset, DatasetFieldConstant.grantNumber); + //There should only be one DatasetField with name 'grantNumber' (Premise: There are values for grantNumber) + if(!grantNumberDatasetFields.isEmpty()){ + DatasetField grantNumber = grantNumberDatasetFields.get(0); + grantNumberChildValues = DataCiteMetadataUtil.extractCompoundValueChildDatasetFieldValues(grantNumber); + } + return grantNumberChildValues; + } + + /** + *
+     * Appends fundingReferences to the DataCite xml.
+     * Mappings:
+     * - grantNumberAgency -> funderName
+     * - grantNumberValue -> awardNumber
+     * 
+ * + * @param grantNumberChildValues + * @param xmlDocument + * @return The xmlDocument with fundingReferences + */ + private org.w3c.dom.Document appendFundingReferences(List> grantNumberChildValues, org.w3c.dom.Document xmlDocument) { + for (Map childValue : grantNumberChildValues) { + // funderName (=grantNumberAgency) is a required subfield of fundingReference + if (childValue.containsKey(DatasetFieldConstant.grantNumberAgency)) { + if(xmlDocument.getElementsByTagName("fundingReferences").getLength() == 0){ + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "resource", "fundingReferences", null); + } + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReferences", "fundingReference", null); + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "funderName", childValue.get(DatasetFieldConstant.grantNumberAgency)); + if (childValue.containsKey(DatasetFieldConstant.grantNumberValue)) { + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "awardNumber", childValue.get(DatasetFieldConstant.grantNumberValue)); + } + } + } + return xmlDocument; + } + private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); @@ -726,3 +791,83 @@ public static String getStrFromList(List authors) { } } + +class DataCiteMetadataUtil { + + public static org.w3c.dom.Document parseXml(String xml) throws ParserConfigurationException, IOException, SAXException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + org.w3c.dom.Document document = builder.parse(new InputSource(new StringReader(xml))); + + return document; + } + + /** + * Append Element to the last parent element in order. + * + * @param document + * @param parentTagName + * @param tagName + * @param textContent + */ + public static void appendElementToDocument(org.w3c.dom.Document document, String parentTagName, String tagName, String textContent) { + org.w3c.dom.Element element = document.createElement(tagName); + if(textContent != null && !textContent.isEmpty()) { + element.setTextContent(textContent); + } + org.w3c.dom.NodeList parentElements = document.getElementsByTagName(parentTagName); + if(parentElements.getLength() > 0){ + org.w3c.dom.Element lastParentElement = (org.w3c.dom.Element) parentElements.item(parentElements.getLength() - 1); + lastParentElement.appendChild(element); + } + } + + public static String prettyPrintXML(org.w3c.dom.Document document, int indent) throws TransformerException { + TransformerFactory transformerFactory = TransformerFactory.newInstance(); + InputStream inputStream = DataCiteMetadataTemplate.class.getResourceAsStream("prettyprint.xsl"); + String prettyPrintXsl = Util.readAndClose(inputStream, "utf-8"); + Transformer transformer = transformerFactory.newTransformer(new StreamSource(new StringReader(prettyPrintXsl))); + transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent)); + transformer.setOutputProperty(OutputKeys.STANDALONE, "no"); + + StringWriter stringWriter = new StringWriter(); + transformer.transform(new DOMSource(document), new StreamResult(stringWriter)); + return stringWriter.toString(); + } + + /** + * Search for a fist-level DatasetFields by name. + * + * @param dataset + * @param datasetFieldName + * @return List of DatasetFields with the given name. + */ + public static List searchForFirstLevelDatasetFields(Dataset dataset, String datasetFieldName) { + List datasetFields = new ArrayList<>(); + for (DatasetField datasetField : dataset.getLatestVersion().getDatasetFields()) { + if (datasetField.getDatasetFieldType().getName().equals(datasetFieldName)) { + datasetFields.add(datasetField); + } + } + return datasetFields; + } + + public static List> extractCompoundValueChildDatasetFieldValues(DatasetField datasetField){ + List> fieldValues = new ArrayList<>(); + for (DatasetFieldCompoundValue compoundValue : datasetField.getDatasetFieldCompoundValues()) { + fieldValues.add(DataCiteMetadataUtil.extractChildDatasetFieldValues(compoundValue)); + } + return fieldValues; + } + + public static Map extractChildDatasetFieldValues(DatasetFieldCompoundValue datasetFieldCompoundValue) { + Map datasetFieldValues = new HashMap<>(); + for (DatasetField childDatasetField : datasetFieldCompoundValue.getChildDatasetFields()) { + datasetFieldValues.put(childDatasetField.getDatasetFieldType().getName(), childDatasetField.getValue()); + } + return datasetFieldValues; + } + +} diff --git a/src/main/resources/edu/harvard/iq/dataverse/prettyprint.xsl b/src/main/resources/edu/harvard/iq/dataverse/prettyprint.xsl new file mode 100644 index 00000000000..3941269f403 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/prettyprint.xsl @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file