From 64eea3482ab0b399b1b62ae868d94d04a391991e Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Fri, 1 Mar 2024 14:45:45 +0100 Subject: [PATCH 1/4] Remove the xml info and bibtex tag from the citation refbox --- .../app/rest/ClarinRefBoxController.java | 65 +++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java index 8c8490f1d53e..c9e8ec59b314 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java @@ -21,6 +21,8 @@ import java.util.Map; import java.util.Objects; import java.util.UUID; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -77,6 +79,9 @@ @RestController @RequestMapping("/api/core/refbox") public class ClarinRefBoxController { + + private final String XML_FILE_START = ""; + private final String BIBTEX_TAG = "bib:bibtex"; private final Logger log = org.apache.logging.log4j.LogManager.getLogger(ClarinRefBoxController.class); @Autowired @@ -228,7 +233,7 @@ public ResponseEntity getCitationText(@RequestParam(name = "type") String type, XmlOutputContext xmlOutContext = XmlOutputContext.emptyContext(output); xmlOutContext.getWriter().writeStartDocument(); - //Try to obtain just the metadata, if that fails return "normal" response + // Try to obtain just the metadata, if that fails return "normal" response try { oaipmh.getInfo().getGetRecord().getRecord().getMetadata().write(xmlOutContext); } catch (Exception e) { @@ -246,8 +251,10 @@ public ResponseEntity getCitationText(@RequestParam(name = "type") String type, response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, "Unexpected error while writing the output. For more information visit the log files."); } catch (XOAIManagerResolverException e) { - throw new ServletException("OAI 2.0 wasn't correctly initialized," + - " please check the log for previous errors", e); + String errMessage = "OAI 2.0 wasn't correctly initialized, please check the log for previous errors. " + + "Error message: " + e.getMessage(); + log.error(errMessage); + throw new ServletException(errMessage); } catch (OAIException e) { response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, "Unexpected error. For more information visit the log files."); @@ -263,8 +270,11 @@ public ResponseEntity getCitationText(@RequestParam(name = "type") String type, HttpStatus.valueOf(HttpServletResponse.SC_NO_CONTENT)); } + // Update the output string and remove the unwanted parts. + String outputString = updateOutput(output.toString()); + // Wrap the String output to the class for better parsing in the FE - OaiMetadataWrapper oaiMetadataWrapper = new OaiMetadataWrapper(output.toString()); + OaiMetadataWrapper oaiMetadataWrapper = new OaiMetadataWrapper(outputString); return new ResponseEntity<>(oaiMetadataWrapper, HttpStatus.valueOf(SC_OK)); } @@ -331,6 +341,53 @@ private List mapFeaturedServiceLinks(List tag. + */ + private String updateOutput(String output) { + String outputString = ""; + // Remove from the output. + if (output != null && output.startsWith(XML_FILE_START)) { + // Remove XML_FILE_START from the start of the string. + outputString = output.substring(XML_FILE_START.length()); + } + + // If bibtex, remove the in the start and the end of the string. + outputString = this.removeBibtexTag(outputString); + return outputString; + } + + /** + * Remove the tag from the string. + */ + private String removeBibtexTag(String xml) { + String outputString = xml; + String openingTagPattern = String.format("<%s[^>]*>", BIBTEX_TAG); + String closingTagPattern = ">"; + String endBibtexTagPattern = String.format("", BIBTEX_TAG); + + Pattern openingTagRegex = Pattern.compile(openingTagPattern); + Pattern closingTagRegex = Pattern.compile(closingTagPattern); + + Matcher openingTagMatcher = openingTagRegex.matcher(outputString); + Matcher closingTagMatcher = closingTagRegex.matcher(outputString); + + if (openingTagMatcher.find() && closingTagMatcher.find()) { + int openingTagStart = openingTagMatcher.start(); + int closingTagEnd = closingTagMatcher.end(); + + // Remove the tag from the start of the string. + outputString = outputString.substring(0, openingTagStart) + outputString.substring(closingTagEnd); + // Remove the tag from the end of the string. + outputString = outputString.replace(endBibtexTagPattern, ""); + // Remove empty spaces from the start and the end of the string. + return outputString.trim(); + } else { + // Tags not found or mismatched + return xml; + } + } + } /** From d1d9834b5ef2dc74967ff5be40f8384822d831e1 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Fri, 1 Mar 2024 16:01:00 +0100 Subject: [PATCH 2/4] Show handle and DOI in the Item View following the new cfg property --- dspace/config/clarin-dspace.cfg | 5 +++++ dspace/config/modules/rest.cfg | 1 + 2 files changed, 6 insertions(+) diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index 7f47d9896cd5..a13a684a4118 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -252,3 +252,8 @@ s3.upload.by.parts.enabled = true ### The build version is stored in the specific file ### build.version.file.path = ${dspace.dir}/config/VERSION_D.txt + + +#### Item View #### +# Show handle and doi as identifiers - show only DOI if it exists instead of handle by default +item-page.show-handle-and-doi = false \ No newline at end of file diff --git a/dspace/config/modules/rest.cfg b/dspace/config/modules/rest.cfg index d17c6dc6bf2c..32dcc0fb1e67 100644 --- a/dspace/config/modules/rest.cfg +++ b/dspace/config/modules/rest.cfg @@ -70,6 +70,7 @@ rest.properties.exposed = themed.by.company.name rest.properties.exposed = identifier.doi.resolver rest.properties.exposed = spring.servlet.multipart.max-file-size rest.properties.exposed = authentication-shibboleth.show.idp-attributes +rest.properties.exposed = item-page.show-handle-and-doi # TUL rest.properties.exposed = dspace.ui.url From f0f04cb2ed3b3662f80fa61906a8180f0629b292 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Wed, 13 Mar 2024 15:49:40 +0100 Subject: [PATCH 3/4] Made updating xml string more generic --- .../app/rest/ClarinRefBoxController.java | 82 ++++++++++--------- 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java index c9e8ec59b314..90d258baa06f 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java @@ -27,6 +27,9 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.NotFoundException; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; import javax.xml.stream.XMLStreamException; import com.hp.hpl.jena.rdf.model.Model; @@ -69,6 +72,9 @@ import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.xml.sax.SAXException; /** * A Controller for fetching the data for the ref-box in the Item View (FE). @@ -80,8 +86,8 @@ @RequestMapping("/api/core/refbox") public class ClarinRefBoxController { - private final String XML_FILE_START = ""; - private final String BIBTEX_TAG = "bib:bibtex"; + private final static String BIBTEX_TYPE = "bibtex"; + private final Logger log = org.apache.logging.log4j.LogManager.getLogger(ClarinRefBoxController.class); @Autowired @@ -271,7 +277,7 @@ public ResponseEntity getCitationText(@RequestParam(name = "type") String type, } // Update the output string and remove the unwanted parts. - String outputString = updateOutput(output.toString()); + String outputString = updateOutput(type, output.toString()); // Wrap the String output to the class for better parsing in the FE OaiMetadataWrapper oaiMetadataWrapper = new OaiMetadataWrapper(outputString); @@ -344,50 +350,52 @@ private List mapFeaturedServiceLinks(List tag. */ - private String updateOutput(String output) { - String outputString = ""; - // Remove from the output. - if (output != null && output.startsWith(XML_FILE_START)) { - // Remove XML_FILE_START from the start of the string. - outputString = output.substring(XML_FILE_START.length()); + private String updateOutput(String type, String output) { + try { + if (StringUtils.equals(type, BIBTEX_TYPE)) { + return removeBibtexTag(output); + } else { + return removeXmlHeaderTag(output); + } + } catch (Exception e) { + e.printStackTrace(); + return null; } - - // If bibtex, remove the in the start and the end of the string. - outputString = this.removeBibtexTag(outputString); - return outputString; } /** - * Remove the tag from the string. + * Remove the XML header tag from the string. + * + * @param xml + * @return */ - private String removeBibtexTag(String xml) { - String outputString = xml; - String openingTagPattern = String.format("<%s[^>]*>", BIBTEX_TAG); - String closingTagPattern = ">"; - String endBibtexTagPattern = String.format("", BIBTEX_TAG); - - Pattern openingTagRegex = Pattern.compile(openingTagPattern); - Pattern closingTagRegex = Pattern.compile(closingTagPattern); - - Matcher openingTagMatcher = openingTagRegex.matcher(outputString); - Matcher closingTagMatcher = closingTagRegex.matcher(outputString); - - if (openingTagMatcher.find() && closingTagMatcher.find()) { - int openingTagStart = openingTagMatcher.start(); - int closingTagEnd = closingTagMatcher.end(); - - // Remove the tag from the start of the string. - outputString = outputString.substring(0, openingTagStart) + outputString.substring(closingTagEnd); - // Remove the tag from the end of the string. - outputString = outputString.replace(endBibtexTagPattern, ""); - // Remove empty spaces from the start and the end of the string. - return outputString.trim(); + private String removeXmlHeaderTag(String xml) { + String xmlHeaderPattern = "<\\?xml[^>]*\\?>"; + Pattern xmlHeaderRegex = Pattern.compile(xmlHeaderPattern); + Matcher xmlHeaderMatcher = xmlHeaderRegex.matcher(xml); + if (xmlHeaderMatcher.find()) { + return xml.replaceFirst(xmlHeaderPattern, ""); } else { - // Tags not found or mismatched return xml; } } + /** + * Remove the tag from the string. + */ + private String removeBibtexTag(String xml) throws ParserConfigurationException, IOException, SAXException { + // Parse the XML string + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document document = builder.parse(new org.xml.sax.InputSource(new java.io.StringReader(xml))); + + // Get the root element + Node root = document.getDocumentElement(); + + // Get the text content of the root element + return root.getTextContent().trim(); + } + } /** From bb515cb6f3cd3ac9f38e417201bf5c8df073cf80 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Thu, 14 Mar 2024 10:14:25 +0100 Subject: [PATCH 4/4] Updated the method name for the getting text content from the bibtex xml. --- .../dspace/app/rest/ClarinRefBoxController.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java index 90d258baa06f..710a96648152 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinRefBoxController.java @@ -280,7 +280,7 @@ public ResponseEntity getCitationText(@RequestParam(name = "type") String type, String outputString = updateOutput(type, output.toString()); // Wrap the String output to the class for better parsing in the FE - OaiMetadataWrapper oaiMetadataWrapper = new OaiMetadataWrapper(outputString); + OaiMetadataWrapper oaiMetadataWrapper = new OaiMetadataWrapper(StringUtils.defaultIfEmpty(outputString, "")); return new ResponseEntity<>(oaiMetadataWrapper, HttpStatus.valueOf(SC_OK)); } @@ -348,17 +348,19 @@ private List mapFeaturedServiceLinks(List tag. + * Remove the unnecessary parts from the output. */ private String updateOutput(String type, String output) { try { if (StringUtils.equals(type, BIBTEX_TYPE)) { - return removeBibtexTag(output); + // Remove the XML header tag and the tag from the string. + return getXmlTextContent(output); } else { + // Remove the XML header tag from the string. return removeXmlHeaderTag(output); } } catch (Exception e) { - e.printStackTrace(); + log.error("Cannot update the xml string for citation because of: " + e.getMessage()); return null; } } @@ -381,9 +383,9 @@ private String removeXmlHeaderTag(String xml) { } /** - * Remove the tag from the string. + * Get the text content from the xml string. */ - private String removeBibtexTag(String xml) throws ParserConfigurationException, IOException, SAXException { + private String getXmlTextContent(String xml) throws ParserConfigurationException, IOException, SAXException { // Parse the XML string DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); @@ -395,7 +397,6 @@ private String removeBibtexTag(String xml) throws ParserConfigurationException, // Get the text content of the root element return root.getTextContent().trim(); } - } /**