From 5c2e45252d3cc68c3e1b47d8e7e6ccfe5da3d200 Mon Sep 17 00:00:00 2001 From: Vincent Lizzi <vincent.lizzi@taylorandfrancis.com> Date: Tue, 2 Aug 2022 16:40:36 -0400 Subject: [PATCH] improve resolver:parse-xml --- xml-catalog-resolver.xqm | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/xml-catalog-resolver.xqm b/xml-catalog-resolver.xqm index f1c1078..47ae1f6 100644 --- a/xml-catalog-resolver.xqm +++ b/xml-catalog-resolver.xqm @@ -162,7 +162,7 @@ declare function resolver:resolveURI($uri as xs:string, $catalog as xs:string) a : @return parsed XML document :) declare function resolver:parse-xml($xml as xs:string, $catalog as xs:string) as document-node() { - let $temp := file:create-temp-file('catalog-resolver', '.xml') + let $temp := file:create-temp-file('xml-catalog-resolver', '.xml') let $raw := if ($xml castable as xs:anyURI) then unparsed-text($xml) else $xml let $resolved := resolver:resolveDOCTYPE($raw, $catalog) return ( @@ -173,6 +173,25 @@ declare function resolver:parse-xml($xml as xs:string, $catalog as xs:string) as }; +(:~ + : Parse XML using XML Catalog + : + : @param $xml an XML string or file path to the XML file + : @param $catalog Semicolon-separated list of XML catalog files. Absolute file paths work best. + : @param $path File path where the catalog-resolved XML is written before being parsed, so that the caller controls the base-uri() of the parsed document + : + : @return parsed XML document + :) +declare function resolver:parse-xml($xml as xs:string, $catalog as xs:string, $path as xs:string) as document-node() { + let $raw := if ($xml castable as xs:anyURI) then unparsed-text($xml) else $xml + let $resolved := resolver:resolveDOCTYPE($raw, $catalog) + return ( + file:write-text($path, $resolved), + (# db:dtd true #) (# db:intparse false #) (# db:chop false #) { doc($path) } + ) +}; + + (:~ : Modifies a DOCTYPE to remove a PUBLIC or SYSTEM reference to an external DTD. 
: If the DOCTYPE contains an internal DTD then the internal part will remain intact. @@ -324,6 +343,21 @@ declare %unit:test function resolver:test_parse-xml() { }; +declare %unit:test function resolver:test_parse-xml3() { + let $base := file:base-dir() + let $catalog := file:resolve-path("test/catalog1.xml", $base) + let $examplexml := file:resolve-path("test/example.xml", $base) + let $tempDir := file:create-temp-dir('xml-catalog-resolver', 'test') + let $tempFile := $tempDir || 'example.xml' + let $result := resolver:parse-xml($examplexml, $catalog, $tempFile) + return ( + unit:assert-equals($result, document{<example att="default">expansion from external DTD</example>}), + unit:assert(file:exists($tempFile)), + file:delete($tempDir, true()) + ) +}; + + declare %unit:test function resolver:test_removeExternalDTD() { let $example := '<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE catalog PUBLIC "-//OASIS//DTD Entity Resolution XML Catalog V1.0//EN" "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd" []><catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog"><uri name="https://example.com/file.txt" uri="file.txt"/></catalog>' let $expected := '<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE catalog []><catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog"><uri name="https://example.com/file.txt" uri="file.txt"/></catalog>'