Skip to content

Commit

Permalink
Use corb2 to set identifiers for all primary documents that aren't fa…
Browse files Browse the repository at this point in the history
…ilures
  • Loading branch information
dragon-dxw committed Dec 16, 2024
1 parent 4932f70 commit f9a9257
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ gradle-*.properties
!gradle-development.properties
__pycache__
node_modules/
corb2/*.jar
corb2/*.log
7 changes: 7 additions & 0 deletions corb2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
`brew install temurin` to get a working Java (I had to install-uninstall-install)

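Download corb2 (`marklogic-corb-2.5.6.jar`) from https://github.com/marklogic-community/corb2/releases
Download xcc (`marklogic-xcc-11.1.0.jar`) from https://repo1.maven.org/maven2/com/marklogic/marklogic-xcc/11.1.0/ or https://developer.marklogic.com/products/xcc-2/ (I used maven)
Put both jars into this directory; the `corb` script's classpath refers to exactly these file names, so other versions need the script edited to match.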

From this directory, `sh corb migrate-ncn` will run the migration using `migrate-ncn.properties`; `migrate-ncn.log` will show what it writes to the identifiers
1 change: 1 addition & 0 deletions corb2/corb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/bin/sh
# Launch a CoRB job against the local MarkLogic XCC server.
#
# Usage: sh corb <job-name>
#   <job-name>  base name of a properties file in the current directory;
#               the job options are read from <job-name>.properties.
#
# Must be run from the directory holding the two jars named on the classpath.
# NOTE(review): the XCC connection string embeds admin:admin credentials and a
# fixed host/port; fine for a local dev instance, do not point this at anything shared.

# Without an argument the original silently passed "-DOPTIONS-FILE=.properties";
# fail early with a usage message instead.
if [ -z "$1" ]; then
  echo "usage: $0 <job-name>   (reads <job-name>.properties)" >&2
  exit 1
fi

# "$1" is quoted so a job name containing spaces or glob characters is passed intact.
java -server -cp .:marklogic-xcc-11.1.0.jar:marklogic-corb-2.5.6.jar "-DOPTIONS-FILE=$1.properties" com.marklogic.developer.corb.Manager xcc://admin:admin@localhost:8011
4 changes: 4 additions & 0 deletions corb2/get-uris.xqy
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(: URI selector for the migration job (named as URIS-MODULE in
   migrate-ncn.properties). Looks up every URI in the DLS "latest-version"
   collection and returns the number of URIs followed by the URIs themselves. :)
let $latest-version-collection := "http://marklogic.com/collections/dls/latest-version"
let $selected := cts:uris("", (), cts:collection-query($latest-version-collection))
return (fn:count($selected), $selected)
9 changes: 9 additions & 0 deletions corb2/migrate-ncn.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# CoRB job options for the NCN identifier migration.
# Loaded by the `corb` launcher as -DOPTIONS-FILE=migrate-ncn.properties.

# Modules database the job works with.
MODULES-DATABASE=caselaw-modules
# NOTE(review): presumably tells CoRB to upload the modules named below into
# MODULES-DATABASE before running -- confirm against the CoRB options reference.
INSTALL = 1

# get-uris.xqy returns a count followed by the document URIs to process.
URIS-MODULE=get-uris.xqy
# migrate-ncn.xqy takes one URI through its external $URI variable.
PROCESS-MODULE=migrate-ncn.xqy

# The text returned by the process module for each URI ends up in
# migrate-ncn.log (see README); presumably via this export task -- confirm.
PROCESS-TASK=com.marklogic.developer.corb.ExportBatchToFileTask
EXPORT-FILE-NAME=migrate-ncn.log
# NOTE(review): presumably the number of worker threads processing URIs concurrently.
THREAD-COUNT=10
34 changes: 34 additions & 0 deletions corb2/migrate-ncn.xqy
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
declare namespace uk = "https://caselaw.nationalarchives.gov.uk/akn";

import module namespace sem = "http://marklogic.com/semantics"
at "/MarkLogic/semantics.xqy";

(: One-off migration: gives a document an "identifiers" property holding a
   single ukncn identifier built from its uk:cite text and its URI.

   Input:  $URI - the URI of the document to process (supplied externally,
           one call per document).
   Output: a newline-joined log record: "Processing", the URI, the cite, the
           generated uuid, the serialised identifiers element and the outcome.

   A document is left untouched (and the reason logged) when:
     - its URI is under /failures/ or /collisions/;
     - it has no non-blank uk:cite text, since an identifier with an empty
       value would be meaningless;
     - it already has an "identifiers" property, so re-running the job (for
       example after a partial failure) cannot overwrite identifiers and
       mint new uuids for documents that were already migrated. :)

declare variable $URI external;

let $cite := fn:doc($URI)//uk:cite/text()
(: URL slug = the URI without its leading "/" and trailing ".xml" :)
let $slug := fn:replace(
  fn:replace($URI, "\.xml$", "")
  , "^/", "")
let $uuid := "id-"||sem:uuid-string()
let $node :=
<identifiers><identifier>
<namespace>ukncn</namespace>
<uuid>{$uuid}</uuid>
<value>{$cite}</value>
<url_slug>{$slug}</url_slug>
</identifier></identifiers>

let $is-failure :=
  fn:starts-with($URI, "/failures/") or fn:starts-with($URI, "/collisions/")
let $has-cite := fn:normalize-space(fn:string-join($cite, "")) ne ""
(: the property element is in no namespace, matching $node above :)
let $existing := xdmp:document-get-properties($URI, xs:QName("identifiers"))

let $outcome :=
  if ($is-failure) then
    "ignored as failure/collision"
  else if (fn:not($has-cite)) then
    "ignored as no uk:cite value found"
  else if (fn:exists($existing)) then
    "ignored as identifiers property already set"
  else
    (: document-set-property returns the empty sequence; concatenating it
       forces the update to be evaluated and leaves the message unchanged :)
    "set property" || xdmp:document-set-property($URI, $node)

(: the leading empty string keeps a blank line in front of every record :)
return string-join(
  ("", "Processing", $URI, $cite, $uuid, xdmp:quote($node), $outcome),
  "&#10;")

0 comments on commit f9a9257

Please sign in to comment.