-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Resolve identifier URIs to MarkLogic URIs
- Loading branch information
1 parent
b2f5aa7
commit 5a50e75
Showing
6 changed files
with
114 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import json | ||
from typing import NamedTuple | ||
|
||
from caselawclient.models.documents import DocumentURIString | ||
from caselawclient.xquery_type_dicts import MarkLogicDocumentURIString | ||
|
||
|
||
class IdentifierResolutions(list["IdentifierResolution"]):
    """
    A list of candidate MarkLogic documents which correspond to a Public UI URI.

    MarkLogic returns a list of dictionaries; `IdentifierResolution` handles a single
    dictionary, which corresponds to a single identifier-to-MarkLogic-document mapping.

    See `xquery/resolve_from_identifier.xqy` and `resolve_from_identifier` in `Client.py`.
    """

    @classmethod
    def from_marklogic_output(cls, table: list[str]) -> "IdentifierResolutions":
        """Build a resolution list from raw MarkLogic rows.

        Args:
            table: The raw rows; each one is passed unchanged to
                `IdentifierResolution.from_marklogic_output`.

        Returns:
            A new list holding one decoded `IdentifierResolution` per row, in the
            same order as `table`.
        """
        # `cls` rather than the hard-coded class name, so a subclass gets its own type back.
        return cls([IdentifierResolution.from_marklogic_output(row) for row in table])

    def published(self) -> "IdentifierResolutions":
        """Filter the list so that only published documents are returned.

        Returns:
            A new list (this one is left untouched) containing the entries whose
            `document_published` is truthy, in their original order.
        """
        return type(self)([resolution for resolution in self if resolution.document_published])
|
||
|
||
class IdentifierResolution(NamedTuple):
    """A single response from MarkLogic about a single identifier / document mapping"""

    identifier_uuid: str
    document_uri: MarkLogicDocumentURIString
    identifier_slug: DocumentURIString
    document_published: bool

    @classmethod
    def from_marklogic_output(cls, raw_row: str) -> "IdentifierResolution":
        """Decode one JSON-encoded row into an `IdentifierResolution`.

        Args:
            raw_row: A JSON object, as a string, keyed by the fully qualified
                `documents.compiled_url_slugs.*` column names.

        Returns:
            The decoded resolution. `document_published` is True when the column
            holds the string "true" or a JSON boolean true, and False otherwise.

        Raises:
            json.JSONDecodeError: If `raw_row` is not valid JSON.
            KeyError: If one of the expected columns is missing from the row.
        """
        row = json.loads(raw_row)
        published_raw = row["documents.compiled_url_slugs.document_published"]
        return cls(
            identifier_uuid=row["documents.compiled_url_slugs.identifier_uuid"],
            document_uri=MarkLogicDocumentURIString(row["documents.compiled_url_slugs.document_uri"]),
            identifier_slug=DocumentURIString(row["documents.compiled_url_slugs.identifier_slug"]),
            # Accept a real JSON boolean as well as the string form, so that a row
            # carrying `true` is not silently treated as unpublished.
            document_published=published_raw is True or published_raw == "true",
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
xquery version "1.0-ml";

(:
 : Select the rows of compiled_url_slugs whose identifier_slug equals
 : $identifier_uri. When $published_only has a true effective boolean value
 : (the default, 1), only rows with document_published = 'true' are selected;
 : passing 0 or the empty sequence drops that restriction.
 :)

declare namespace xdmp="http://marklogic.com/xdmp";
declare variable $identifier_uri as xs:string external;
declare variable $published_only as xs:int? external := 1;

(: The slug is passed as a bind variable (@uri), never spliced into the SQL text. :)
let $bindings := map:new((
  map:entry("uri", $identifier_uri)
))
let $published_clause :=
  if ($published_only)
  then " AND document_published = 'true'"
  else ""
let $sql := fn:concat(
  "SELECT * from compiled_url_slugs WHERE (identifier_slug = @uri)",
  $published_clause
)

return xdmp:sql($sql, "map", $bindings)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from caselawclient.identifier_resolution import IdentifierResolutions | ||
|
||
# Two raw rows, each a JSON object in a string: the first is an unpublished
# document, the second a published one.
raw_marklogic_resolutions = [
    """
    {"documents.compiled_url_slugs.identifier_uuid":"24b9a384-8bcf-4f20-996a-5c318f8dc657",
    "documents.compiled_url_slugs.document_uri":"/ewca/civ/2003/547.xml",
    "documents.compiled_url_slugs.identifier_slug":"ewca/civ/2003/54721",
    "documents.compiled_url_slugs.document_published":"false"}
    """,
    """
    {"documents.compiled_url_slugs.identifier_uuid":"x",
    "documents.compiled_url_slugs.document_uri":"x",
    "documents.compiled_url_slugs.identifier_slug":"x",
    "documents.compiled_url_slugs.document_published":"true"}
    """,
]
|
||
|
||
def test_decoded_identifier():
    """The first raw row decodes into the expected field values."""
    decoded_resolutions = IdentifierResolutions.from_marklogic_output(raw_marklogic_resolutions)
    res = decoded_resolutions[0]
    assert res.identifier_uuid == "24b9a384-8bcf-4f20-996a-5c318f8dc657"
    assert res.document_uri == "/ewca/civ/2003/547.xml"
    assert res.identifier_slug == "ewca/civ/2003/54721"
    # Identity check: the field must be the bool False, not merely something equal to it.
    assert res.document_published is False
|
||
|
||
def test_published():
    """Filtering keeps only the second fixture row, the one marked as published."""
    everything = IdentifierResolutions.from_marklogic_output(raw_marklogic_resolutions)
    only_published = everything.published()
    assert len(only_published) == 1
    assert only_published[0] == everything[1]