Skip to content

Commit

Permalink
Resolve identifier URIs to MarkLogic URIs
Browse files: browse the repository at this point in the history
  • Loading branch information
dragon-dxw committed Dec 6, 2024
1 parent abad449 commit 54b7536
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 4 deletions.
2 changes: 1 addition & 1 deletion script/build_xquery_type_dicts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def ml_type_to_python_type_declaration(variable_name: str, variable_type: str):
variable_type = "MarkLogicDocumentVersionURIString"
elif variable_name == "privilege_uri":
variable_type = "MarkLogicPrivilegeURIString"
elif variable_name == "parent_uri":
elif variable_name in ["identifier_uri", "parent_uri"]:
variable_type = "DocumentURIString"
elif variable_name == "uri" or variable_name.endswith("_uri"):
variable_type = "MarkLogicDocumentURIString"
Expand Down
13 changes: 13 additions & 0 deletions src/caselawclient/Client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from caselawclient import xquery_type_dicts as query_dicts
from caselawclient.client_helpers import VersionAnnotation
from caselawclient.identifiers import IdentifierResolutions
from caselawclient.models.documents import (
DOCUMENT_COLLECTION_URI_JUDGMENT,
DOCUMENT_COLLECTION_URI_PRESS_SUMMARY,
Expand Down Expand Up @@ -1201,3 +1202,15 @@ def get_recently_parsed(
)

return results

def resolve_from_identifier(self, identifier_uri: str) -> IdentifierResolutions:
    """Resolve a PUI/EUI URL to the MarkLogic documents registered under its slug.

    The lookup is performed by evaluating ``resolve_from_identifier.xqy`` with
    the given URI, and each string part of the response is parsed into an
    ``IdentifierResolution``.

    :param identifier_uri: The PUI/EUI URL to look up.

    :return: Every resolution returned for the slug. Callers should expect
        that more than one document may match.
    """
    query_vars: query_dicts.ResolveFromIdentifierDict = {
        "identifier_uri": DocumentURIString(identifier_uri),
    }
    response = self._send_to_eval(query_vars, "resolve_from_identifier.xqy")
    rows: list[str] = get_multipart_strings_from_marklogic_response(response)
    return IdentifierResolutions.from_marklogic_output(rows)
31 changes: 31 additions & 0 deletions src/caselawclient/identifiers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import json
from typing import NamedTuple

from caselawclient.models.documents import DocumentURIString
from caselawclient.xquery_type_dicts import MarkLogicDocumentURIString


class IdentifierResolution(NamedTuple):
    """A single row linking an identifier slug to one MarkLogic document."""

    # Value of the row's "identifier_uuid" column.
    identifier_uuid: str
    # Value of the row's "document_uri" column, typed as a MarkLogic document URI.
    document_uri: MarkLogicDocumentURIString
    # Value of the row's "identifier_slug" column.
    identifier_slug: DocumentURIString
    # True only when the row's "document_published" column is the string "true".
    document_published: bool

    @classmethod
    def from_marklogic_output(cls, raw_row: str) -> "IdentifierResolution":
        """Build a resolution from one JSON-encoded row of query output.

        :param raw_row: A JSON object, as a string, keyed by fully qualified
            column names (``documents.compiled_url_slugs.<column>``).

        :return: The parsed resolution.

        :raises json.JSONDecodeError: If ``raw_row`` is not valid JSON.
        :raises KeyError: If an expected column is missing from the row.
        """
        row = json.loads(raw_row)
        # `cls` rather than the hard-coded class name, so the constructor
        # keeps working if the class is renamed or subclassed.
        return cls(
            identifier_uuid=row["documents.compiled_url_slugs.identifier_uuid"],
            document_uri=MarkLogicDocumentURIString(row["documents.compiled_url_slugs.document_uri"]),
            identifier_slug=DocumentURIString(row["documents.compiled_url_slugs.identifier_slug"]),
            # The published flag is compared against the string "true"; any
            # other value is treated as unpublished.
            document_published=row["documents.compiled_url_slugs.document_published"] == "true",
        )


class IdentifierResolutions(list[IdentifierResolution]):
    """A list of ``IdentifierResolution`` rows with convenience filters."""

    @classmethod
    def from_marklogic_output(cls, table: list[str]) -> "IdentifierResolutions":
        """Parse each JSON-encoded row in ``table`` into an ``IdentifierResolution``.

        :param table: The raw rows, one JSON string per resolution.

        :return: The parsed resolutions, in the same order as ``table``.
        """
        # The list constructor consumes the generator directly, so no
        # intermediate list is needed.
        return cls(IdentifierResolution.from_marklogic_output(row) for row in table)

    def published(self) -> "IdentifierResolutions":
        """Return a new collection holding only resolutions flagged as published."""
        return IdentifierResolutions(x for x in self if x.document_published)
6 changes: 3 additions & 3 deletions src/caselawclient/xquery/resolve_from_identifier.xqy
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
xquery version "1.0-ml";

(: Selects the rows of compiled_url_slugs whose identifier_slug equals the
   identifier URI supplied by the caller as an external variable. :)
(: NOTE(review): this listing appears to interleave removed and added diff
   lines (two variable declarations, two option strings, two map:entry
   lines) - confirm against the committed file before relying on it. :)
declare namespace xdmp="http://marklogic.com/xdmp";
declare variable $uri as xs:string external;
declare variable $identifier_uri as xs:string external;

(: The slug is supplied through the @uri binding rather than being
   concatenated into the SQL text. :)
xdmp:sql(
"SELECT * from compiled_url_slugs WHERE documents.compiled_url_slugs.identifier_slug = @uri",
"array",
"map",
map:new((
map:entry("uri", $uri)
map:entry("uri", $identifier_uri)
))
)
5 changes: 5 additions & 0 deletions src/caselawclient/xquery_type_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ class ListJudgmentVersionsDict(MarkLogicAPIDict):
uri: MarkLogicDocumentURIString


# resolve_from_identifier.xqy
# Variables passed to the resolve_from_identifier.xqy query.
class ResolveFromIdentifierDict(MarkLogicAPIDict):
# Value for the query's external $identifier_uri variable.
identifier_uri: DocumentURIString


# set_boolean_property.xqy
class SetBooleanPropertyDict(MarkLogicAPIDict):
name: str
Expand Down

0 comments on commit 54b7536

Please sign in to comment.