diff --git a/src/caselawclient/Client.py b/src/caselawclient/Client.py
index 10c2c86b..16a089bf 100644
--- a/src/caselawclient/Client.py
+++ b/src/caselawclient/Client.py
@@ -1217,3 +1217,14 @@ def resolve_from_identifier(self, identifier_uri: str, published_only: bool = Tr
             ),
         )
         return IdentifierResolutions.from_marklogic_output(raw_results)
+
+    def get_next_document_sequence_number(self) -> int:
+        """
+        Increment the MarkLogic sequence number by one and return the value.
+
+        Runs `get_next_document_sequence_number.xqy`, which updates the stored counter,
+        so every call consumes a number.
+
+        :return: The sequence number after incrementing.
+        """
+        return int(self._eval_and_decode({}, "get_next_document_sequence_number.xqy"))
diff --git a/src/caselawclient/models/identifiers/fclid.py b/src/caselawclient/models/identifiers/fclid.py
new file mode 100644
index 00000000..83e75891
--- /dev/null
+++ b/src/caselawclient/models/identifiers/fclid.py
@@ -0,0 +1,66 @@
+import re
+from typing import TYPE_CHECKING
+
+from sqids import Sqids
+
+from . import Identifier, IdentifierSchema
+
+if TYPE_CHECKING:
+    from caselawclient.Client import MarklogicApiClient
+
+
+# Accepts strings made up solely of FCLID_ALPHABET characters.
+# NOTE(review): this allows as few as 4 characters although the encoder below is
+# configured with FCLID_MINIMUM_LENGTH (8); confirm that leniency is intended.
+VALID_FCLID_PATTERN = re.compile(r"^[bcdfghjkmnpqrstvwxyz23456789]{4,}$")
+
+# Encoder settings: lowercase letters excluding a, e, i, o, u and l, plus the digits 2-9.
+FCLID_MINIMUM_LENGTH = 8
+FCLID_ALPHABET = "bcdfghjkmnpqrstvwxyz23456789"
+
+# Shared encoder used to turn sequence numbers into identifier values.
+sqids = Sqids(
+    min_length=FCLID_MINIMUM_LENGTH,
+    alphabet=FCLID_ALPHABET,
+)
+
+
+class FindCaseLawIdentifierSchema(IdentifierSchema):
+    """
+    Identifier schema describing a Find Case Law Identifier.
+    """
+
+    name = "Find Case Law Identifier"
+    namespace = "fclid"
+
+    @classmethod
+    def validate_identifier(cls, value: str) -> bool:
+        """Return whether `value` matches `VALID_FCLID_PATTERN` in its entirety."""
+        # fullmatch, unlike match with a trailing `$`, also rejects a trailing newline.
+        return bool(VALID_FCLID_PATTERN.fullmatch(value))
+
+    @classmethod
+    def compile_identifier_url_slug(cls, value: str) -> str:
+        """Return the URL slug for `value`: the identifier prefixed with `tna.`."""
+        return "tna." + value
+
+    @classmethod
+    def mint(cls, api_client: "MarklogicApiClient") -> "FindCaseLawIdentifier":
+        """
+        Generate a totally new Find Case Law identifier.
+
+        Requests the next document sequence number through `api_client` and
+        encodes it with the module-level Sqids encoder.
+
+        :param api_client: Client used to obtain the next sequence number.
+        :return: A new identifier wrapping the encoded sequence number.
+        """
+        next_sequence_number = api_client.get_next_document_sequence_number()
+        new_identifier = sqids.encode([next_sequence_number])
+        return FindCaseLawIdentifier(value=new_identifier)
+
+
+class FindCaseLawIdentifier(Identifier):
+    """A Find Case Law Identifier, governed by `FindCaseLawIdentifierSchema`."""
+
+    schema = FindCaseLawIdentifierSchema
diff --git a/src/caselawclient/models/identifiers/unpacker.py b/src/caselawclient/models/identifiers/unpacker.py
index 101b2e50..c98937a8 100644
--- a/src/caselawclient/models/identifiers/unpacker.py
+++ b/src/caselawclient/models/identifiers/unpacker.py
@@ -3,9 +3,11 @@
 from lxml import etree
 
 from . import IDENTIFIER_UNPACKABLE_ATTRIBUTES, Identifier, Identifiers, InvalidIdentifierXMLRepresentationException
+from .fclid import FindCaseLawIdentifier
 from .neutral_citation import NeutralCitationNumber
 
 IDENTIFIER_NAMESPACE_MAP: dict[str, type[Identifier]] = {
+    "fclid": FindCaseLawIdentifier,
     "ukncn": NeutralCitationNumber,
 }
 
diff --git a/src/caselawclient/xquery/get_next_document_sequence_number.xqy b/src/caselawclient/xquery/get_next_document_sequence_number.xqy
new file mode 100644
index 00000000..d56b3bd9
--- /dev/null
+++ b/src/caselawclient/xquery/get_next_document_sequence_number.xqy
@@ -0,0 +1,16 @@
+xquery version "1.0-ml";
+declare option xdmp:transaction-mode "update";
+
+(: Read the document counter from state.xml, add one, store it and return the new value. :)
+let $_ := xdmp:set-transaction-mode("update")
+let $state_doc := fn:doc("state.xml")
+let $counter_node := $state_doc/state/document_counter
+
+let $current_counter := $counter_node/text()
+let $new_counter := fn:sum(($current_counter, 1))
+
+(: node-replace needs a node, so wrap the new value in a fresh document_counter element. :)
+let $_ := xdmp:node-replace($counter_node, <document_counter>{$new_counter}</document_counter>)
+let $_ := xdmp:commit()
+
+return $new_counter