From 6bfc155d4c205fac24c614296376f6af3ec5c7f9 Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Mon, 28 Oct 2024 17:56:25 +0000 Subject: [PATCH 1/2] refactor(Document): initialising a Document now requires a DocumentURIString, not a str This previously accepted a str and would attempt to strip it and remove slashes. It now only accepts a DocumentURIString object. BREAKING CHANGE: Document can now no longer be initialised with a string as the `uri`, it must be a `DocumentURIString`. --- CHANGELOG.md | 6 ++++++ src/caselawclient/factories.py | 4 ++-- src/caselawclient/models/documents/__init__.py | 10 ++++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e29a223..5a145dce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog 1.0.0]. +## Unreleased + +### BREAKING CHANGE + +- Document can now no longer be initialised with a string as the `uri`, it must be a `DocumentURIString`. + ## v27.4.0 (2024-11-07) ### Change of behaviour diff --git a/src/caselawclient/factories.py b/src/caselawclient/factories.py index 356e8e6b..efa38825 100644 --- a/src/caselawclient/factories.py +++ b/src/caselawclient/factories.py @@ -5,7 +5,7 @@ from typing_extensions import TypeAlias from caselawclient.Client import MarklogicApiClient -from caselawclient.models.documents import Document +from caselawclient.models.documents import Document, DocumentURIString from caselawclient.models.documents.body import DocumentBody from caselawclient.models.judgments import Judgment from caselawclient.models.press_summaries import PressSummary @@ -54,7 +54,7 @@ class DocumentFactory: @classmethod def build( cls, - uri: str = "test/2023/123", + uri: DocumentURIString = DocumentURIString("test/2023/123"), html: str = "

This is a judgment.

", api_client: Optional[MarklogicApiClient] = None, **kwargs: Any, diff --git a/src/caselawclient/models/documents/__init__.py b/src/caselawclient/models/documents/__init__.py index d4ec4e2d..7c3d5171 100644 --- a/src/caselawclient/models/documents/__init__.py +++ b/src/caselawclient/models/documents/__init__.py @@ -105,13 +105,15 @@ class Document: Individual document classes should extend this list where necessary to validate document type-specific attributes. """ - def __init__(self, uri: str, api_client: "MarklogicApiClient", search_query: Optional[str] = None): + def __init__(self, uri: DocumentURIString, api_client: "MarklogicApiClient", search_query: Optional[str] = None): """ - :param uri: For historical reasons this accepts a pseudo-URI which may include leading or trailing slashes. + :param uri: The URI of the document to retrieve from MarkLogic. + :param api_client: An instance of the API client object to handle communication with the MarkLogic server. + :param search_query: Optionally, a search string which should be highlighted if it appears in the document body. :raises DocumentNotFoundError: The document does not exist within MarkLogic """ - self.uri: DocumentURIString = DocumentURIString(uri.strip("/")) + self.uri: DocumentURIString = uri self.api_client: MarklogicApiClient = api_client if not self.document_exists(): raise DocumentNotFoundError(f"Document {self.uri} does not exist") @@ -123,7 +125,7 @@ def __init__(self, uri: str, api_client: "MarklogicApiClient", search_query: Opt search_query=search_query, ), ) - """ `Document.body` represents the XML of the document itself, without any information such as version tracking or properties. """ + """ `Document.body` represents the body of the document itself, without any information such as version tracking or properties. """ def __repr__(self) -> str: name = self.body.name or "un-named" From e65e3aec7050de131a39be3f8694702338154823 Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Wed, 13 Nov 2024 13:49:29 +0000 Subject: [PATCH 2/2] feat: Validate strings when creating a new DocumentURIString This improves the robustness of `DocumentURIString`` handling by ensuring that they conform to the expected rules (no trailing or leading slashes, no dots). Since initialising a document now requires an instance of this class, and we can guarantee the string contained within meets our requirements, we can safely lose string sanitisation in the `Document` class. As part of this, `Judgment` and `PressSummary` have had their signatures updated to reinforce the expectation of `DocumentURIString` being provided as the `uri` argument. This led to identifying unsafe behaviour around the handling of press summary URIs and version URIs, which have now been made more explicit. BREAKING CHANGE: Code which provided unsanitised URIs when initialising `DocumentURIStrings` will now cause `InvalidDocumentURIException`s to be raised. --- src/caselawclient/Client.py | 6 ++-- .../models/documents/__init__.py | 25 +++++++++++-- .../models/documents/exceptions.py | 4 +++ src/caselawclient/models/judgments.py | 8 ++--- src/caselawclient/models/press_summaries.py | 8 ++--- .../models/utilities/__init__.py | 4 +-- .../xquery/get_components_for_document.xqy | 2 +- .../client/test_checkout_checkin_judgment.py | 12 +++---- tests/client/test_client.py | 16 ++------- tests/client/test_eval_xslt.py | 8 ++--- .../client/test_get_judgment_and_versions.py | 6 ++-- ...st_get_press_summaries_for_document_uri.py | 36 +++++++++---------- tests/client/test_get_set_metadata.py | 2 +- tests/client/test_get_set_properties.py | 14 ++++---- tests/client/test_get_version_annotation.py | 2 +- .../test_get_version_created_datetime.py | 2 +- .../client/test_save_copy_delete_judgment.py | 14 ++++---- tests/client/test_validate_document.py | 4 +-- tests/models/documents/test_document_verbs.py | 4 +-- tests/models/documents/test_documents.py | 36 ++++++++++++++----- tests/models/test_judgments.py | 17 ++++----- tests/models/test_press_summaries.py | 12 +++---- tests/models/utilities/test_utilities.py | 6 ++-- 23 files changed, 140 insertions(+), 108 deletions(-) diff --git a/src/caselawclient/Client.py b/src/caselawclient/Client.py index 143e2755..936b2c21 100644 --- a/src/caselawclient/Client.py +++ b/src/caselawclient/Client.py @@ -209,12 +209,14 @@ def get_press_summaries_for_document_uri( Returns a list of PressSummary objects associated with a given Document URI """ vars: query_dicts.GetComponentsForDocumentDict = { - "parent_uri": DocumentURIString(uri if uri.startswith("/") else "/" + uri), + "parent_uri": uri, "component": "pressSummary", } response = self._send_to_eval(vars, "get_components_for_document.xqy") uris = get_multipart_strings_from_marklogic_response(response) - return [PressSummary(uri.strip(".xml"), self) for uri in uris] + return [ + PressSummary(DocumentURIString(uri.strip("/").strip(".xml")), self) for uri in uris + ] # TODO: Migrate this strip behaviour into proper manipulation of a MarkLogicURIString def get_document_by_uri( self, diff --git a/src/caselawclient/models/documents/__init__.py b/src/caselawclient/models/documents/__init__.py index 7c3d5171..35873388 100644 --- a/src/caselawclient/models/documents/__init__.py +++ b/src/caselawclient/models/documents/__init__.py @@ -1,7 +1,7 @@ import datetime import warnings from functools import cached_property -from typing import TYPE_CHECKING, Any, NewType, Optional +from typing import TYPE_CHECKING, Any, Optional from ds_caselaw_utils import courts from ds_caselaw_utils.courts import CourtNotFoundException @@ -30,7 +30,7 @@ ) from .body import DocumentBody -from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion +from .exceptions import CannotPublishUnpublishableDocument, DocumentNotSafeForDeletion, InvalidDocumentURIException from .statuses import DOCUMENT_STATUS_HOLD, DOCUMENT_STATUS_IN_PROGRESS, DOCUMENT_STATUS_NEW, DOCUMENT_STATUS_PUBLISHED MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20) @@ -47,7 +47,26 @@ class GatewayTimeoutGettingHTMLWithQuery(RuntimeWarning): from caselawclient.Client import MarklogicApiClient -DocumentURIString = NewType("DocumentURIString", str) +class DocumentURIString(str): + """ + This class checks that the string is actually a valid Document URI on creation. It does _not_ manipulate the string. + """ + + def __new__(cls, content: str) -> "DocumentURIString": + # Check that the URI doesn't begin or end with a slash + if content[0] == "/" or content[-1] == "/": + raise InvalidDocumentURIException( + f'"{content}" is not a valid document URI; URIs cannot begin or end with slashes.' + ) + + # Check that the URI doesn't contain a full stop + if "." in content: + raise InvalidDocumentURIException( + f'"{content}" is not a valid document URI; URIs cannot contain full stops.' + ) + + # If everything is good, return as usual + return str.__new__(cls, content) class Document: diff --git a/src/caselawclient/models/documents/exceptions.py b/src/caselawclient/models/documents/exceptions.py index a2d04d83..2ca1ee37 100644 --- a/src/caselawclient/models/documents/exceptions.py +++ b/src/caselawclient/models/documents/exceptions.py @@ -4,3 +4,7 @@ class CannotPublishUnpublishableDocument(Exception): class DocumentNotSafeForDeletion(Exception): """A document which is not safe for deletion cannot be deleted.""" + + +class InvalidDocumentURIException(Exception): + """The document URI is not valid.""" diff --git a/src/caselawclient/models/judgments.py b/src/caselawclient/models/judgments.py index 61366662..730c3c0c 100644 --- a/src/caselawclient/models/judgments.py +++ b/src/caselawclient/models/judgments.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from caselawclient.models.press_summaries import PressSummary -from .documents import Document +from .documents import Document, DocumentURIString class Judgment(NeutralCitationMixin, Document): @@ -21,8 +21,8 @@ class Judgment(NeutralCitationMixin, Document): document_noun = "judgment" document_noun_plural = "judgments" - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(self.document_noun, *args, **kwargs) + def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None: + super().__init__(self.document_noun, uri, *args, **kwargs) @cached_property def neutral_citation(self) -> NeutralCitationString: @@ -46,7 +46,7 @@ def linked_document(self) -> Optional["PressSummary"]: Attempt to fetch a linked press summary, and return it, if it exists """ try: - uri = self.uri + "/press-summary/1" + uri = DocumentURIString(self.uri + "/press-summary/1") if not TYPE_CHECKING: # This isn't nice, but will be cleaned up when we refactor how related documents work PressSummary = importlib.import_module("caselawclient.models.press_summaries").PressSummary return PressSummary(uri, self.api_client) diff --git a/src/caselawclient/models/press_summaries.py b/src/caselawclient/models/press_summaries.py index 39c9cacc..2996c132 100644 --- a/src/caselawclient/models/press_summaries.py +++ b/src/caselawclient/models/press_summaries.py @@ -9,7 +9,7 @@ from caselawclient.errors import DocumentNotFoundError from caselawclient.models.neutral_citation_mixin import NeutralCitationMixin -from .documents import Document +from .documents import Document, DocumentURIString if TYPE_CHECKING: from caselawclient.models.judgments import Judgment @@ -23,8 +23,8 @@ class PressSummary(NeutralCitationMixin, Document): document_noun = "press summary" document_noun_plural = "press summaries" - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(self.document_noun, *args, **kwargs) + def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None: + super().__init__(self.document_noun, uri, *args, **kwargs) @cached_property def neutral_citation(self) -> NeutralCitationString: @@ -47,7 +47,7 @@ def linked_document(self) -> Optional[Judgment]: Attempt to fetch a linked judgement, and return it, if it exists """ try: - uri = self.uri.removesuffix("/press-summary/1") + uri = DocumentURIString(self.uri.removesuffix("/press-summary/1")) if not TYPE_CHECKING: # This isn't nice, but will be cleaned up when we refactor how related documents work Judgment = importlib.import_module("caselawclient.models.judgments").Judgment return Judgment(uri, self.api_client) diff --git a/src/caselawclient/models/utilities/__init__.py b/src/caselawclient/models/utilities/__init__.py index 46918aaa..aa46e2ad 100644 --- a/src/caselawclient/models/utilities/__init__.py +++ b/src/caselawclient/models/utilities/__init__.py @@ -12,14 +12,14 @@ class VersionsDict(TypedDict): - uri: str + uri: str ## TODO: This should be either a MarkLogicDocumentURIString (raw from ML) or a DocumentURIString (and we parse it out). Just a str is too vague. version: int def render_versions(decoded_versions: list[BodyPart]) -> list[VersionsDict]: versions: list[VersionsDict] = [ { - "uri": part.text.rstrip(".xml"), + "uri": part.text.strip("/").rstrip(".xml"), "version": extract_version(part.text), } for part in decoded_versions diff --git a/src/caselawclient/xquery/get_components_for_document.xqy b/src/caselawclient/xquery/get_components_for_document.xqy index 536388b3..e7ef44d6 100644 --- a/src/caselawclient/xquery/get_components_for_document.xqy +++ b/src/caselawclient/xquery/get_components_for_document.xqy @@ -14,7 +14,7 @@ let $docTypeQuery := cts:element-attribute-value-query( ) let $refQuery := cts:element-query( xs:QName("uk:summaryOf"), - concat("https://caselaw.nationalarchives.gov.uk/id", $parent_uri) + concat("https://caselaw.nationalarchives.gov.uk/id/", $parent_uri) ) return xdmp:node-uri(cts:search(//akn:akomaNtoso, cts:and-query(($refQuery, $collectionQuery, $docTypeQuery)))) diff --git a/tests/client/test_checkout_checkin_judgment.py b/tests/client/test_checkout_checkin_judgment.py index dc49427a..f6377c65 100644 --- a/tests/client/test_checkout_checkin_judgment.py +++ b/tests/client/test_checkout_checkin_judgment.py @@ -14,7 +14,7 @@ def setUp(self): def test_checkout_judgment(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/ewca/civ/2004/632") + uri = DocumentURIString("ewca/civ/2004/632") annotation = "locked by A KITTEN" expected_vars = { "uri": "/ewca/civ/2004/632.xml", @@ -35,7 +35,7 @@ def test_checkout_judgment_with_midnight_timeout(self): "calculate_seconds_until_midnight", return_value=3600, ): - uri = DocumentURIString("/ewca/civ/2004/632") + uri = DocumentURIString("ewca/civ/2004/632") annotation = "locked by A KITTEN" expires_at_midnight = True expected_vars = { @@ -50,7 +50,7 @@ def test_checkout_judgment_with_midnight_timeout(self): def test_checkout_judgment_with_timeout_seconds(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/ewca/civ/2004/632") + uri = DocumentURIString("ewca/civ/2004/632") annotation = "locked by A KITTEN" timeout_seconds = 1234 expected_vars = { @@ -65,7 +65,7 @@ def test_checkout_judgment_with_timeout_seconds(self): def test_checkin_judgment(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/ewca/civ/2004/632") + uri = DocumentURIString("ewca/civ/2004/632") expected_vars = {"uri": "/ewca/civ/2004/632.xml"} self.client.checkin_judgment(uri) @@ -101,7 +101,7 @@ def test_get_checkout_status_message(self): b"\r\n" b"--595658fa1db1aa98--\r\n" ) - result = self.client.get_judgment_checkout_status_message(DocumentURIString("/ewca/2002/2")) + result = self.client.get_judgment_checkout_status_message(DocumentURIString("ewca/2002/2")) assert result == "locked by a kitten" def test_get_checkout_status_message_empty(self): @@ -118,7 +118,7 @@ def test_get_checkout_status_message_empty(self): b"\r\n" b"--595658fa1db1aa98--\r\n" ) - result = self.client.get_judgment_checkout_status_message(DocumentURIString("/ewca/2002/2")) + result = self.client.get_judgment_checkout_status_message(DocumentURIString("ewca/2002/2")) assert result is None def test_calculate_seconds_until_midnight(self): diff --git a/tests/client/test_client.py b/tests/client/test_client.py index 0f51d509..658db880 100644 --- a/tests/client/test_client.py +++ b/tests/client/test_client.py @@ -106,7 +106,7 @@ def test_eval_and_decode(self, mock_eval): @patch("caselawclient.Client.MarklogicApiClient._eval_and_decode") def test_document_exists(self, mock_decode): mock_decode.return_value = "true" - assert self.client.document_exists(DocumentURIString("/2029/eat/1")) is True + assert self.client.document_exists(DocumentURIString("2029/eat/1")) is True mock_decode.assert_called_with( {"uri": "/2029/eat/1.xml"}, "document_exists.xqy", @@ -115,7 +115,7 @@ def test_document_exists(self, mock_decode): @patch("caselawclient.Client.MarklogicApiClient._eval_and_decode") def test_document_not_exists(self, mock_decode): mock_decode.return_value = "false" - assert self.client.document_exists(DocumentURIString("/2029/eat/1")) is False + assert self.client.document_exists(DocumentURIString("2029/eat/1")) is False mock_decode.assert_called_with( {"uri": "/2029/eat/1.xml"}, "document_exists.xqy", @@ -158,21 +158,9 @@ def test_invoke_calls_request(self, MockPath): ) def test_format_uri(self): - uri = DocumentURIString("/ewca/2022/123") - assert self.client._format_uri_for_marklogic(uri) == "/ewca/2022/123.xml" - - def test_format_uri_no_leading_slash(self): uri = DocumentURIString("ewca/2022/123") assert self.client._format_uri_for_marklogic(uri) == "/ewca/2022/123.xml" - def test_format_uri_trailing_slash(self): - uri = DocumentURIString("ewca/2022/123/") - assert self.client._format_uri_for_marklogic(uri) == "/ewca/2022/123.xml" - - def test_format_uri_all_the_slashes(self): - uri = DocumentURIString("/ewca/2022/123/") - assert self.client._format_uri_for_marklogic(uri) == "/ewca/2022/123.xml" - def test_user_agent(self): user_agent = self.client.session.prepare_request( Request("GET", "http://example.invalid"), diff --git a/tests/client/test_eval_xslt.py b/tests/client/test_eval_xslt.py index e7779a2b..2c40ac73 100644 --- a/tests/client/test_eval_xslt.py +++ b/tests/client/test_eval_xslt.py @@ -20,7 +20,7 @@ def test_eval_xslt_user_can_view_unpublished(self): "user_can_view_unpublished_judgments", return_value=True, ): - uri = DocumentURIString("/judgment/uri") + uri = DocumentURIString("judgment/uri") expected_vars: XsltTransformDict = { "uri": MarkLogicDocumentURIString("/judgment/uri.xml"), "version_uri": None, @@ -43,7 +43,7 @@ def test_eval_xslt_user_cannot_view_unpublished(self): "user_can_view_unpublished_judgments", return_value=False, ), patch.object(logging, "warning") as mock_logging: - uri = DocumentURIString("/judgment/uri") + uri = DocumentURIString("judgment/uri") expected_vars: XsltTransformDict = { "uri": MarkLogicDocumentURIString("/judgment/uri.xml"), "version_uri": None, @@ -67,7 +67,7 @@ def test_eval_xslt_with_filename(self): "user_can_view_unpublished_judgments", return_value=True, ): - uri = DocumentURIString("/judgment/uri") + uri = DocumentURIString("judgment/uri") expected_vars: XsltTransformDict = { "uri": MarkLogicDocumentURIString("/judgment/uri.xml"), "version_uri": None, @@ -92,7 +92,7 @@ def test_eval_xslt_with_query(self): "user_can_view_unpublished_judgments", return_value=True, ): - uri = DocumentURIString("/judgment/uri") + uri = DocumentURIString("judgment/uri") query = "the query string" expected_vars: XsltTransformDict = { "uri": MarkLogicDocumentURIString("/judgment/uri.xml"), diff --git a/tests/client/test_get_judgment_and_versions.py b/tests/client/test_get_judgment_and_versions.py index dde9c453..bfadeb73 100644 --- a/tests/client/test_get_judgment_and_versions.py +++ b/tests/client/test_get_judgment_and_versions.py @@ -29,7 +29,7 @@ def test_get_judgment_xml(self): b"" ) - result = self.client.get_judgment_xml(DocumentURIString("/judgment/uri")) + result = self.client.get_judgment_xml(DocumentURIString("judgment/uri")) expected = ( '\n' @@ -42,7 +42,7 @@ def test_get_judgment_xml(self): def test_get_judgment_version(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/ewca/civ/2004/632") + uri = DocumentURIString("ewca/civ/2004/632") version = 3 expected_vars = {"uri": "/ewca/civ/2004/632.xml", "version": "3"} self.client.get_judgment_version(uri, version) @@ -52,7 +52,7 @@ def test_get_judgment_version(self): def test_list_judgment_versions(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/ewca/civ/2004/632") + uri = DocumentURIString("ewca/civ/2004/632") expected_vars = {"uri": "/ewca/civ/2004/632.xml"} self.client.list_judgment_versions(uri) diff --git a/tests/client/test_get_press_summaries_for_document_uri.py b/tests/client/test_get_press_summaries_for_document_uri.py index b81ddae1..e06db03a 100644 --- a/tests/client/test_get_press_summaries_for_document_uri.py +++ b/tests/client/test_get_press_summaries_for_document_uri.py @@ -19,25 +19,23 @@ def test_get_press_summaries_for_document_uri( mock_press_summary, ): mock_eval.return_value = "EVAL" - mock_get_marklogic_response.return_value = ["/foo/bar/baz/1", "/foo/bar/baz/2"] + mock_get_marklogic_response.return_value = ["foo/bar/baz/1", "foo/bar/baz/2"] - for uri in ["foo/bar", "/foo/bar"]: - with self.subTest(uri=uri): - self.client.get_press_summaries_for_document_uri(DocumentURIString(uri)) + self.client.get_press_summaries_for_document_uri(DocumentURIString("foo/bar")) - mock_get_marklogic_response.assert_called_with("EVAL") - mock_eval.assert_called_with( - { - "parent_uri": "/foo/bar", - "component": "pressSummary", - }, - "get_components_for_document.xqy", - ) + mock_get_marklogic_response.assert_called_with("EVAL") + mock_eval.assert_called_with( + { + "parent_uri": "foo/bar", + "component": "pressSummary", + }, + "get_components_for_document.xqy", + ) - mock_press_summary.assert_has_calls( - [ - call("/foo/bar/baz/1", self.client), - call("/foo/bar/baz/2", self.client), - ], - any_order=True, - ) + mock_press_summary.assert_has_calls( + [ + call("foo/bar/baz/1", self.client), + call("foo/bar/baz/2", self.client), + ], + any_order=True, + ) diff --git a/tests/client/test_get_set_metadata.py b/tests/client/test_get_set_metadata.py index df38b9e2..eab07607 100644 --- a/tests/client/test_get_set_metadata.py +++ b/tests/client/test_get_set_metadata.py @@ -153,7 +153,7 @@ def test_set_judgment_date_warn(self): def test_set_internal_uri_leading_slash(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/judgment/uri") + uri = DocumentURIString("judgment/uri") expected_vars = { "uri": "/judgment/uri.xml", "content_with_id": "https://caselaw.nationalarchives.gov.uk/id/judgment/uri", diff --git a/tests/client/test_get_set_properties.py b/tests/client/test_get_set_properties.py index fbbcbebe..0a8da427 100644 --- a/tests/client/test_get_set_properties.py +++ b/tests/client/test_get_set_properties.py @@ -13,7 +13,7 @@ def setUp(self): def test_set_boolean_property_true(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/judgment/uri") + uri = DocumentURIString("judgment/uri") expected_vars = { "uri": "/judgment/uri.xml", "value": "true", @@ -29,7 +29,7 @@ def test_set_boolean_property_true(self): def test_set_boolean_property_false(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/judgment/uri") + uri = DocumentURIString("judgment/uri") expected_vars = { "uri": "/judgment/uri.xml", "value": "false", @@ -46,7 +46,7 @@ def test_set_boolean_property_false(self): def test_get_unset_boolean_property(self): with patch.object(self.client, "eval") as mock_eval: mock_eval.return_value.content = "" - result = self.client.get_boolean_property(DocumentURIString("/judgment/uri"), "my-property") + result = self.client.get_boolean_property(DocumentURIString("judgment/uri"), "my-property") assert result is False @@ -65,7 +65,7 @@ def test_get_boolean_property(self): b"\r\ntrue\r\n" b"--595658fa1db1aa98--\r\n" ) - result = self.client.get_boolean_property(DocumentURIString("/judgment/uri"), "my-property") + result = self.client.get_boolean_property(DocumentURIString("judgment/uri"), "my-property") assert result is True @@ -84,20 +84,20 @@ def test_get_property(self): b"\r\nmy-content\r\n" b"--595658fa1db1aa98--\r\n" ) - result = self.client.get_property(DocumentURIString("/judgment/uri"), "my-property") + result = self.client.get_property(DocumentURIString("judgment/uri"), "my-property") assert result == "my-content" def test_get_unset_property(self): with patch.object(self.client, "eval") as mock_eval: mock_eval.return_value.content = "" - result = self.client.get_property(DocumentURIString("/judgment/uri"), "my-property") + result = self.client.get_property(DocumentURIString("judgment/uri"), "my-property") assert result == "" def test_set_property(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/judgment/uri") + uri = DocumentURIString("judgment/uri") expected_vars = { "uri": "/judgment/uri.xml", "value": "my-value", diff --git a/tests/client/test_get_version_annotation.py b/tests/client/test_get_version_annotation.py index ef674a21..19b562d3 100644 --- a/tests/client/test_get_version_annotation.py +++ b/tests/client/test_get_version_annotation.py @@ -23,6 +23,6 @@ def test_get_version_annotation(self): b"\r\nthis is an annotation\r\n" b"--595658fa1db1aa98--\r\n" ) - result = self.client.get_version_annotation(DocumentURIString("/judgment/uri")) + result = self.client.get_version_annotation(DocumentURIString("judgment/uri")) assert result == "this is an annotation" diff --git a/tests/client/test_get_version_created_datetime.py b/tests/client/test_get_version_created_datetime.py index 054dedb4..673c3a78 100644 --- a/tests/client/test_get_version_created_datetime.py +++ b/tests/client/test_get_version_created_datetime.py @@ -24,7 +24,7 @@ def test_get_version_created_datetime(self): b"\r\n2022-04-11T16:12:33.548954+01:00\r\n" b"--595658fa1db1aa98--\r\n" ) - result = self.client.get_version_created_datetime(DocumentURIString("/judgment/uri")) + result = self.client.get_version_created_datetime(DocumentURIString("judgment/uri")) assert result == datetime.datetime( 2022, diff --git a/tests/client/test_save_copy_delete_judgment.py b/tests/client/test_save_copy_delete_judgment.py index 86520a65..a9f728f1 100644 --- a/tests/client/test_save_copy_delete_judgment.py +++ b/tests/client/test_save_copy_delete_judgment.py @@ -25,7 +25,7 @@ def setUp(self): def test_update_document_xml(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/ewca/civ/2004/632") + uri = DocumentURIString("ewca/civ/2004/632") judgment_str = "My updated judgment" judgment_xml = ElementTree.fromstring(judgment_str) expected_vars = { @@ -65,7 +65,7 @@ def test_save_locked_judgment_xml(self): with patch.object(caselawclient.Client, "validate_content_hash"), patch.object( self.client, "eval" ) as mock_eval: - uri = DocumentURIString("/ewca/civ/2004/632") + uri = DocumentURIString("ewca/civ/2004/632") judgment_str = "My updated judgment" judgment_xml = judgment_str.encode("utf-8") expected_vars = { @@ -106,7 +106,7 @@ def test_save_locked_judgment_xml_checks_content_hash(self): caselawclient.Client, "validate_content_hash", ) as mock_validate_hash: - uri = DocumentURIString("/ewca/civ/2004/632") + uri = DocumentURIString("ewca/civ/2004/632") judgment_str = "My updated judgment" judgment_xml = judgment_str.encode("utf-8") mock_validate_hash.side_effect = InvalidContentHashError() @@ -124,7 +124,7 @@ def test_save_locked_judgment_xml_checks_content_hash(self): def test_insert_document_xml(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/ewca/civ/2004/632/") + uri = DocumentURIString("ewca/civ/2004/632") document_str = "My judgment" document_xml = ElementTree.fromstring(document_str) expected_vars = { @@ -157,7 +157,7 @@ def test_insert_document_xml(self): def test_delete_document(self): with patch.object(self.client, "eval") as mock_eval: - uri = DocumentURIString("/judgment/uri") + uri = DocumentURIString("judgment/uri") expected_vars = { "uri": "/judgment/uri.xml", } @@ -168,8 +168,8 @@ def test_delete_document(self): def test_copy_document(self): with patch.object(self.client, "eval") as mock_eval: - old_uri = DocumentURIString("/judgment/old_uri") - new_uri = DocumentURIString("/judgment/new_uri") + old_uri = DocumentURIString("judgment/old_uri") + new_uri = DocumentURIString("judgment/new_uri") expected_vars = { "old_uri": "/judgment/old_uri.xml", "new_uri": "/judgment/new_uri.xml", diff --git a/tests/client/test_validate_document.py b/tests/client/test_validate_document.py index 40e895ae..da77ad51 100644 --- a/tests/client/test_validate_document.py +++ b/tests/client/test_validate_document.py @@ -24,7 +24,7 @@ def test_validation_success(self): b"--c878f7cb55370005--\r\n" ) - assert self.client.validate_document(DocumentURIString("/foo/bar/123")) is True + assert self.client.validate_document(DocumentURIString("foo/bar/123")) is True def test_validation_failure(self): with patch.object(self.client, "eval") as mock_eval: @@ -44,4 +44,4 @@ def test_validation_failure(self): b"--c878f7cb55370005--\r\n" ) - assert self.client.validate_document(DocumentURIString("/foo/bar/123")) is False + assert self.client.validate_document(DocumentURIString("foo/bar/123")) is False diff --git a/tests/models/documents/test_document_verbs.py b/tests/models/documents/test_document_verbs.py index aabb25c7..3603944c 100644 --- a/tests/models/documents/test_document_verbs.py +++ b/tests/models/documents/test_document_verbs.py @@ -172,7 +172,7 @@ class TestReparse: @patch.dict(os.environ, {"PRIVATE_ASSET_BUCKET": "MY_BUCKET"}) @patch.dict(os.environ, {"REPARSE_SNS_TOPIC": "MY_TOPIC"}) def test_force_reparse_empty(self, sns, mock_api_client): - document = Judgment("test/2023/123", mock_api_client) + document = Judgment(DocumentURIString("test/2023/123"), mock_api_client) document.consignment_reference = "TDR-12345" @@ -224,7 +224,7 @@ def test_force_reparse_empty(self, sns, mock_api_client): @patch.dict(os.environ, {"PRIVATE_ASSET_BUCKET": "MY_BUCKET"}) @patch.dict(os.environ, {"REPARSE_SNS_TOPIC": "MY_TOPIC"}) def test_force_reparse_full(self, sns, mock_api_client): - document = Judgment("test/2023/123", mock_api_client) + document = Judgment(DocumentURIString("test/2023/123"), mock_api_client) document.neutral_citation = NeutralCitationString("[2023] Test 123") document.consignment_reference = "TDR-12345" diff --git a/tests/models/documents/test_documents.py b/tests/models/documents/test_documents.py index 0127ab5b..dd065a6f 100644 --- a/tests/models/documents/test_documents.py +++ b/tests/models/documents/test_documents.py @@ -18,13 +18,14 @@ Document, DocumentURIString, ) +from caselawclient.models.documents.exceptions import InvalidDocumentURIException from caselawclient.models.judgments import Judgment from tests.test_helpers import MockMultipartResponse class TestDocument: def test_has_sensible_repr_with_name_and_judgment(self, mock_api_client): - document = Judgment("test/1234", mock_api_client) + document = Judgment(DocumentURIString("test/1234"), mock_api_client) document.body.name = "Document Name" assert str(document) == "" @@ -32,11 +33,6 @@ def test_has_sensible_repr_without_name_or_subclass(self, mock_api_client): document = Document(DocumentURIString("test/1234"), mock_api_client) assert str(document) == "" - def test_uri_strips_slashes(self, mock_api_client): - document = Document(DocumentURIString("////test/1234/////"), mock_api_client) - - assert document.uri == "test/1234" - def test_public_uri(self, mock_api_client): document = Document(DocumentURIString("test/1234"), mock_api_client) @@ -168,8 +164,8 @@ def test_document_version_of_a_version_fails(self, mock_api_client): def test_document_versions_happy_case(self, mock_api_client): version_document = Document(DocumentURIString("test/1234"), mock_api_client) version_document.versions = [ - {"uri": "test/1234_xml_versions/2-1234.xml", "version": 2}, - {"uri": "test/1234_xml_versions/1-1234.xml", "version": 1}, + {"uri": "test/1234_xml_versions/2-1234", "version": 2}, + {"uri": "test/1234_xml_versions/1-1234", "version": 1}, ] version_document.versions_as_documents[0].uri = "test/1234_xml_versions/2-1234.xml" @@ -327,3 +323,27 @@ def test_absent_item(self, mock_api_client): AttributeError, match="Neither 'Document' nor 'DocumentBody' objects have an attribute 'x'" ): doc.x + + +class TestDocumentURIString: + def test_accepts_two_element_uri(self): + DocumentURIString("test/1234") + + def test_accepts_three_element_uri(self): + DocumentURIString("test/b/1234") + + def test_rejects_uri_with_leading_slash(self): + with pytest.raises(InvalidDocumentURIException): + DocumentURIString("/test/1234") + + def test_rejects_uri_with_trailing_slash(self): + with pytest.raises(InvalidDocumentURIException): + DocumentURIString("test/1234/") + + def test_rejects_uri_with_leading_and_trailing_slash(self): + with pytest.raises(InvalidDocumentURIException): + DocumentURIString("/test/1234/") + + def test_rejects_uri_with_dot(self): + with pytest.raises(InvalidDocumentURIException): + DocumentURIString("test/1234.xml") diff --git a/tests/models/test_judgments.py b/tests/models/test_judgments.py index e63394a7..814ea087 100644 --- a/tests/models/test_judgments.py +++ b/tests/models/test_judgments.py @@ -4,23 +4,24 @@ from caselawclient.errors import DocumentNotFoundError from caselawclient.factories import PressSummaryFactory +from caselawclient.models.documents import DocumentURIString from caselawclient.models.judgments import Judgment from caselawclient.models.neutral_citation_mixin import NeutralCitationString class TestJudgment: def test_best_identifier(self, mock_api_client): - judgment = Judgment("test/1234", mock_api_client) + judgment = Judgment(DocumentURIString("test/1234"), mock_api_client) judgment.neutral_citation = NeutralCitationString("[2023] TEST 1234") assert judgment.best_human_identifier == judgment.neutral_citation class TestJudgmentValidation: def test_has_ncn(self, mock_api_client): - document_with_ncn = Judgment("test/1234", mock_api_client) + document_with_ncn = Judgment(DocumentURIString("test/1234"), mock_api_client) document_with_ncn.neutral_citation = NeutralCitationString("[2023] TEST 1234") - document_without_ncn = Judgment("test/1234", mock_api_client) + document_without_ncn = Judgment(DocumentURIString("test/1234"), mock_api_client) document_without_ncn.neutral_citation = NeutralCitationString("") assert document_with_ncn.has_ncn is True @@ -40,7 +41,7 @@ def test_judgment_neutral_citation(self, mock_api_client): """ - judgment = Judgment("test/1234", mock_api_client) + judgment = Judgment(DocumentURIString("test/1234"), mock_api_client) assert judgment.neutral_citation == "[2023] TEST 1234" mock_api_client.get_judgment_xml_bytestring.assert_called_once_with( @@ -71,13 +72,13 @@ def test_judgment_neutral_citation(self, mock_api_client): ], ) def test_has_valid_ncn(self, mock_api_client, ncn_to_test, valid): - judgment = Judgment("test/1234", mock_api_client) + judgment = Judgment(DocumentURIString("test/1234"), mock_api_client) judgment.neutral_citation = ncn_to_test assert judgment.has_valid_ncn is valid def test_judgment_validation_failure_messages_if_failing(self, mock_api_client): - judgment = Judgment("test/1234", mock_api_client) + judgment = Judgment(DocumentURIString("test/1234"), mock_api_client) judgment.is_failure = True judgment.is_parked = True judgment.is_held = True @@ -105,7 +106,7 @@ def test_linked_document(self, document_mock, mock_api_client): press_summary = PressSummaryFactory.build() document_mock.return_value = press_summary - judgment = Judgment("/test/1234", mock_api_client) + judgment = Judgment(DocumentURIString("test/1234"), mock_api_client) assert judgment.linked_document == press_summary document_mock.assert_called_once_with( @@ -121,5 +122,5 @@ def test_linked_document_returns_nothing_when_does_not_exist( ): document_mock.side_effect = DocumentNotFoundError() - judgment = Judgment("/test/1234", mock_api_client) + judgment = Judgment(DocumentURIString("test/1234"), mock_api_client) assert judgment.linked_document is None diff --git a/tests/models/test_press_summaries.py b/tests/models/test_press_summaries.py index d0224b16..cd4e461b 100644 --- a/tests/models/test_press_summaries.py +++ b/tests/models/test_press_summaries.py @@ -11,7 +11,7 @@ class TestPressSummary: def test_best_identifier(self, mock_api_client): - summary = PressSummary("test/1234", mock_api_client) + summary = PressSummary(DocumentURIString("test/1234"), mock_api_client) summary.neutral_citation = NeutralCitationString("[2023] TEST 1234") assert summary.best_human_identifier == summary.neutral_citation @@ -47,7 +47,7 @@ def test_press_summary_neutral_citation(self, mock_api_client): """ - press_summary = PressSummary("test/1234", mock_api_client) + press_summary = PressSummary(DocumentURIString("test/1234"), mock_api_client) assert press_summary.neutral_citation == "[2016] TEST 49" mock_api_client.get_judgment_xml_bytestring.assert_called_once_with( @@ -78,7 +78,7 @@ def test_press_summary_neutral_citation(self, mock_api_client): ], ) def test_has_valid_ncn(self, mock_api_client, ncn_to_test, valid): - press_summary = PressSummary("test/1234", mock_api_client) + press_summary = PressSummary(DocumentURIString("test/1234"), mock_api_client) press_summary.neutral_citation = ncn_to_test assert press_summary.has_valid_ncn is valid @@ -87,7 +87,7 @@ def test_press_summary_validation_failure_messages_if_failing( self, mock_api_client, ): - press_summary = PressSummary("test/1234", mock_api_client) + press_summary = PressSummary(DocumentURIString("test/1234"), mock_api_client) press_summary.is_failure = True press_summary.is_parked = True press_summary.is_held = True @@ -115,7 +115,7 @@ def test_linked_document(self, document_mock, mock_api_client): judgment = JudgmentFactory.build() document_mock.return_value = judgment - press_summary = PressSummary("/test/1234/press-summary/1", mock_api_client) + press_summary = PressSummary(DocumentURIString("test/1234/press-summary/1"), mock_api_client) assert press_summary.linked_document == judgment document_mock.assert_called_once_with("test/1234", mock_api_client) @@ -128,5 +128,5 @@ def test_linked_document_returns_nothing_when_does_not_exist( ): document_mock.side_effect = DocumentNotFoundError() - press_summary = PressSummary("/test/1234/press-summary/1", mock_api_client) + press_summary = PressSummary(DocumentURIString("test/1234/press-summary/1"), mock_api_client) assert press_summary.linked_document is None diff --git a/tests/models/utilities/test_utilities.py b/tests/models/utilities/test_utilities.py index 4b012381..2ba24f28 100644 --- a/tests/models/utilities/test_utilities.py +++ b/tests/models/utilities/test_utilities.py @@ -39,9 +39,9 @@ def test_render_versions(self): requests_toolbelt.multipart.decoder.BodyPart.return_value = version_parts expected_result = [ - {"uri": "/ewhc/ch/2022/1178_xml_versions/3-1178", "version": 3}, - {"uri": "/ewhc/ch/2022/1178_xml_versions/2-1178", "version": 2}, - {"uri": "/ewhc/ch/2022/1178_xml_versions/1-1178", "version": 1}, + {"uri": "ewhc/ch/2022/1178_xml_versions/3-1178", "version": 3}, + {"uri": "ewhc/ch/2022/1178_xml_versions/2-1178", "version": 2}, + {"uri": "ewhc/ch/2022/1178_xml_versions/1-1178", "version": 1}, ] assert render_versions(version_parts) == expected_result