From 0932df73f0558b80c4173dfcce28aa7d55323c83 Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Wed, 2 Oct 2024 09:55:47 +0100 Subject: [PATCH 1/2] refactor(types): move autogen court types to new types submodule Utils will need to play host to multiple new types which explicitly describe concepts relating to courts; the existing autogenerated types from the schema should logically live in the same place, which is a new submodule. This should not affect downstream consumers of the package, as these court types are only used internally. --- .pre-commit-config.yaml | 2 +- jsonschema-gentypes.yaml | 2 +- src/ds_caselaw_utils/courts.py | 2 +- src/ds_caselaw_utils/factory.py | 2 +- src/ds_caselaw_utils/types/__init__.py | 0 .../courts_schema_autogen.py} | 0 6 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 src/ds_caselaw_utils/types/__init__.py rename src/ds_caselaw_utils/{courts_schema_types_autogenerated.py => types/courts_schema_autogen.py} (100%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0b7f53d..1035175 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: courts_schema_types_autogenerated\.py +exclude: courts_schema_autogen\.py repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 diff --git a/jsonschema-gentypes.yaml b/jsonschema-gentypes.yaml index e28a7d4..c8d995a 100644 --- a/jsonschema-gentypes.yaml +++ b/jsonschema-gentypes.yaml @@ -8,7 +8,7 @@ generate: - # JSON schema file path source: src/ds_caselaw_utils/data/schema/courts.schema.json # Python file path - destination: src/ds_caselaw_utils/courts_schema_types_autogenerated.py + destination: src/ds_caselaw_utils/types/courts_schema_autogen.py # The name of the root element root_name: RawCourtRepository # Argument passed to the API diff --git a/src/ds_caselaw_utils/courts.py b/src/ds_caselaw_utils/courts.py index 66fa9d4..893bf7d 100644 --- a/src/ds_caselaw_utils/courts.py +++ 
b/src/ds_caselaw_utils/courts.py @@ -10,7 +10,7 @@ from ruamel.yaml import YAML -from ds_caselaw_utils.courts_schema_types_autogenerated import ( +from ds_caselaw_utils.types.courts_schema_autogen import ( RawCourt, RawCourtRepository, RawJurisdiction, diff --git a/src/ds_caselaw_utils/factory.py b/src/ds_caselaw_utils/factory.py index c383fb2..ca4c80c 100644 --- a/src/ds_caselaw_utils/factory.py +++ b/src/ds_caselaw_utils/factory.py @@ -1,7 +1,7 @@ import typing from .courts import Court -from .courts_schema_types_autogenerated import RawCourt, RawCourtRepository +from .types.courts_schema_autogen import RawCourt, RawCourtRepository class CourtFactory(Court): diff --git a/src/ds_caselaw_utils/types/__init__.py b/src/ds_caselaw_utils/types/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/ds_caselaw_utils/courts_schema_types_autogenerated.py b/src/ds_caselaw_utils/types/courts_schema_autogen.py similarity index 100% rename from src/ds_caselaw_utils/courts_schema_types_autogenerated.py rename to src/ds_caselaw_utils/types/courts_schema_autogen.py From a1655cdb7444702a8e1b3901c0ff61995eb39fd0 Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Wed, 2 Oct 2024 10:59:39 +0100 Subject: [PATCH 2/2] feat(types): add new types for NCN-related strings These new types explicitly allow us to state "this string represents a Neutral Citation" and "this string represents a URI we have derived from a Neutral Citation". They will replace types serving a similar function in API Client, giving us full end-to-end type integrity for NCNs and derived-URIs for the first time. BREAKING CHANGE: Existing calls to `neutral_url()` will need to be made type-aware in all downstream projects where typechecking is used. 
--- src/ds_caselaw_utils/courts.py | 10 +++--- src/ds_caselaw_utils/neutral.py | 6 ++-- src/ds_caselaw_utils/test_neutral.py | 47 +++++++++++++------------- src/ds_caselaw_utils/types/__init__.py | 10 ++++++ 4 files changed, 42 insertions(+), 31 deletions(-) diff --git a/src/ds_caselaw_utils/courts.py b/src/ds_caselaw_utils/courts.py index 893bf7d..bc8939a 100644 --- a/src/ds_caselaw_utils/courts.py +++ b/src/ds_caselaw_utils/courts.py @@ -6,7 +6,7 @@ import pathlib from datetime import date -from typing import NewType, Optional +from typing import Optional from ruamel.yaml import YAML @@ -16,9 +16,7 @@ RawJurisdiction, ) -CourtCode = NewType("CourtCode", str) -CourtParam = NewType("CourtParam", str) -JurisdictionCode = NewType("JurisdictionCode", str) +from .types import CourtCode, CourtParam, JurisdictionCode, NeutralCitationString class Jurisdiction: @@ -36,7 +34,7 @@ def __init__(self, data: RawCourt) -> None: self.name: str = data["name"] self.grouped_name: str = data.get("grouped_name") or data["name"] self.link: str = data["link"] - self.ncn: Optional[str] = data.get("ncn") + self.ncn: Optional[NeutralCitationString] = NeutralCitationString(data["ncn"]) if "ncn" in data else None if "param" in data: self.canonical_param = CourtParam(data["param"]) self.param_aliases = [CourtParam(data["param"])] + [ @@ -85,7 +83,7 @@ def link(self) -> str: return self.court.link @property - def ncn(self) -> Optional[str]: + def ncn(self) -> Optional[NeutralCitationString]: return self.court.ncn @property diff --git a/src/ds_caselaw_utils/neutral.py b/src/ds_caselaw_utils/neutral.py index 8eb494e..ac6b0bd 100644 --- a/src/ds_caselaw_utils/neutral.py +++ b/src/ds_caselaw_utils/neutral.py @@ -8,13 +8,15 @@ from ruamel.yaml import YAML +from .types import NCNBasedUriString, NeutralCitationString + yaml = YAML() datafile = pathlib.Path(__file__).parent / "data/neutral_citation_regex.yaml" with open(datafile) as f: citation_data = yaml.load(f) -def neutral_url(citation: str) -> 
Optional[str]: +def neutral_url(citation: NeutralCitationString) -> Optional[NCNBasedUriString]: """Given a neutral citation such as `[2020] EAT 17`, return a public-API URL like `/eat/2020/17`, or None if no match is found. @@ -22,5 +24,5 @@ def neutral_url(citation: str) -> Optional[str]: for regex, groups in citation_data: if match := re.match(regex, citation): url_components = "/".join([match.groups()[x - 1] for x in groups]) - return f"/{url_components}".lower() + return NCNBasedUriString(f"/{url_components}".lower()) return None diff --git a/src/ds_caselaw_utils/test_neutral.py b/src/ds_caselaw_utils/test_neutral.py index 0123322..93d1807 100644 --- a/src/ds_caselaw_utils/test_neutral.py +++ b/src/ds_caselaw_utils/test_neutral.py @@ -1,34 +1,35 @@ import unittest from .neutral import neutral_url +from .types import NeutralCitationString class TestNeutralURL(unittest.TestCase): def test_good_neutral_urls(self): - self.assertEqual(neutral_url("[2022] UKSC 1"), "/uksc/2022/1") - self.assertEqual(neutral_url("[1604] EWCA Crim 555"), "/ewca/crim/1604/555") - self.assertEqual(neutral_url("[2022] EWHC 1 (Comm)"), "/ewhc/comm/2022/1") - self.assertEqual(neutral_url("[1999] EWCOP 7"), "/ewcop/1999/7") - self.assertEqual(neutral_url("[2022] UKUT 1 (IAC)"), "/ukut/iac/2022/1") - self.assertEqual(neutral_url("[2022] EAT 1"), "/eat/2022/1") - self.assertEqual(neutral_url("[2022] UKFTT 1 (TC)"), "/ukftt/tc/2022/1") - self.assertEqual(neutral_url("[2022] UKFTT 1 (GRC)"), "/ukftt/grc/2022/1") - self.assertEqual(neutral_url("[2022] EWHC 1 (KB)"), "/ewhc/kb/2022/1") - self.assertEqual(neutral_url("[2023] UKAIT 1"), "/ukait/2023/1") - self.assertEqual(neutral_url("[2024] EWCOP 17 (T2)"), "/ewcop/t2/2024/17") - self.assertEqual(neutral_url("[2000] UKIPTrib 99"), "/ukiptrib/2000/99") - self.assertEqual(neutral_url("[2000] EWCR 99"), "/ewcr/2000/99") - self.assertEqual(neutral_url("[2000] EWCC 99"), "/ewcc/2000/99") + self.assertEqual(neutral_url(NeutralCitationString("[2022] UKSC 
1")), "/uksc/2022/1") + self.assertEqual(neutral_url(NeutralCitationString("[1604] EWCA Crim 555")), "/ewca/crim/1604/555") + self.assertEqual(neutral_url(NeutralCitationString("[2022] EWHC 1 (Comm)")), "/ewhc/comm/2022/1") + self.assertEqual(neutral_url(NeutralCitationString("[1999] EWCOP 7")), "/ewcop/1999/7") + self.assertEqual(neutral_url(NeutralCitationString("[2022] UKUT 1 (IAC)")), "/ukut/iac/2022/1") + self.assertEqual(neutral_url(NeutralCitationString("[2022] EAT 1")), "/eat/2022/1") + self.assertEqual(neutral_url(NeutralCitationString("[2022] UKFTT 1 (TC)")), "/ukftt/tc/2022/1") + self.assertEqual(neutral_url(NeutralCitationString("[2022] UKFTT 1 (GRC)")), "/ukftt/grc/2022/1") + self.assertEqual(neutral_url(NeutralCitationString("[2022] EWHC 1 (KB)")), "/ewhc/kb/2022/1") + self.assertEqual(neutral_url(NeutralCitationString("[2023] UKAIT 1")), "/ukait/2023/1") + self.assertEqual(neutral_url(NeutralCitationString("[2024] EWCOP 17 (T2)")), "/ewcop/t2/2024/17") + self.assertEqual(neutral_url(NeutralCitationString("[2000] UKIPTrib 99")), "/ukiptrib/2000/99") + self.assertEqual(neutral_url(NeutralCitationString("[2000] EWCR 99")), "/ewcr/2000/99") + self.assertEqual(neutral_url(NeutralCitationString("[2000] EWCC 99")), "/ewcc/2000/99") def test_bad_neutral_urls(self): - self.assertEqual(neutral_url(""), None) - self.assertEqual(neutral_url("1604] EWCA Crim 555"), None) - self.assertEqual(neutral_url("[2022 EWHC 1 Comm"), None) - self.assertEqual(neutral_url("[1999] EWCOP"), None) + self.assertEqual(neutral_url(NeutralCitationString("")), None) + self.assertEqual(neutral_url(NeutralCitationString("1604] EWCA Crim 555")), None) + self.assertEqual(neutral_url(NeutralCitationString("[2022 EWHC 1 Comm")), None) + self.assertEqual(neutral_url(NeutralCitationString("[1999] EWCOP")), None) self.assertEqual( - neutral_url("[2022] UKUT B1 IAC"), None + neutral_url(NeutralCitationString("[2022] UKUT B1 IAC")), None ) # Could be a Bailii reference, might want to drop B in 
future. - self.assertEqual(neutral_url("[2022] EAT A"), None) - self.assertEqual(neutral_url("[2022] NOTACOURT 1 TC"), None) - self.assertEqual(neutral_url("[2022] EWHC 1 (T2)"), None) - self.assertEqual(neutral_url("[2000] EWCRC 99"), None) + self.assertEqual(neutral_url(NeutralCitationString("[2022] EAT A")), None) + self.assertEqual(neutral_url(NeutralCitationString("[2022] NOTACOURT 1 TC")), None) + self.assertEqual(neutral_url(NeutralCitationString("[2022] EWHC 1 (T2)")), None) + self.assertEqual(neutral_url(NeutralCitationString("[2000] EWCRC 99")), None) diff --git a/src/ds_caselaw_utils/types/__init__.py b/src/ds_caselaw_utils/types/__init__.py index e69de29..abf43be 100644 --- a/src/ds_caselaw_utils/types/__init__.py +++ b/src/ds_caselaw_utils/types/__init__.py @@ -0,0 +1,10 @@ +from typing import NewType + +# Types which are used for identifying courts and their jurisdictions +CourtCode = NewType("CourtCode", str) +CourtParam = NewType("CourtParam", str) +JurisdictionCode = NewType("JurisdictionCode", str) + +# Types which are used to identify a judgment +NeutralCitationString = NewType("NeutralCitationString", str) +NCNBasedUriString = NewType("NCNBasedUriString", str)