From 51b6dc9fd7591b3de82583b11ae6aeb1f0ad5aa3 Mon Sep 17 00:00:00 2001
From: Nick Jackson <nick@dxw.com>
Date: Wed, 18 Dec 2024 09:31:09 +0000
Subject: [PATCH 1/5] fix(IdentifierSchema): use hasattr instead of getattr
 with a default when testing required attributes

Using `getattr` with a default of `False` will raise an unexpected error when the attribute is present and has a value of `False`. `hasattr` simply checks for its existence, regardless of value.
---
 src/caselawclient/models/identifiers/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/caselawclient/models/identifiers/__init__.py b/src/caselawclient/models/identifiers/__init__.py
index d87a3015..47ad65eb 100644
--- a/src/caselawclient/models/identifiers/__init__.py
+++ b/src/caselawclient/models/identifiers/__init__.py
@@ -38,7 +38,7 @@ def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
             "name",
             "namespace",
         ):
-            if not getattr(cls, required, False):
+            if not hasattr(cls, required):
                 raise NotImplementedError(f"Can't instantiate IdentifierSchema without {required} attribute.")
         super().__init_subclass__(**kwargs)
 

From 13855d53bca97c061520c64dea21f868743ebcfc Mon Sep 17 00:00:00 2001
From: Nick Jackson <nick@dxw.com>
Date: Wed, 18 Dec 2024 09:49:13 +0000
Subject: [PATCH 2/5] feat(FCL-533): add scoring to Identifiers

This allows us to decide which of a range of possible identifiers should be preferred for a document
---
 .../models/identifiers/__init__.py            | 24 ++++++++
 src/caselawclient/models/identifiers/fclid.py |  2 +
 .../models/identifiers/neutral_citation.py    |  2 +
 tests/models/identifiers/test_identifiers.py  | 59 ++++++++++++-------
 4 files changed, 67 insertions(+), 20 deletions(-)

diff --git a/src/caselawclient/models/identifiers/__init__.py b/src/caselawclient/models/identifiers/__init__.py
index 47ad65eb..1a6a8c5e 100644
--- a/src/caselawclient/models/identifiers/__init__.py
+++ b/src/caselawclient/models/identifiers/__init__.py
@@ -32,11 +32,18 @@ class IdentifierSchema(ABC):
     name: str
     namespace: str
 
+    human_readable: bool
+    """ Should this identifier type be considered for display as a 'human readable' identifier? """
+
+    base_score_multiplier: float = 1.0
+    """ A multiplier used to adjust the relative ranking of this identifier when calculating preferred identifiers. """
+
     def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
         """Ensure that subclasses have the required attributes set."""
         for required in (
             "name",
             "namespace",
+            "human_readable",
         ):
             if not hasattr(cls, required):
                 raise NotImplementedError(f"Can't instantiate IdentifierSchema without {required} attribute.")
@@ -101,6 +108,11 @@ def as_xml_tree(self) -> etree._Element:
     def url_slug(self) -> str:
         return self.schema.compile_identifier_url_slug(self.value)
 
+    @property
+    def score(self) -> float:
+        """Return the score of this identifier, used to calculate the preferred identifier for a document."""
+        return 1 * self.schema.base_score_multiplier
+
     def same_as(self, other: "Identifier") -> bool:
         "Is this the same as another identifier (in value and schema)?"
         return self.value == other.value and self.schema == other.schema
@@ -149,3 +161,15 @@ def as_etree(self) -> etree._Element:
             identifiers_root.append(identifier.as_xml_tree)
 
         return identifiers_root
+
+    @property
+    def by_score(self) -> list[Identifier]:
+        """Return a list of identifiers, sorted by their score in descending order."""
+        return sorted(self.values(), key=lambda v: v.score, reverse=True)
+
+    @property
+    def preferred(self) -> Optional[Identifier]:
+        """If this document has identifiers, return the preferred one."""
+        if len(self) == 0:
+            return None
+        return self.by_score[0]
diff --git a/src/caselawclient/models/identifiers/fclid.py b/src/caselawclient/models/identifiers/fclid.py
index 83e75891..a5dd68be 100644
--- a/src/caselawclient/models/identifiers/fclid.py
+++ b/src/caselawclient/models/identifiers/fclid.py
@@ -27,6 +27,8 @@ class FindCaseLawIdentifierSchema(IdentifierSchema):
 
     name = "Find Case Law Identifier"
     namespace = "fclid"
+    human_readable = False
+    base_score_multiplier = 0.8
 
     @classmethod
     def validate_identifier(cls, value: str) -> bool:
diff --git a/src/caselawclient/models/identifiers/neutral_citation.py b/src/caselawclient/models/identifiers/neutral_citation.py
index b8fc6fe2..e4d40332 100644
--- a/src/caselawclient/models/identifiers/neutral_citation.py
+++ b/src/caselawclient/models/identifiers/neutral_citation.py
@@ -30,6 +30,8 @@ class NeutralCitationNumberSchema(IdentifierSchema):
 
     name = "Neutral Citation Number"
     namespace = "ukncn"
+    human_readable = True
+    base_score_multiplier = 1.5
 
     @classmethod
     def validate_identifier(cls, value: str) -> bool:
diff --git a/tests/models/identifiers/test_identifiers.py b/tests/models/identifiers/test_identifiers.py
index d317eae8..b7739cf0 100644
--- a/tests/models/identifiers/test_identifiers.py
+++ b/tests/models/identifiers/test_identifiers.py
@@ -5,6 +5,25 @@
 from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
 
 
+class TestIdentifierSchema(IdentifierSchema):
+    __test__ = False
+
+    name = "Test Schema"
+    namespace = "test"
+    human_readable = True
+    base_score_multiplier = 2.5
+
+    @classmethod
+    def compile_identifier_url_slug(cls, value: str) -> str:
+        return value.lower()
+
+
+class TestIdentifier(Identifier):
+    __test__ = False
+
+    schema = TestIdentifierSchema
+
+
 @pytest.fixture
 def identifiers():
     return Identifiers(
@@ -12,13 +31,18 @@ def identifiers():
     )
 
 
+TEST_NCN_1701 = NeutralCitationNumber(value="[1701] UKSC 999")
+TEST_NCN_1234 = NeutralCitationNumber(value="[1234] UKSC 999")
+TEST_IDENTIFIER_999 = TestIdentifier("TEST-999")
+
+
 @pytest.fixture
 def mixed_identifiers():
     return Identifiers(
         {
-            "id-A": NeutralCitationNumber(value="[1701] UKSC 999"),
-            "id-B": TestIdentifier("TEST-999"),
-            "id-C": NeutralCitationNumber(value="[1234] UKSC 999"),
+            "id-A": TEST_NCN_1701,
+            "id-B": TEST_IDENTIFIER_999,
+            "id-C": TEST_NCN_1234,
         }
     )
 
@@ -28,23 +52,6 @@ def id3():
     return TestIdentifier(uuid="id-3", value="TEST-333")
 
 
-class TestIdentifierSchema(IdentifierSchema):
-    __test__ = False
-
-    name = "Test Schema"
-    namespace = "test"
-
-    @classmethod
-    def compile_identifier_url_slug(cls, value: str) -> str:
-        return value.lower()
-
-
-class TestIdentifier(Identifier):
-    __test__ = False
-
-    schema = TestIdentifierSchema
-
-
 class TestIdentifierBase:
     def test_uuid_setting(self):
         """Ensure that if a UUID is provided when initialising an Identifier that it is properly stored."""
@@ -116,3 +123,15 @@ def test_delete_type(self, mixed_identifiers):
         assert "TEST-999" in str(mixed_identifiers)
         assert "[1701] UKSC 999" not in str(mixed_identifiers)
         assert "[1234] UKSC 999" not in str(mixed_identifiers)
+
+
+class TestIdentifierScoring:
+    def test_base_scoring(self):
+        identifier = TestIdentifier(value="TEST-123")
+        assert identifier.score == 2.5
+
+    def test_sorting(self, mixed_identifiers):
+        assert mixed_identifiers.by_score == [TEST_IDENTIFIER_999, TEST_NCN_1701, TEST_NCN_1234]
+
+    def test_preferred_identifier(self, mixed_identifiers):
+        assert mixed_identifiers.preferred == TEST_IDENTIFIER_999

From ec2c226c907a2f77c410b0b8a914c0f8afd5f694 Mon Sep 17 00:00:00 2001
From: Nick Jackson <nick@dxw.com>
Date: Wed, 18 Dec 2024 10:41:05 +0000
Subject: [PATCH 3/5] feat(FCL-533): modify human identifier to rely on
 identifiers framework

This is instead of assuming that an NCN will always be present. As a result, this has had knock-on changes in some other places which were previously relying on this incorrect behaviour. This includes some breaking changes to method signatures, which can now return a `None` in places they previously could not. Since relying on the presence of an NCN is now considered dangerous, these methods have been marked as deprecated.

BREAKING CHANGE: Methods which were previously guaranteed to return a Neutral Citation may now return `None`.
---
 .../models/documents/__init__.py              | 19 ++++++++-------
 src/caselawclient/models/judgments.py         | 23 ++++++++-----------
 .../models/neutral_citation_mixin.py          | 12 ++++++----
 src/caselawclient/models/press_summaries.py   | 21 ++++++++---------
 tests/models/test_judgments.py                |  6 +++--
 tests/models/test_press_summaries.py          |  6 +++--
 6 files changed, 46 insertions(+), 41 deletions(-)

diff --git a/src/caselawclient/models/documents/__init__.py b/src/caselawclient/models/documents/__init__.py
index 6af19c3e..a85f1763 100644
--- a/src/caselawclient/models/documents/__init__.py
+++ b/src/caselawclient/models/documents/__init__.py
@@ -15,6 +15,7 @@
     NotSupportedOnVersion,
     OnlySupportedOnVersion,
 )
+from caselawclient.models.identifiers import Identifier
 from caselawclient.models.identifiers.fclid import FindCaseLawIdentifier, FindCaseLawIdentifierSchema
 from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
 from caselawclient.models.utilities import VersionsDict, extract_version, render_versions
@@ -171,13 +172,11 @@ def _initialise_identifiers(self) -> None:
         self.identifiers = unpack_all_identifiers_from_etree(identifiers_element_as_etree)
 
     @property
-    def best_human_identifier(self) -> Optional[str]:
-        """
-        Some identifier that is understood by legal professionals to refer to this legal event
-        that is not the name of the document.
-        Typically, this will be the neutral citation number, should it exist.
-        Should typically be overridden in subclasses.
-        """
+    def best_human_identifier(self) -> Optional[Identifier]:
+        """Return the preferred identifier for the document, providing that it is considered human readable."""
+        preferred_identifier = self.identifiers.preferred
+        if preferred_identifier and preferred_identifier.schema.human_readable:
+            return preferred_identifier
         return None
 
     @property
@@ -507,13 +506,17 @@ def force_reparse(self) -> None:
             "documentType": parser_type_noun,
             "metadata": {
                 "name": self.body.name or None,
-                "cite": self.best_human_identifier or None,
+                "cite": None,
                 "court": self.body.court or None,
                 "date": checked_date,
                 "uri": self.uri,
             },
         }
 
+        ## TODO: Remove this hack around the fact that NCNs are assumed to be present for all documents' metadata, but actually different document classes may have different metadata
+        if hasattr(self, "neutral_citation"):
+            parser_instructions["metadata"]["cite"] = self.neutral_citation
+
         request_parse(
             uri=self.uri,
             reference=self.consignment_reference,
diff --git a/src/caselawclient/models/judgments.py b/src/caselawclient/models/judgments.py
index 730c3c0c..a2d0fffe 100644
--- a/src/caselawclient/models/judgments.py
+++ b/src/caselawclient/models/judgments.py
@@ -25,20 +25,17 @@ def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None:
         super().__init__(self.document_noun, uri, *args, **kwargs)
 
     @cached_property
-    def neutral_citation(self) -> NeutralCitationString:
-        return NeutralCitationString(
-            self.body.get_xpath_match_string(
-                "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
-                {
-                    "uk": "https://caselaw.nationalarchives.gov.uk/akn",
-                    "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
-                },
-            )
+    def neutral_citation(self) -> Optional[NeutralCitationString]:
+        value_in_xml = self.body.get_xpath_match_string(
+            "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:cite/text()",
+            {
+                "uk": "https://caselaw.nationalarchives.gov.uk/akn",
+                "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
+            },
         )
-
-    @property
-    def best_human_identifier(self) -> str:
-        return self.neutral_citation
+        if value_in_xml:
+            return NeutralCitationString(value_in_xml)
+        return None
 
     @cached_property
     def linked_document(self) -> Optional["PressSummary"]:
diff --git a/src/caselawclient/models/neutral_citation_mixin.py b/src/caselawclient/models/neutral_citation_mixin.py
index 6b55a125..7f5c38df 100644
--- a/src/caselawclient/models/neutral_citation_mixin.py
+++ b/src/caselawclient/models/neutral_citation_mixin.py
@@ -1,9 +1,10 @@
 from abc import ABC, abstractmethod
 from functools import cached_property
-from typing import Any
+from typing import Any, Optional
 
 from ds_caselaw_utils import neutral_url
 from ds_caselaw_utils.types import NeutralCitationString
+from typing_extensions import deprecated
 
 
 class NeutralCitationMixin(ABC):
@@ -38,12 +39,15 @@ def __init__(self, document_noun: str, *args: Any, **kwargs: Any) -> None:
 
     @cached_property
     @abstractmethod
-    def neutral_citation(self) -> NeutralCitationString: ...
+    @deprecated("Legacy usage of NCNs is deprecated; you should be moving to the Identifiers framework")
+    def neutral_citation(self) -> Optional[NeutralCitationString]: ...
 
     @cached_property
+    @deprecated("Legacy usage of NCNs is deprecated; you should be moving to the Identifiers framework")
     def has_ncn(self) -> bool:
-        return bool(self.neutral_citation)
+        return self.neutral_citation is not None and self.neutral_citation != ""
 
     @cached_property
+    @deprecated("Legacy usage of NCNs is deprecated; you should be moving to the Identifiers framework")
     def has_valid_ncn(self) -> bool:
-        return self.has_ncn and neutral_url(self.neutral_citation) is not None
+        return self.neutral_citation is not None and neutral_url(self.neutral_citation) is not None
diff --git a/src/caselawclient/models/press_summaries.py b/src/caselawclient/models/press_summaries.py
index 2996c132..587ba051 100644
--- a/src/caselawclient/models/press_summaries.py
+++ b/src/caselawclient/models/press_summaries.py
@@ -27,19 +27,16 @@ def __init__(self, uri: DocumentURIString, *args: Any, **kwargs: Any) -> None:
         super().__init__(self.document_noun, uri, *args, **kwargs)
 
     @cached_property
-    def neutral_citation(self) -> NeutralCitationString:
-        return NeutralCitationString(
-            self.body.get_xpath_match_string(
-                "/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
-                {
-                    "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
-                },
-            )
+    def neutral_citation(self) -> Optional[NeutralCitationString]:
+        value_in_xml = self.body.get_xpath_match_string(
+            "/akn:akomaNtoso/akn:doc/akn:preface/akn:p/akn:neutralCitation/text()",
+            {
+                "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
+            },
         )
-
-    @property
-    def best_human_identifier(self) -> str:
-        return self.neutral_citation
+        if value_in_xml:
+            return NeutralCitationString(value_in_xml)
+        return None
 
     @cached_property
     def linked_document(self) -> Optional[Judgment]:
diff --git a/tests/models/test_judgments.py b/tests/models/test_judgments.py
index 814ea087..af43bb2f 100644
--- a/tests/models/test_judgments.py
+++ b/tests/models/test_judgments.py
@@ -5,6 +5,7 @@
 from caselawclient.errors import DocumentNotFoundError
 from caselawclient.factories import PressSummaryFactory
 from caselawclient.models.documents import DocumentURIString
+from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
 from caselawclient.models.judgments import Judgment
 from caselawclient.models.neutral_citation_mixin import NeutralCitationString
 
@@ -12,8 +13,9 @@
 class TestJudgment:
     def test_best_identifier(self, mock_api_client):
         judgment = Judgment(DocumentURIString("test/1234"), mock_api_client)
-        judgment.neutral_citation = NeutralCitationString("[2023] TEST 1234")
-        assert judgment.best_human_identifier == judgment.neutral_citation
+        document_ncn = NeutralCitationNumber(value="[2023] TEST 1234")
+        judgment.identifiers.add(document_ncn)
+        assert judgment.best_human_identifier == document_ncn
 
 
 class TestJudgmentValidation:
diff --git a/tests/models/test_press_summaries.py b/tests/models/test_press_summaries.py
index cd4e461b..d5bf3251 100644
--- a/tests/models/test_press_summaries.py
+++ b/tests/models/test_press_summaries.py
@@ -5,6 +5,7 @@
 from caselawclient.errors import DocumentNotFoundError
 from caselawclient.factories import JudgmentFactory
 from caselawclient.models.documents import DocumentURIString
+from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
 from caselawclient.models.neutral_citation_mixin import NeutralCitationString
 from caselawclient.models.press_summaries import PressSummary
 
@@ -12,8 +13,9 @@
 class TestPressSummary:
     def test_best_identifier(self, mock_api_client):
         summary = PressSummary(DocumentURIString("test/1234"), mock_api_client)
-        summary.neutral_citation = NeutralCitationString("[2023] TEST 1234")
-        assert summary.best_human_identifier == summary.neutral_citation
+        document_ncn = NeutralCitationNumber(value="[2023] TEST 1234")
+        summary.identifiers.add(document_ncn)
+        assert summary.best_human_identifier == document_ncn
 
 
 class TestPressSummaryValidation:

From 22cc54d3d941495e1aff2b86a2f24ccb8bd62983 Mon Sep 17 00:00:00 2001
From: Nick Jackson <nick@dxw.com>
Date: Wed, 18 Dec 2024 10:42:35 +0000
Subject: [PATCH 4/5] docs: update CHANGELOG

---
 CHANGELOG.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a225f0bf..67edc429 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,21 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog 1.0.0].
 
+## Unreleased
+
+### BREAKING CHANGE
+
+- Methods which were previously guaranteed to return a Neutral Citation may now return `None`.
+
+### Feat
+
+- **FCL-533**: modify human identifier to rely on identifiers framework
+- **FCL-533**: add scoring to Identifiers
+
+### Fix
+
+- **IdentifierSchema**: use hasattr instead of getattr with a default when testing required attributes
+
 ## v28.2.0 (2024-12-17)
 
 ### Feat

From b2044547e9ad06b8595dafe7ddcccc8f46a0eab0 Mon Sep 17 00:00:00 2001
From: Nick Jackson <nick@dxw.com>
Date: Wed, 18 Dec 2024 11:17:38 +0000
Subject: [PATCH 5/5] feat(FCL-533): getting scored or preferred identifiers
 can now be done by type

---
 CHANGELOG.md                                  |  1 +
 .../models/documents/__init__.py              |  2 +-
 .../models/identifiers/__init__.py            | 23 ++++++++++++-------
 tests/models/identifiers/test_identifiers.py  | 10 ++++++--
 4 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 67edc429..e874bb8c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog 1.0.0].
 
 ### Feat
 
+- **FCL-533**: getting scored or preferred identifiers can now be done by type
 - **FCL-533**: modify human identifier to rely on identifiers framework
 - **FCL-533**: add scoring to Identifiers
 
diff --git a/src/caselawclient/models/documents/__init__.py b/src/caselawclient/models/documents/__init__.py
index a85f1763..01ba119a 100644
--- a/src/caselawclient/models/documents/__init__.py
+++ b/src/caselawclient/models/documents/__init__.py
@@ -174,7 +174,7 @@ def _initialise_identifiers(self) -> None:
     @property
     def best_human_identifier(self) -> Optional[Identifier]:
         """Return the preferred identifier for the document, providing that it is considered human readable."""
-        preferred_identifier = self.identifiers.preferred
+        preferred_identifier = self.identifiers.preferred()
         if preferred_identifier and preferred_identifier.schema.human_readable:
             return preferred_identifier
         return None
diff --git a/src/caselawclient/models/identifiers/__init__.py b/src/caselawclient/models/identifiers/__init__.py
index 1a6a8c5e..a87a3669 100644
--- a/src/caselawclient/models/identifiers/__init__.py
+++ b/src/caselawclient/models/identifiers/__init__.py
@@ -162,14 +162,21 @@ def as_etree(self) -> etree._Element:
 
         return identifiers_root
 
-    @property
-    def by_score(self) -> list[Identifier]:
-        """Return a list of identifiers, sorted by their score in descending order."""
-        return sorted(self.values(), key=lambda v: v.score, reverse=True)
+    def by_score(self, type: Optional[type[Identifier]] = None) -> list[Identifier]:
+        """
+        :param type: Optionally, an identifier type to constrain this list to.
 
-    @property
-    def preferred(self) -> Optional[Identifier]:
-        """If this document has identifiers, return the preferred one."""
+        :return: Return a list of identifiers, sorted by their score in descending order.
+        """
+        identifiers = self.of_type(type) if type else list(self.values())
+        return sorted(identifiers, key=lambda v: v.score, reverse=True)
+
+    def preferred(self, type: Optional[type[Identifier]] = None) -> Optional[Identifier]:
+        """
+        :param type: Optionally, an identifier type to constrain the results to.
+
+        :return: Return the highest scoring identifier of the given type (or of any type, if none is specified). Returns `None` if no identifier is available.
+        """
         if len(self) == 0:
             return None
-        return self.by_score[0]
+        return self.by_score(type)[0]
diff --git a/tests/models/identifiers/test_identifiers.py b/tests/models/identifiers/test_identifiers.py
index b7739cf0..94a034ab 100644
--- a/tests/models/identifiers/test_identifiers.py
+++ b/tests/models/identifiers/test_identifiers.py
@@ -131,7 +131,13 @@ def test_base_scoring(self):
         assert identifier.score == 2.5
 
     def test_sorting(self, mixed_identifiers):
-        assert mixed_identifiers.by_score == [TEST_IDENTIFIER_999, TEST_NCN_1701, TEST_NCN_1234]
+        assert mixed_identifiers.by_score() == [TEST_IDENTIFIER_999, TEST_NCN_1701, TEST_NCN_1234]
 
     def test_preferred_identifier(self, mixed_identifiers):
-        assert mixed_identifiers.preferred == TEST_IDENTIFIER_999
+        assert mixed_identifiers.preferred() == TEST_IDENTIFIER_999
+
+    def test_sorting_with_type(self, mixed_identifiers):
+        assert mixed_identifiers.by_score(type=NeutralCitationNumber) == [TEST_NCN_1701, TEST_NCN_1234]
+
+    def test_preferred_identifier_with_type(self, mixed_identifiers):
+        assert mixed_identifiers.preferred(type=NeutralCitationNumber) == TEST_NCN_1701