diff --git a/poetry.lock b/poetry.lock index 7b9fbd68..a250de86 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1137,13 +1137,13 @@ docs = ["sphinx (>=8,<9)", "sphinx-autobuild"] [[package]] name = "mypy-boto3-s3" -version = "1.35.76.post1" -description = "Type annotations for boto3 S3 1.35.76 service generated with mypy-boto3-builder 8.6.3" +version = "1.35.81" +description = "Type annotations for boto3 S3 1.35.81 service generated with mypy-boto3-builder 8.6.4" optional = false python-versions = ">=3.8" files = [ - {file = "mypy_boto3_s3-1.35.76.post1-py3-none-any.whl", hash = "sha256:fd4a8734c3bb5a2da52e22258b1836a14aa3460816df25c831790e464334021f"}, - {file = "mypy_boto3_s3-1.35.76.post1.tar.gz", hash = "sha256:34ac4cacf8acdafa6e71a2810116b2546376f241761f9eec6ac5a9887309372b"}, + {file = "mypy_boto3_s3-1.35.81-py3-none-any.whl", hash = "sha256:6af1d815ff2cc8e32ca1190c7387f94341c1607444f958ac283aa10b1d11db08"}, + {file = "mypy_boto3_s3-1.35.81.tar.gz", hash = "sha256:fe1a6860c0ca7016e24089819433c0d5835d4a57635bb42628645b71271b946c"}, ] [package.dependencies] diff --git a/src/caselawclient/models/identifiers/unpacker.py b/src/caselawclient/models/identifiers/unpacker.py index c98937a8..7233db86 100644 --- a/src/caselawclient/models/identifiers/unpacker.py +++ b/src/caselawclient/models/identifiers/unpacker.py @@ -1,4 +1,5 @@ from typing import Optional +from warnings import warn from lxml import etree @@ -19,12 +20,13 @@ def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element return identifiers for identifier_etree in identifiers_etree.findall("identifier"): identifier = unpack_an_identifier_from_etree(identifier_etree) - identifiers.add(identifier) + if identifier: + identifiers.add(identifier) return identifiers -def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Identifier: - """Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier.""" +def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Optional[Identifier]: + """Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier if the type is known (otherwise return `None`).""" namespace_element = identifier_xml.find("namespace") @@ -33,6 +35,11 @@ def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Identifie "Identifer XML representation is not valid: namespace not present or empty" ) + # If the identifier namespace isn't known, fail out + if namespace_element.text not in IDENTIFIER_NAMESPACE_MAP: + warn(f"Identifier type {namespace_element.text} is not known.") + return None + kwargs: dict[str, str] = {} for attribute in IDENTIFIER_UNPACKABLE_ATTRIBUTES: diff --git a/tests/models/identifiers/test_identifer_unpacking.py b/tests/models/identifiers/test_identifer_unpacking.py index 0911d37e..499d9bbd 100644 --- a/tests/models/identifiers/test_identifer_unpacking.py +++ b/tests/models/identifiers/test_identifer_unpacking.py @@ -1,3 +1,4 @@ +import unittest from unittest.mock import patch from lxml import etree @@ -7,7 +8,7 @@ from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree, unpack_an_identifier_from_etree -class TestIdentifierUnpacking: +class TestIdentifierUnpacking(unittest.TestCase): @patch("caselawclient.models.identifiers.unpacker.IDENTIFIER_NAMESPACE_MAP", {"test": TestIdentifier}) def test_unpack_identifier(self): xml_tree = etree.fromstring(""" @@ -24,6 +25,21 @@ def test_unpack_identifier(self): assert unpacked_identifier.uuid == "2d80bf1d-e3ea-452f-965c-041f4399f2dd" assert unpacked_identifier.value == "TEST-123" + @patch("caselawclient.models.identifiers.unpacker.IDENTIFIER_NAMESPACE_MAP", {"test": TestIdentifier}) + def test_unpack_unknown_identifier(self): + xml_tree = etree.fromstring(""" + + unknown + 2d80bf1d-e3ea-452f-965c-041f4399f2dd + UK-123 + + """) + + with self.assertWarnsRegex(Warning, "Identifier type unknown is not known."): + unpacked_identifier = unpack_an_identifier_from_etree(xml_tree) + + assert unpacked_identifier is None + class TestIdentifierPackUnpackRoundTrip: @patch("caselawclient.models.identifiers.unpacker.IDENTIFIER_NAMESPACE_MAP", {"test": TestIdentifier})