diff --git a/CHANGELOG.md b/CHANGELOG.md index 63ab7015..87a68deb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog 1.0.0]. +## Unreleased + +### Fix + +- **Identifiers**: fix case where unpacking unknown identifier type would raise an exception + ## v29.0.0 (2024-12-18) ### BREAKING CHANGE diff --git a/src/caselawclient/models/identifiers/unpacker.py b/src/caselawclient/models/identifiers/unpacker.py index c98937a8..7233db86 100644 --- a/src/caselawclient/models/identifiers/unpacker.py +++ b/src/caselawclient/models/identifiers/unpacker.py @@ -1,4 +1,5 @@ from typing import Optional +from warnings import warn from lxml import etree @@ -19,12 +20,13 @@ def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element return identifiers for identifier_etree in identifiers_etree.findall("identifier"): identifier = unpack_an_identifier_from_etree(identifier_etree) - identifiers.add(identifier) + if identifier: + identifiers.add(identifier) return identifiers -def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Identifier: - """Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier.""" +def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Optional[Identifier]: + """Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier if the type is known (otherwise return `None`).""" namespace_element = identifier_xml.find("namespace") @@ -33,6 +35,11 @@ def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Identifie "Identifer XML representation is not valid: namespace not present or empty" ) + # If the identifier namespace isn't known, fail out + if namespace_element.text not in IDENTIFIER_NAMESPACE_MAP: + warn(f"Identifier type {namespace_element.text} is not known.") + return None + kwargs: dict[str, str] = {} for attribute in IDENTIFIER_UNPACKABLE_ATTRIBUTES: diff --git a/tests/models/identifiers/test_identifer_unpacking.py b/tests/models/identifiers/test_identifer_unpacking.py index 0911d37e..499d9bbd 100644 --- a/tests/models/identifiers/test_identifer_unpacking.py +++ b/tests/models/identifiers/test_identifer_unpacking.py @@ -1,3 +1,4 @@ +import unittest from unittest.mock import patch from lxml import etree @@ -7,7 +8,7 @@ from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree, unpack_an_identifier_from_etree -class TestIdentifierUnpacking: +class TestIdentifierUnpacking(unittest.TestCase): @patch("caselawclient.models.identifiers.unpacker.IDENTIFIER_NAMESPACE_MAP", {"test": TestIdentifier}) def test_unpack_identifier(self): xml_tree = etree.fromstring(""" @@ -24,6 +25,21 @@ def test_unpack_identifier(self): assert unpacked_identifier.uuid == "2d80bf1d-e3ea-452f-965c-041f4399f2dd" assert unpacked_identifier.value == "TEST-123" + @patch("caselawclient.models.identifiers.unpacker.IDENTIFIER_NAMESPACE_MAP", {"test": TestIdentifier}) + def test_unpack_unknown_identifier(self): + xml_tree = etree.fromstring(""" + + unknown + 2d80bf1d-e3ea-452f-965c-041f4399f2dd + UK-123 + + """) + + with self.assertWarnsRegex(Warning, "Identifier type unknown is not known."): + unpacked_identifier = unpack_an_identifier_from_etree(xml_tree) + + assert unpacked_identifier is None + class TestIdentifierPackUnpackRoundTrip: @patch("caselawclient.models.identifiers.unpacker.IDENTIFIER_NAMESPACE_MAP", {"test": TestIdentifier})