diff --git a/src/caselawclient/models/documents.py b/src/caselawclient/models/documents.py index 45b5e5ee..cee76437 100644 --- a/src/caselawclient/models/documents.py +++ b/src/caselawclient/models/documents.py @@ -1,4 +1,5 @@ import datetime +import warnings from functools import cached_property from typing import TYPE_CHECKING, Any, Dict, NewType, Optional @@ -26,6 +27,11 @@ uri_for_s3, ) + +class UnparsableDate(Warning): + pass + + DOCUMENT_STATUS_HOLD = "On hold" """ This document has been placed on hold to actively prevent publication. """ @@ -171,10 +177,19 @@ def document_date_as_string(self) -> str: ) @cached_property - def document_date_as_date(self) -> datetime.date: - return datetime.datetime.strptime( - self.document_date_as_string, "%Y-%m-%d" - ).date() + def document_date_as_date(self) -> Optional[datetime.date]: + if not self.document_date_as_string: + return None + try: + return datetime.datetime.strptime( + self.document_date_as_string, "%Y-%m-%d" + ).date() + except ValueError: + warnings.warn( + f"Unparsable date encountered: {self.document_date_as_string}", + UnparsableDate, + ) + return None def get_manifestation_datetimes(self, name: str) -> list[datetime.datetime]: iso_datetimes = self._get_xpath_match_strings( diff --git a/tests/models/test_documents.py b/tests/models/test_documents.py index f4fc7098..a196bde1 100644 --- a/tests/models/test_documents.py +++ b/tests/models/test_documents.py @@ -17,6 +17,7 @@ CannotPublishUnpublishableDocument, Document, DocumentNotSafeForDeletion, + UnparsableDate, ) @@ -490,6 +491,54 @@ def test_date_as_string(self, opening_tag, closing_tag, mock_api_client): "test/1234", show_unpublished=True ) + @pytest.mark.parametrize( + "opening_tag, closing_tag", + [ + ("judgment", "judgment"), + ('doc name="pressSummary"', "doc"), + ], + ) + def test_bad_date_as_string(self, opening_tag, closing_tag, mock_api_client): + mock_api_client.get_judgment_xml_bytestring.return_value = f""" + + <{opening_tag}> + + + + + + + + + + """.encode( + "utf-8" + ) + + document = Document("test/1234", mock_api_client) + + assert document.document_date_as_string == "kitten" + with pytest.warns(UnparsableDate): + assert document.document_date_as_date is None + mock_api_client.get_judgment_xml_bytestring.assert_called_once_with( + "test/1234", show_unpublished=True + ) + + def test_absent_date_as_string(self, mock_api_client): + mock_api_client.get_judgment_xml_bytestring.return_value = """ + + + """.encode( + "utf-8" + ) + + document = Document("test/1234", mock_api_client) + + assert document.document_date_as_string == "" + assert document.document_date_as_date is None + def test_dates(self, mock_api_client): mock_api_client.get_judgment_xml_bytestring.return_value = """