From 120954252de0e81e98843eac41ac988c91e46fff Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Tue, 25 Apr 2023 11:47:36 +0100 Subject: [PATCH] Improve robustness of SearchResult date parsing This is now tested, and will log a warning if a non-empty unparseable string is detected. --- judgments/models.py | 7 ++++++- judgments/tests/test_search.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/judgments/models.py b/judgments/models.py index 12e899866..6df78509e 100644 --- a/judgments/models.py +++ b/judgments/models.py @@ -1,3 +1,4 @@ +import logging from os.path import dirname, join from caselawclient.Client import api_client @@ -29,7 +30,11 @@ def __init__( try: self.date = dateparser.parse(date) - except ParserError: + except ParserError as e: + if date != "": + logging.warning( + f'Unable to parse document date "{date}". Full error: {e}' + ) self.date = None try: self.court = courts.get_by_code(court) diff --git a/judgments/tests/test_search.py b/judgments/tests/test_search.py index 46650adf1..a6b22b8c8 100644 --- a/judgments/tests/test_search.py +++ b/judgments/tests/test_search.py @@ -1,5 +1,9 @@ +from datetime import datetime +from logging import WARNING +from typing import Any from unittest.mock import patch +import pytest from dateutil import parser as dateparser from django.test import TestCase from lxml import etree @@ -159,3 +163,33 @@ def test_create_from_node_with_missing_elements(self, fake_client): self.assertEqual(None, search_result.neutral_citation) self.assertEqual(None, search_result.court) self.assertEqual(None, search_result.content_hash) + + +class TestSearchResultInit: + @pytest.mark.parametrize( + "date_string, expected", + [ + ["", None], + ["2023-05-09", datetime(2023, 5, 9, 0, 0)], + ["2023-04-05T06:54:00", datetime(2023, 4, 5, 6, 54)], + ["ffffff", None], + ], + ) + def test_searchresult_date_parsing(self, date_string: str, expected: Any): + search_result = fake_search_result(date=date_string) + + assert search_result.date == expected + + def test_unparseable_non_empty_string_logs_warning(self, caplog): + caplog.set_level(WARNING) + + fake_search_result(date="ffffff") + + assert "Unable to parse document date" in caplog.text + + def test_unparseable_empty_string_doesnt_log_warning(self, caplog): + caplog.set_level(WARNING) + + fake_search_result(date="") + + assert "Unable to parse document date" not in caplog.text