From cfb7b1a224ed7742aeb801cf38584aba342b33f7 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Thu, 28 Nov 2024 14:58:01 +0000 Subject: [PATCH 1/3] fix tests --- .../tests/test_processing_functions.py | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/preprocessing/nextclade/tests/test_processing_functions.py b/preprocessing/nextclade/tests/test_processing_functions.py index 68d604f0c..8b0b2918a 100644 --- a/preprocessing/nextclade/tests/test_processing_functions.py +++ b/preprocessing/nextclade/tests/test_processing_functions.py @@ -482,27 +482,40 @@ def test_format_authors() -> None: raise AssertionError(msg) -def test_parse_date_into_range(): +def test_parse_date_into_range() -> None: assert ProcessingFunctions.parse_date_into_range( {"date": "2021-12"}, "field_name", {"fieldType": "dateRangeString"} - ), "2021-12" + ).datum == "2021-12" assert ProcessingFunctions.parse_date_into_range( {"date": "2021-12"}, "field_name", {"fieldType": "dateRangeLower"} - ), "2021-12-01" + ).datum == "2021-12-01" assert ProcessingFunctions.parse_date_into_range( {"date": "2021-12"}, "field_name", {"fieldType": "dateRangeUpper"} - ), "2021-12-31" + ).datum == "2021-12-31" assert ProcessingFunctions.parse_date_into_range( {"date": "2021-02"}, "field_name", {"fieldType": "dateRangeUpper"} - ), "2021-02-28" + ).datum == "2021-02-28" assert ProcessingFunctions.parse_date_into_range( {"date": "2021"}, "field_name", {"fieldType": "dateRangeUpper"} - ), "2021-12-31" + ).datum == "2021-12-31" assert ProcessingFunctions.parse_date_into_range( {"date": "2021-12", "releaseDate": "2021-12-15"}, "field_name", {"fieldType": "dateRangeUpper"}, - ), "2021-12-15" + ).datum == "2021-12-15" + assert ProcessingFunctions.parse_date_into_range( + {"date": "", "releaseDate": "2021-12-15"}, + "field_name", + {"fieldType": "dateRangeUpper"}, + ).datum == "2021-12-15" + assert ProcessingFunctions.parse_date_into_range( + {"date": ""}, "field_name", {"fieldType": "dateRangeString"} + ).datum is None + assert ProcessingFunctions.parse_date_into_range( + {"date": "", "releaseDate": "2021-12-15"}, + "field_name", + {"fieldType": "dateRangeLower"}, + ).datum is None if __name__ == "__main__": From 4fc1fb4d2db260d7ee6647c5e5427c3c505230dd Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Thu, 28 Nov 2024 15:11:37 +0000 Subject: [PATCH 2/3] remove unused case --- .../src/loculus_preprocessing/processing_functions.py | 10 ++-------- .../nextclade/tests/test_processing_functions.py | 3 +++ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/preprocessing/nextclade/src/loculus_preprocessing/processing_functions.py b/preprocessing/nextclade/src/loculus_preprocessing/processing_functions.py index f25fc276e..9c6338c18 100644 --- a/preprocessing/nextclade/src/loculus_preprocessing/processing_functions.py +++ b/preprocessing/nextclade/src/loculus_preprocessing/processing_functions.py @@ -220,6 +220,8 @@ def parse_date_into_range( except Exception: release_date = None + logger.debug(f"release_date: {release_date}") + max_upper_limit = min(filter(None, [datetime.now(tz=pytz.utc), release_date])) if not date_str: @@ -231,8 +233,6 @@ def parse_date_into_range( errors=[], ) - logger.debug(f"release_date: {release_date}") - formats_to_messages = { "%Y-%m-%d": None, "%Y-%m": "Day is missing. Assuming date is some time in the month.", @@ -276,12 +276,6 @@ class DateRange: date_range_lower=parsed_date.replace(month=1, day=1), date_range_upper=parsed_date.replace(month=12, day=31), ) - case "_": - datum = DateRange( - date_range_string=None, - date_range_lower=None, - date_range_upper=max_upper_limit, - ) logger.debug(f"parsed_date: {datum}") diff --git a/preprocessing/nextclade/tests/test_processing_functions.py b/preprocessing/nextclade/tests/test_processing_functions.py index 8b0b2918a..342d5f8ea 100644 --- a/preprocessing/nextclade/tests/test_processing_functions.py +++ b/preprocessing/nextclade/tests/test_processing_functions.py @@ -511,6 +511,9 @@ def test_parse_date_into_range() -> None: assert ProcessingFunctions.parse_date_into_range( {"date": ""}, "field_name", {"fieldType": "dateRangeString"} ).datum is None + assert ProcessingFunctions.parse_date_into_range( + {"date": "not.date"}, "field_name", {"fieldType": "dateRangeString"} + ).datum is None assert ProcessingFunctions.parse_date_into_range( {"date": "", "releaseDate": "2021-12-15"}, "field_name", From f7f7d08d1f5c1bb87f5d01d415c76ceb0663a34b Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Fri, 29 Nov 2024 12:25:49 +0000 Subject: [PATCH 3/3] Add a test failure description --- .../tests/test_processing_functions.py | 105 ++++++++++++------ 1 file changed, 68 insertions(+), 37 deletions(-) diff --git a/preprocessing/nextclade/tests/test_processing_functions.py b/preprocessing/nextclade/tests/test_processing_functions.py index 342d5f8ea..3763a5585 100644 --- a/preprocessing/nextclade/tests/test_processing_functions.py +++ b/preprocessing/nextclade/tests/test_processing_functions.py @@ -347,7 +347,8 @@ def create_test_case(self, factory_custom: ProcessedEntryFactory) -> ProcessingT not_accepted_authors = [ ";", ",;", - " ,;", ",X.;Yu,X.", + " ,;", + ",X.;Yu,X.", ",;Yu,X.", "Anna Maria Smith; Jose X. Perez", "Anna Maria Smith;", @@ -483,42 +484,72 @@ def test_format_authors() -> None: def test_parse_date_into_range() -> None: - assert ProcessingFunctions.parse_date_into_range( - {"date": "2021-12"}, "field_name", {"fieldType": "dateRangeString"} - ).datum == "2021-12" - assert ProcessingFunctions.parse_date_into_range( - {"date": "2021-12"}, "field_name", {"fieldType": "dateRangeLower"} - ).datum == "2021-12-01" - assert ProcessingFunctions.parse_date_into_range( - {"date": "2021-12"}, "field_name", {"fieldType": "dateRangeUpper"} - ).datum == "2021-12-31" - assert ProcessingFunctions.parse_date_into_range( - {"date": "2021-02"}, "field_name", {"fieldType": "dateRangeUpper"} - ).datum == "2021-02-28" - assert ProcessingFunctions.parse_date_into_range( - {"date": "2021"}, "field_name", {"fieldType": "dateRangeUpper"} - ).datum == "2021-12-31" - assert ProcessingFunctions.parse_date_into_range( - {"date": "2021-12", "releaseDate": "2021-12-15"}, - "field_name", - {"fieldType": "dateRangeUpper"}, - ).datum == "2021-12-15" - assert ProcessingFunctions.parse_date_into_range( - {"date": "", "releaseDate": "2021-12-15"}, - "field_name", - {"fieldType": "dateRangeUpper"}, - ).datum == "2021-12-15" - assert ProcessingFunctions.parse_date_into_range( - {"date": ""}, "field_name", {"fieldType": "dateRangeString"} - ).datum is None - assert ProcessingFunctions.parse_date_into_range( - {"date": "not.date"}, "field_name", {"fieldType": "dateRangeString"} - ).datum is None - assert ProcessingFunctions.parse_date_into_range( - {"date": "", "releaseDate": "2021-12-15"}, - "field_name", - {"fieldType": "dateRangeLower"}, - ).datum is None + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": "2021-12"}, "field_name", {"fieldType": "dateRangeString"} + ).datum + == "2021-12" + ), "dateRangeString: 2021-12 should be returned as is." + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": "2021-12"}, "field_name", {"fieldType": "dateRangeLower"} + ).datum + == "2021-12-01" + ), "dateRangeLower: 2021-12 should be returned as 2021-12-01." + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": "2021-12"}, "field_name", {"fieldType": "dateRangeUpper"} + ).datum + == "2021-12-31" + ), "dateRangeUpper: 2021-12 should be returned as 2021-12-31." + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": "2021-02"}, "field_name", {"fieldType": "dateRangeUpper"} + ).datum + == "2021-02-28" + ), "dateRangeUpper: 2021-02 should be returned as 2021-02-28." + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": "2021"}, "field_name", {"fieldType": "dateRangeUpper"} + ).datum + == "2021-12-31" + ), "dateRangeUpper: 2021 should be returned as 2021-12-31." + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": "2021-12", "releaseDate": "2021-12-15"}, + "field_name", + {"fieldType": "dateRangeUpper"}, + ).datum + == "2021-12-15" + ), "dateRangeUpper: 2021-12 with releaseDate 2021-12-15 should be returned as 2021-12-15." + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": "", "releaseDate": "2021-12-15"}, + "field_name", + {"fieldType": "dateRangeUpper"}, + ).datum + == "2021-12-15" + ), "dateRangeUpper: empty date with releaseDate 2021-12-15 should be returned as 2021-12-15." + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": ""}, "field_name", {"fieldType": "dateRangeString"} + ).datum + is None + ), "dateRangeString: empty date should be returned as None." + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": "not.date"}, "field_name", {"fieldType": "dateRangeString"} + ).datum + is None + ), "dateRangeString: invalid date should be returned as None." + assert ( + ProcessingFunctions.parse_date_into_range( + {"date": "", "releaseDate": "2021-12-15"}, + "field_name", + {"fieldType": "dateRangeLower"}, + ).datum + is None + ), "dateRangeLower: empty date should be returned as None." if __name__ == "__main__":