diff --git a/src/cladetime/cladetime.py b/src/cladetime/cladetime.py index 09efd3a..d38e6b1 100644 --- a/src/cladetime/cladetime.py +++ b/src/cladetime/cladetime.py @@ -32,15 +32,17 @@ class CladeTime: sequence metadata files that will be used by CladeTime properties and methods. Can be a datetime object or a string in YYYY-MM-DD format, both of which will be treated as - UTC. The default value is the current time. + UTC. The default value is the current UTC time. Dates passed + as YYYY-MM-DD strings will be set to 11:59:59 PM UTC. tree_as_of : datetime.datetime | str | None Sets the version of the Nextstrain reference tree that will be used by CladeTime. Can be a datetime object or a string in YYYY-MM-DD format, both of which will be treated as UTC. The default value is :any:`sequence_as_of`, unless sequence_as_of is before reference tree availability - (2024-08-01), in which case tree_as_of will default to the - current time. + (2024-08-01), in which case tree_as_of will default to the + current UTC time. Dates passed as YYYY-MM-DD strings will be + set to 11:59:59 PM UTC. 
Attributes ---------- diff --git a/src/cladetime/sequence.py b/src/cladetime/sequence.py index e425b1a..534de4f 100644 --- a/src/cladetime/sequence.py +++ b/src/cladetime/sequence.py @@ -274,10 +274,10 @@ def filter_metadata( # Apply filters for min and max sequence collection date, if applicable if collection_min_date is not None: - collection_min_date = _get_date(collection_min_date) + collection_min_date = _get_date(collection_min_date).replace(hour=0, minute=0, second=0) filtered_metadata = filtered_metadata.filter(pl.col("date") >= collection_min_date) if collection_max_date is not None: - collection_max_date = _get_date(collection_max_date) + collection_max_date = _get_date(collection_max_date).replace(hour=0, minute=0, second=0) filtered_metadata = filtered_metadata.filter(pl.col("date") <= collection_max_date) # Create state mappings based on state_format parameter, including a DC alias, since diff --git a/src/cladetime/util/reference.py b/src/cladetime/util/reference.py index e1b6ea8..5369cbf 100644 --- a/src/cladetime/util/reference.py +++ b/src/cladetime/util/reference.py @@ -28,7 +28,11 @@ def _get_date(original_date: datetime | str | None) -> datetime: new_date = original_date.replace(tzinfo=timezone.utc) elif isinstance(original_date, str): try: - new_date = datetime.strptime(original_date, "%Y-%m-%d").replace(tzinfo=timezone.utc) + new_date = ( + datetime.strptime(original_date, "%Y-%m-%d") + .replace(hour=11, minute=59, second=59) + .replace(tzinfo=timezone.utc) + ) except ValueError as e: raise ValueError(f"Invalid date format: {original_date}") from e diff --git a/tests/integration/test_cladetime_integration.py b/tests/integration/test_cladetime_integration.py index 4551a97..4262207 100644 --- a/tests/integration/test_cladetime_integration.py +++ b/tests/integration/test_cladetime_integration.py @@ -109,9 +109,9 @@ def test_assign_old_tree(test_file_path, tmp_path, test_sequences): expected_summary, old_assigned_clades.summary.collect(), 
check_column_order=False, check_row_order=False ) - # metadata should reflect ncov metadata as of 2024-11-01 assert old_assigned_clades.meta.get("sequence_as_of") == datetime(2024, 11, 1, tzinfo=timezone.utc) - assert old_assigned_clades.meta.get("tree_as_of") == datetime(2024, 8, 2, tzinfo=timezone.utc) + assert old_assigned_clades.meta.get("tree_as_of") == datetime(2024, 8, 2, 11, 59, 59, tzinfo=timezone.utc) + # nextclade metadata should reflect its state on tree_as_of (2024-08-02) assert old_assigned_clades.meta.get("nextclade_dataset_version") == "2024-07-17--12-57-03Z" assert old_assigned_clades.meta.get("nextclade_version_num") == "3.8.2" assert old_assigned_clades.meta.get("assignment_as_of") == "2024-11-01 00:00" diff --git a/tests/unit/test_cladetime.py b/tests/unit/test_cladetime.py index 08289c1..aba6e81 100644 --- a/tests/unit/test_cladetime.py +++ b/tests/unit/test_cladetime.py @@ -26,8 +26,8 @@ def test_cladetime_no_args(): # (metadata for reference trees started publishing in Aug, 2024) "2024-09-01", "2024-01-01", - datetime(2024, 9, 1, tzinfo=timezone.utc), - datetime(2024, 9, 1, tzinfo=timezone.utc), + datetime(2024, 9, 1, 11, 59, 59, tzinfo=timezone.utc), + datetime(2024, 9, 1, 11, 59, 59, tzinfo=timezone.utc), ), ( # sequence_as_of set to current date, tree_as_of defaults to sequence_as_of @@ -41,7 +41,7 @@ def test_cladetime_no_args(): None, "2024-09-01", datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), - datetime(2024, 9, 1, tzinfo=timezone.utc), + datetime(2024, 9, 1, 11, 59, 59, tzinfo=timezone.utc), ), ( # tree_as_of set to sequence_as_of @@ -62,7 +62,7 @@ def test_cladetime_no_args(): # defaults to current date "2023-12-21", None, - datetime(2023, 12, 21, tzinfo=timezone.utc), + datetime(2023, 12, 21, 11, 59, 59, tzinfo=timezone.utc), datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), ), ( @@ -85,7 +85,7 @@ def test_cladetime_no_args(): # 2024-08-01, so it should revert to current date "2023-07-13", "2074-07", - datetime(2023, 
7, 13, tzinfo=timezone.utc), + datetime(2023, 7, 13, 11, 59, 59, tzinfo=timezone.utc), datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), ), ],