Skip to content

Commit

Permalink
Default string-based "as_of" dates to a time of 11:59:59 PM UTC
Browse files Browse the repository at this point in the history
Resolves #24

When someone instantiates a CladeTime object using a string-based
date format (YYYY-MM-DD) for sequence_as_of or tree_as_of,
set the corresponding timestamp to 11:59:59 PM UTC (23:59:59) to
ensure that the entire day is included when searching for S3
object versions that match the date.
  • Loading branch information
bsweger committed Jan 3, 2025
1 parent b2c18ca commit ff026f0
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 13 deletions.
8 changes: 5 additions & 3 deletions src/cladetime/cladetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,17 @@ class CladeTime:
sequence metadata files that will be used by CladeTime
properties and methods. Can be a datetime object or a
string in YYYY-MM-DD format, both of which will be treated as
UTC. The default value is the current time.
UTC. The default value is the current UTC time. Dates passed
as YYYY-MM-DD strings will be set to 11:59:59 PM UTC.
tree_as_of : datetime.datetime | str | None
Sets the version of the Nextstrain reference tree that will be
used by CladeTime. Can be a datetime object or a string in
YYYY-MM-DD format, both of which will be treated as UTC.
The default value is :any:`sequence_as_of<sequence_as_of>`,
unless sequence_as_of is before reference tree availability
(2024-08-01), in which case tree_as_of will default to the
current time.
(2024-08-01), in which case tree_as_of will default to the
current UTC time. Dates passed as YYYY-MM-DD strings will be
set to 11:59:59 PM UTC.
Attributes
----------
Expand Down
4 changes: 2 additions & 2 deletions src/cladetime/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,10 @@ def filter_metadata(

# Apply filters for min and max sequence collection date, if applicable
if collection_min_date is not None:
collection_min_date = _get_date(collection_min_date)
collection_min_date = _get_date(collection_min_date).replace(hour=0, minute=0, second=0)
filtered_metadata = filtered_metadata.filter(pl.col("date") >= collection_min_date)
if collection_max_date is not None:
collection_max_date = _get_date(collection_max_date)
collection_max_date = _get_date(collection_max_date).replace(hour=0, minute=0, second=0)
filtered_metadata = filtered_metadata.filter(pl.col("date") <= collection_max_date)

# Create state mappings based on state_format parameter, including a DC alias, since
Expand Down
6 changes: 5 additions & 1 deletion src/cladetime/util/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ def _get_date(original_date: datetime | str | None) -> datetime:
new_date = original_date.replace(tzinfo=timezone.utc)
elif isinstance(original_date, str):
try:
new_date = datetime.strptime(original_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
new_date = (
datetime.strptime(original_date, "%Y-%m-%d")
.replace(hour=11, minute=59, second=59)
.replace(tzinfo=timezone.utc)
)
except ValueError as e:
raise ValueError(f"Invalid date format: {original_date}") from e

Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_cladetime_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ def test_assign_old_tree(test_file_path, tmp_path, test_sequences):
expected_summary, old_assigned_clades.summary.collect(), check_column_order=False, check_row_order=False
)

# metadata should reflect ncov metadata as of 2024-11-01
assert old_assigned_clades.meta.get("sequence_as_of") == datetime(2024, 11, 1, tzinfo=timezone.utc)
assert old_assigned_clades.meta.get("tree_as_of") == datetime(2024, 8, 2, tzinfo=timezone.utc)
assert old_assigned_clades.meta.get("tree_as_of") == datetime(2024, 8, 2, 11, 59, 59, tzinfo=timezone.utc)
# nextclade metadata should reflect its state on tree_as_of (2024-08-02)
assert old_assigned_clades.meta.get("nextclade_dataset_version") == "2024-07-17--12-57-03Z"
assert old_assigned_clades.meta.get("nextclade_version_num") == "3.8.2"
assert old_assigned_clades.meta.get("assignment_as_of") == "2024-11-01 00:00"
Expand Down
10 changes: 5 additions & 5 deletions tests/unit/test_cladetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def test_cladetime_no_args():
# (metadata for reference trees started publishing in Aug, 2024)
"2024-09-01",
"2024-01-01",
datetime(2024, 9, 1, tzinfo=timezone.utc),
datetime(2024, 9, 1, tzinfo=timezone.utc),
datetime(2024, 9, 1, 11, 59, 59, tzinfo=timezone.utc),
datetime(2024, 9, 1, 11, 59, 59, tzinfo=timezone.utc),
),
(
# sequence_as_of set to current date, tree_as_of defaults to sequence_as_of
Expand All @@ -41,7 +41,7 @@ def test_cladetime_no_args():
None,
"2024-09-01",
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2024, 9, 1, tzinfo=timezone.utc),
datetime(2024, 9, 1, 11, 59, 59, tzinfo=timezone.utc),
),
(
# tree_as_of set to sequence_as_of
Expand All @@ -62,7 +62,7 @@ def test_cladetime_no_args():
# defaults to current date
"2023-12-21",
None,
datetime(2023, 12, 21, tzinfo=timezone.utc),
datetime(2023, 12, 21, 11, 59, 59, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
),
(
Expand All @@ -85,7 +85,7 @@ def test_cladetime_no_args():
# 2024-08-01, so it should revert to current date
"2023-07-13",
"2074-07",
datetime(2023, 7, 13, tzinfo=timezone.utc),
datetime(2023, 7, 13, 11, 59, 59, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
),
],
Expand Down

0 comments on commit ff026f0

Please sign in to comment.