Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update CladeTime tree_as_of and sequence_as_of date handling #34

Merged
merged 3 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ build-backend = "setuptools.build_meta"
[tool.pytest.ini_options]
tmp_path_retention_policy = "none"
filterwarnings = [
"ignore::cladetime.exceptions.CladeTimeFutureDateWarning",
"ignore::DeprecationWarning",
'ignore:polars found a filename',
]
Expand Down
72 changes: 57 additions & 15 deletions src/cladetime/cladetime.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Class for clade time traveling."""

import warnings
from datetime import datetime, timezone

import polars as pl
import structlog

from cladetime.exceptions import CladeTimeInvalidDateError, CladeTimeInvalidURLError
from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError
from cladetime.util.config import Config
from cladetime.util.reference import _get_s3_object_url
from cladetime.util.sequence import _get_ncov_metadata, get_covid_genome_metadata
Expand Down Expand Up @@ -51,15 +52,17 @@ def __init__(self, sequence_as_of=None, tree_as_of=None):
Use the NextStrain sequences and sequence metadata that were available
as of this date. Can be a datetime object, a string in the format
"YYYY-MM-DD", or None (which defaults to the current date and time).
CladeTime treats all dates and times as UTC.
tree_as_of : datetime | str | None, default = now()
Use the NextStrain reference tree that was available as of this date.
Can be a datetime object, a string in the format
"YYYY-MM-DD", or None (which defaults to the current date and time).
"YYYY-MM-DD", or None (which defaults to the sequence_as_of date).
CladeTime treats all dates and times as UTC.
"""

self._config = self._get_config()
self.sequence_as_of = self._validate_as_of_date(sequence_as_of)
self.tree_as_of = self._validate_as_of_date(tree_as_of)
self.sequence_as_of = sequence_as_of
self.tree_as_of = tree_as_of
self._ncov_metadata = {}
self._sequence_metadata = pl.LazyFrame()

Expand All @@ -78,13 +81,52 @@ def __init__(self, sequence_as_of=None, tree_as_of=None):
else:
self.url_ncov_metadata = None

@property
def sequence_as_of(self) -> datetime:
return self._sequence_as_of

@sequence_as_of.setter
def sequence_as_of(self, date) -> None:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for setting sequence_as_of and tree_as_of default values is no longer the same, so created a setter for each of these properties.

"""Set the sequence_as_of attribute."""
sequence_as_of = self._validate_as_of_date(date)
utc_now = datetime.now(timezone.utc)
if sequence_as_of > utc_now:
warnings.warn(
f"specified sequence_as_of is in the future, defaulting to current time: {utc_now}",
category=CladeTimeFutureDateWarning,
)
sequence_as_of = utc_now

self._sequence_as_of = sequence_as_of

@property
def tree_as_of(self) -> datetime:
return self._tree_as_of

@tree_as_of.setter
def tree_as_of(self, date) -> None:
"""Set the tree_as_of attribute."""
if date is None:
tree_as_of = self.sequence_as_of
else:
tree_as_of = self._validate_as_of_date(date)
utc_now = datetime.now(timezone.utc)
if tree_as_of > utc_now:
warnings.warn(
f"specified tree_as_of is in the future, defaulting to sequence_as_of: {self.sequence_as_of}",
category=CladeTimeFutureDateWarning,
)
tree_as_of = self.sequence_as_of

self._tree_as_of = tree_as_of

@property
def ncov_metadata(self):
return self._ncov_metadata

@ncov_metadata.getter
def ncov_metadata(self) -> dict:
"""Set the ncov_metadata attribute."""
"""Get the ncov_metadata attribute."""
if self.url_ncov_metadata:
metadata = _get_ncov_metadata(self.url_ncov_metadata)
return metadata
Expand All @@ -98,13 +140,12 @@ def sequence_metadata(self):

@sequence_metadata.getter
def sequence_metadata(self) -> pl.LazyFrame:
"""Set the sequence_metadata attribute."""
"""Get the sequence_metadata attribute."""
if self.url_sequence_metadata:
sequence_metadata = get_covid_genome_metadata(metadata_url=self.url_sequence_metadata)
return sequence_metadata
else:
raise CladeTimeInvalidURLError("CladeTime is missing url_sequence_metadata")
return sequence_metadata

def __repr__(self):
return f"CladeTime(sequence_as_of={self.sequence_as_of}, tree_as_of={self.tree_as_of})"
Expand All @@ -121,22 +162,23 @@ def _get_config(self) -> Config:
return config

def _validate_as_of_date(self, as_of: str) -> datetime:
"""Validate date the as_of dates used to instantiate CladeTime."""
"""Validate an as_of date used to instantiate CladeTime.

All dates used to instantiate CladeTime are assigned
a datetime tzinfo of UTC.
"""
if as_of is None:
as_of_date = datetime.now()
as_of_date = datetime.now(timezone.utc)
elif isinstance(as_of, datetime):
as_of_date = as_of
as_of_date = as_of.replace(tzinfo=timezone.utc)
elif isinstance(as_of, str):
try:
as_of_date = datetime.strptime(as_of, "%Y-%m-%d")
as_of_date = datetime.strptime(as_of, "%Y-%m-%d").replace(tzinfo=timezone.utc)
except ValueError as e:
raise CladeTimeInvalidDateError(f"Invalid date string: {as_of} (should be in YYYY-MM-DD format)") from e

as_of_date = as_of_date.replace(microsecond=0, tzinfo=timezone.utc)
as_of_date = as_of_date.replace(microsecond=0)
if as_of_date < self._config.nextstrain_min_seq_date:
raise CladeTimeInvalidDateError(f"Date must be after May 1, 2023: {as_of_date}")

if as_of_date > datetime.now().replace(tzinfo=timezone.utc):
raise CladeTimeInvalidDateError(f"Date cannot be in the future: {as_of_date}")

return as_of_date
4 changes: 4 additions & 0 deletions src/cladetime/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ class CladeTimeInvalidDateError(Error):

class CladeTimeInvalidURLError(Error):
"""Raised when CladeTime encounters an invalid URL."""


class CladeTimeFutureDateWarning(Warning):
"""Raised when CladeTime as_of date is in the future."""
33 changes: 30 additions & 3 deletions tests/unit/test_cladetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import dateutil.tz
import pytest
from cladetime.cladetime import CladeTime
from cladetime.exceptions import CladeTimeInvalidDateError, CladeTimeInvalidURLError
from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError
from freezegun import freeze_time


Expand Down Expand Up @@ -36,14 +36,32 @@ def test_cladetime_no_args():
datetime(2024, 9, 30, 18, 24, 59, 655398),
None,
datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc),
),
(
datetime(2024, 2, 22, 22, 22, 22, 222222, tzinfo=dateutil.tz.gettz("US/Eastern")),
datetime(2024, 2, 22, tzinfo=dateutil.tz.gettz("US/Eastern")),
datetime(2024, 2, 22, 22, 22, 22, tzinfo=timezone.utc),
datetime(2024, 2, 22, tzinfo=timezone.utc),
),
(
"2023-12-21",
None,
datetime(2023, 12, 21, tzinfo=timezone.utc),
datetime(2023, 12, 21, tzinfo=timezone.utc),
),
(
"2063-12-21",
None,
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
),
(
"2063-12-21",
"2074-07-13",
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
),
],
)
def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_of, expected_tree_as_of):
Expand All @@ -54,12 +72,21 @@ def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_
assert ct.tree_as_of == expected_tree_as_of


@pytest.mark.parametrize("bad_date", ["2020-07-13", "2022-12-32", "2063-04-05"])
@pytest.mark.parametrize("bad_date", ["2020-07-13", "2022-12-32"])
def test_cladetime_invalid_date(bad_date):
with pytest.raises(CladeTimeInvalidDateError):
CladeTime(sequence_as_of=bad_date, tree_as_of=bad_date)


def test_cladetime_future_date():
with pytest.warns(CladeTimeFutureDateWarning):
CladeTime(sequence_as_of="2063-07-13")
with pytest.warns(CladeTimeFutureDateWarning):
CladeTime(tree_as_of="2063-07-13")
with pytest.warns(CladeTimeFutureDateWarning):
CladeTime(sequence_as_of="2023-12-31", tree_as_of="2063-07-13")


@pytest.mark.parametrize(
"sequence_as_of, expected_content",
[
Expand Down