Skip to content

Commit

Permalink
Update CladeTime tree_as_of and sequence_as_of date handling
Browse files Browse the repository at this point in the history
Resolve #33
  • Loading branch information
bsweger committed Oct 9, 2024
1 parent 64e6ad0 commit 731213d
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 14 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ build-backend = "setuptools.build_meta"
[tool.pytest.ini_options]
tmp_path_retention_policy = "none"
filterwarnings = [
"ignore::cladetime.exceptions.CladeTimeFutureDateWarning",
"ignore::DeprecationWarning",
'ignore:polars found a filename',
]
Expand Down
60 changes: 49 additions & 11 deletions src/cladetime/cladetime.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Class for clade time traveling."""

import warnings
from datetime import datetime, timezone

import polars as pl
import structlog

from cladetime.exceptions import CladeTimeInvalidDateError, CladeTimeInvalidURLError
from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError
from cladetime.util.config import Config
from cladetime.util.reference import _get_s3_object_url
from cladetime.util.sequence import _get_ncov_metadata, get_covid_genome_metadata
Expand Down Expand Up @@ -54,12 +55,12 @@ def __init__(self, sequence_as_of=None, tree_as_of=None):
tree_as_of : datetime | str | None, default = now()
Use the NextStrain reference tree that was available as of this date.
Can be a datetime object, a string in the format
"YYYY-MM-DD", or None (which defaults to the current date and time).
"YYYY-MM-DD", or None (which defaults to the sequence_as_of date).
"""

self._config = self._get_config()
self.sequence_as_of = self._validate_as_of_date(sequence_as_of)
self.tree_as_of = self._validate_as_of_date(tree_as_of)
self.sequence_as_of = sequence_as_of
self.tree_as_of = tree_as_of
self._ncov_metadata = {}
self._sequence_metadata = pl.LazyFrame()

Expand All @@ -78,13 +79,54 @@ def __init__(self, sequence_as_of=None, tree_as_of=None):
else:
self.url_ncov_metadata = None

@property
def sequence_as_of(self) -> datetime:
return self._sequence_as_of

@sequence_as_of.setter
def sequence_as_of(self, date) -> None:
"""Set the sequence_as_of attribute."""
if date is None:
sequence_as_of = datetime.now()
sequence_as_of = self._validate_as_of_date(date)
utc_now = datetime.now().replace(tzinfo=timezone.utc)
if sequence_as_of > utc_now:
warnings.warn(
f"specified sequence_as_of is in the future, defaulting to current time: {utc_now}",
category=CladeTimeFutureDateWarning,
)
sequence_as_of = utc_now

self._sequence_as_of = sequence_as_of

@property
def tree_as_of(self) -> datetime:
return self._tree_as_of

@tree_as_of.setter
def tree_as_of(self, date) -> None:
"""Set the tree_as_of attribute."""
if date is None:
tree_as_of = self.sequence_as_of
else:
tree_as_of = self._validate_as_of_date(date)
utc_now = datetime.now().replace(tzinfo=timezone.utc)
if tree_as_of > utc_now:
warnings.warn(
f"specified tree_as_of is in the future, defaulting to sequence_as_of: {self.sequence_as_of}",
category=CladeTimeFutureDateWarning,
)
tree_as_of = self.sequence_as_of

self._tree_as_of = tree_as_of

@property
def ncov_metadata(self):
return self._ncov_metadata

@ncov_metadata.getter
def ncov_metadata(self) -> dict:
"""Set the ncov_metadata attribute."""
"""Get the ncov_metadata attribute."""
if self.url_ncov_metadata:
metadata = _get_ncov_metadata(self.url_ncov_metadata)
return metadata
Expand All @@ -98,13 +140,12 @@ def sequence_metadata(self):

@sequence_metadata.getter
def sequence_metadata(self) -> pl.LazyFrame:
"""Set the sequence_metadata attribute."""
"""Get the sequence_metadata attribute."""
if self.url_sequence_metadata:
sequence_metadata = get_covid_genome_metadata(metadata_url=self.url_sequence_metadata)
return sequence_metadata
else:
raise CladeTimeInvalidURLError("CladeTime is missing url_sequence_metadata")
return sequence_metadata

def __repr__(self):
return f"CladeTime(sequence_as_of={self.sequence_as_of}, tree_as_of={self.tree_as_of})"
Expand All @@ -121,7 +162,7 @@ def _get_config(self) -> Config:
return config

def _validate_as_of_date(self, as_of: str) -> datetime:
"""Validate date the as_of dates used to instantiate CladeTime."""
"""Validate an as_of date (UTC) used to instantiate CladeTime."""
if as_of is None:
as_of_date = datetime.now()
elif isinstance(as_of, datetime):
Expand All @@ -136,7 +177,4 @@ def _validate_as_of_date(self, as_of: str) -> datetime:
if as_of_date < self._config.nextstrain_min_seq_date:
raise CladeTimeInvalidDateError(f"Date must be after May 1, 2023: {as_of_date}")

if as_of_date > datetime.now().replace(tzinfo=timezone.utc):
raise CladeTimeInvalidDateError(f"Date cannot be in the future: {as_of_date}")

return as_of_date
4 changes: 4 additions & 0 deletions src/cladetime/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ class CladeTimeInvalidDateError(Error):

class CladeTimeInvalidURLError(Error):
"""Raised when CladeTime encounters an invalid URL."""


class CladeTimeFutureDateWarning(Warning):
"""Raised when CladeTime as_of date is in the future."""
33 changes: 30 additions & 3 deletions tests/unit/test_cladetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import dateutil.tz
import pytest
from cladetime.cladetime import CladeTime
from cladetime.exceptions import CladeTimeInvalidDateError, CladeTimeInvalidURLError
from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError
from freezegun import freeze_time


Expand Down Expand Up @@ -36,14 +36,32 @@ def test_cladetime_no_args():
datetime(2024, 9, 30, 18, 24, 59, 655398),
None,
datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc),
),
(
datetime(2024, 2, 22, 22, 22, 22, 222222, tzinfo=dateutil.tz.gettz("US/Eastern")),
datetime(2024, 2, 22, tzinfo=dateutil.tz.gettz("US/Eastern")),
datetime(2024, 2, 22, 22, 22, 22, tzinfo=timezone.utc),
datetime(2024, 2, 22, tzinfo=timezone.utc),
),
(
"2023-12-21",
None,
datetime(2023, 12, 21, tzinfo=timezone.utc),
datetime(2023, 12, 21, tzinfo=timezone.utc),
),
(
"2063-12-21",
None,
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
),
(
"2063-12-21",
"2074-07-13",
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
),
],
)
def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_of, expected_tree_as_of):
Expand All @@ -54,12 +72,21 @@ def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_
assert ct.tree_as_of == expected_tree_as_of


@pytest.mark.parametrize("bad_date", ["2020-07-13", "2022-12-32", "2063-04-05"])
@pytest.mark.parametrize("bad_date", ["2020-07-13", "2022-12-32"])
def test_cladetime_invalid_date(bad_date):
with pytest.raises(CladeTimeInvalidDateError):
CladeTime(sequence_as_of=bad_date, tree_as_of=bad_date)


def test_cladetime_future_date():
with pytest.warns(CladeTimeFutureDateWarning):
CladeTime(sequence_as_of="2063-07-13")
with pytest.warns(CladeTimeFutureDateWarning):
CladeTime(tree_as_of="2063-07-13")
with pytest.warns(CladeTimeFutureDateWarning):
CladeTime(sequence_as_of="2023-12-31", tree_as_of="2063-07-13")


@pytest.mark.parametrize(
"sequence_as_of, expected_content",
[
Expand Down

0 comments on commit 731213d

Please sign in to comment.