From 731213d721ab771d12342b5b63a6e219d5103fd1 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Wed, 9 Oct 2024 16:47:17 -0400 Subject: [PATCH 1/3] Update CladeTime tree_as_of and sequence_as_of date handling Resolve #33 --- pyproject.toml | 1 + src/cladetime/cladetime.py | 60 +++++++++++++++++++++++++++++------- src/cladetime/exceptions.py | 4 +++ tests/unit/test_cladetime.py | 33 ++++++++++++++++++-- 4 files changed, 84 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a38d78c..260fd1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] tmp_path_retention_policy = "none" filterwarnings = [ + "ignore::cladetime.exceptions.CladeTimeFutureDateWarning", "ignore::DeprecationWarning", 'ignore:polars found a filename', ] diff --git a/src/cladetime/cladetime.py b/src/cladetime/cladetime.py index 22d24f5..c09e3f5 100644 --- a/src/cladetime/cladetime.py +++ b/src/cladetime/cladetime.py @@ -1,11 +1,12 @@ """Class for clade time traveling.""" +import warnings from datetime import datetime, timezone import polars as pl import structlog -from cladetime.exceptions import CladeTimeInvalidDateError, CladeTimeInvalidURLError +from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError from cladetime.util.config import Config from cladetime.util.reference import _get_s3_object_url from cladetime.util.sequence import _get_ncov_metadata, get_covid_genome_metadata @@ -54,12 +55,12 @@ def __init__(self, sequence_as_of=None, tree_as_of=None): tree_as_of : datetime | str | None, default = now() Use the NextStrain reference tree that was available as of this date. Can be a datetime object, a string in the format - "YYYY-MM-DD", or None (which defaults to the current date and time). + "YYYY-MM-DD", or None (which defaults to the sequence_as_of date). """ self._config = self._get_config() - self.sequence_as_of = self._validate_as_of_date(sequence_as_of) - self.tree_as_of = self._validate_as_of_date(tree_as_of) + self.sequence_as_of = sequence_as_of + self.tree_as_of = tree_as_of self._ncov_metadata = {} self._sequence_metadata = pl.LazyFrame() @@ -78,13 +79,54 @@ def __init__(self, sequence_as_of=None, tree_as_of=None): else: self.url_ncov_metadata = None + @property + def sequence_as_of(self) -> datetime: + return self._sequence_as_of + + @sequence_as_of.setter + def sequence_as_of(self, date) -> None: + """Set the sequence_as_of attribute.""" + if date is None: + sequence_as_of = datetime.now() + sequence_as_of = self._validate_as_of_date(date) + utc_now = datetime.now().replace(tzinfo=timezone.utc) + if sequence_as_of > utc_now: + warnings.warn( + f"specified sequence_as_of is in the future, defaulting to current time: {utc_now}", + category=CladeTimeFutureDateWarning, + ) + sequence_as_of = utc_now + + self._sequence_as_of = sequence_as_of + + @property + def tree_as_of(self) -> datetime: + return self._tree_as_of + + @tree_as_of.setter + def tree_as_of(self, date) -> None: + """Set the tree_as_of attribute.""" + if date is None: + tree_as_of = self.sequence_as_of + else: + tree_as_of = self._validate_as_of_date(date) + utc_now = datetime.now().replace(tzinfo=timezone.utc) + if tree_as_of > utc_now: + warnings.warn( + f"specified tree_as_of is in the future, defaulting to sequence_as_of: {self.sequence_as_of}", + category=CladeTimeFutureDateWarning, + ) + tree_as_of = self.sequence_as_of + + self._tree_as_of = tree_as_of + @property def ncov_metadata(self): return self._ncov_metadata @ncov_metadata.getter def ncov_metadata(self) -> dict: - """Set the ncov_metadata attribute.""" + """Get the ncov_metadata attribute.""" if self.url_ncov_metadata: metadata = _get_ncov_metadata(self.url_ncov_metadata) return metadata @@ -98,13 +140,12 @@ def sequence_metadata(self): @sequence_metadata.getter def sequence_metadata(self) -> pl.LazyFrame: - """Set the sequence_metadata attribute.""" + """Get the sequence_metadata attribute.""" if self.url_sequence_metadata: sequence_metadata = get_covid_genome_metadata(metadata_url=self.url_sequence_metadata) return sequence_metadata else: raise CladeTimeInvalidURLError("CladeTime is missing url_sequence_metadata") - return sequence_metadata def __repr__(self): return f"CladeTime(sequence_as_of={self.sequence_as_of}, tree_as_of={self.tree_as_of})" @@ -121,7 +162,7 @@ def _get_config(self) -> Config: return config def _validate_as_of_date(self, as_of: str) -> datetime: - """Validate date the as_of dates used to instantiate CladeTime.""" + """Validate an as_of date (UTC) used to instantiate CladeTime.""" if as_of is None: as_of_date = datetime.now() elif isinstance(as_of, datetime): @@ -136,7 +177,4 @@ def _validate_as_of_date(self, as_of: str) -> datetime: if as_of_date < self._config.nextstrain_min_seq_date: raise CladeTimeInvalidDateError(f"Date must be after May 1, 2023: {as_of_date}") - if as_of_date > datetime.now().replace(tzinfo=timezone.utc): - raise CladeTimeInvalidDateError(f"Date cannot be in the future: {as_of_date}") - return as_of_date diff --git a/src/cladetime/exceptions.py b/src/cladetime/exceptions.py index f53b5f4..21d91ec 100644 --- a/src/cladetime/exceptions.py +++ b/src/cladetime/exceptions.py @@ -11,3 +11,7 @@ class CladeTimeInvalidDateError(Error): class CladeTimeInvalidURLError(Error): """Raised when CladeTime encounters an invalid URL.""" + + +class CladeTimeFutureDateWarning(Warning): + """Raised when CladeTime as_of date is in the future.""" diff --git a/tests/unit/test_cladetime.py b/tests/unit/test_cladetime.py index 0e55533..70202db 100644 --- a/tests/unit/test_cladetime.py +++ b/tests/unit/test_cladetime.py @@ -5,7 +5,7 @@ import dateutil.tz import pytest from cladetime.cladetime import CladeTime -from cladetime.exceptions import CladeTimeInvalidDateError, CladeTimeInvalidURLError +from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError from freezegun import freeze_time @@ -36,7 +36,7 @@ def test_cladetime_no_args(): datetime(2024, 9, 30, 18, 24, 59, 655398), None, datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc), - datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), + datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc), ), ( datetime(2024, 2, 22, 22, 22, 22, 222222, tzinfo=dateutil.tz.gettz("US/Eastern")), @@ -44,6 +44,24 @@ def test_cladetime_no_args(): datetime(2024, 2, 22, 22, 22, 22, tzinfo=timezone.utc), datetime(2024, 2, 22, tzinfo=timezone.utc), ), + ( + "2023-12-21", + None, + datetime(2023, 12, 21, tzinfo=timezone.utc), + datetime(2023, 12, 21, tzinfo=timezone.utc), + ), + ( + "2063-12-21", + None, + datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), + datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), + ), + ( + "2063-12-21", + "2074-07-13", + datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), + datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), + ), ], ) def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_of, expected_tree_as_of): @@ -54,12 +72,21 @@ def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_ assert ct.tree_as_of == expected_tree_as_of -@pytest.mark.parametrize("bad_date", ["2020-07-13", "2022-12-32", "2063-04-05"]) +@pytest.mark.parametrize("bad_date", ["2020-07-13", "2022-12-32"]) def test_cladetime_invalid_date(bad_date): with pytest.raises(CladeTimeInvalidDateError): CladeTime(sequence_as_of=bad_date, tree_as_of=bad_date) +def test_cladetime_future_date(): + with pytest.warns(CladeTimeFutureDateWarning): + CladeTime(sequence_as_of="2063-07-13") + with pytest.warns(CladeTimeFutureDateWarning): + CladeTime(tree_as_of="2063-07-13") + with pytest.warns(CladeTimeFutureDateWarning): + CladeTime(sequence_as_of="2023-12-31", tree_as_of="2063-07-13") + + @pytest.mark.parametrize( "sequence_as_of, expected_content", [ From 24df5d2fbcfdb5a36dafaaab535fd10e056ff83d Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Thu, 10 Oct 2024 15:11:19 -0400 Subject: [PATCH 2/3] Update src/cladetime/cladetime.py Co-authored-by: Evan Ray --- src/cladetime/cladetime.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cladetime/cladetime.py b/src/cladetime/cladetime.py index c09e3f5..df11a96 100644 --- a/src/cladetime/cladetime.py +++ b/src/cladetime/cladetime.py @@ -86,8 +86,6 @@ def sequence_as_of(self) -> datetime: @sequence_as_of.setter def sequence_as_of(self, date) -> None: """Set the sequence_as_of attribute.""" - if date is None: - sequence_as_of = datetime.now() sequence_as_of = self._validate_as_of_date(date) utc_now = datetime.now().replace(tzinfo=timezone.utc) if sequence_as_of > utc_now: From 424dda2d8cbe8b490555515ca762c4da04cc1438 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Thu, 10 Oct 2024 15:51:59 -0400 Subject: [PATCH 3/3] UTC fixes Fix logic that gets the current datetime for the UTC timezone. Also ensure that CladeTime treats all incoming dates as UTC. --- src/cladetime/cladetime.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/cladetime/cladetime.py b/src/cladetime/cladetime.py index df11a96..d3b108d 100644 --- a/src/cladetime/cladetime.py +++ b/src/cladetime/cladetime.py @@ -52,10 +52,12 @@ def __init__(self, sequence_as_of=None, tree_as_of=None): Use the NextStrain sequences and sequence metadata that were available as of this date. Can be a datetime object, a string in the format "YYYY-MM-DD", or None (which defaults to the current date and time). + CladeTime treats all dates and times as UTC. tree_as_of : datetime | str | None, default = now() Use the NextStrain reference tree that was available as of this date. Can be a datetime object, a string in the format "YYYY-MM-DD", or None (which defaults to the sequence_as_of date). + CladeTime treats all dates and times as UTC. """ self._config = self._get_config() @@ -87,7 +89,7 @@ def sequence_as_of(self) -> datetime: def sequence_as_of(self, date) -> None: """Set the sequence_as_of attribute.""" sequence_as_of = self._validate_as_of_date(date) - utc_now = datetime.now().replace(tzinfo=timezone.utc) + utc_now = datetime.now(timezone.utc) if sequence_as_of > utc_now: warnings.warn( f"specified sequence_as_of is in the future, defaulting to current time: {utc_now}", @@ -108,7 +110,7 @@ def tree_as_of(self, date) -> None: tree_as_of = self.sequence_as_of else: tree_as_of = self._validate_as_of_date(date) - utc_now = datetime.now().replace(tzinfo=timezone.utc) + utc_now = datetime.now(timezone.utc) if tree_as_of > utc_now: warnings.warn( f"specified tree_as_of is in the future, defaulting to sequence_as_of: {self.sequence_as_of}", @@ -160,18 +162,22 @@ def _get_config(self) -> Config: return config def _validate_as_of_date(self, as_of: str) -> datetime: - """Validate an as_of date (UTC) used to instantiate CladeTime.""" + """Validate an as_of date used to instantiate CladeTime. + + All dates used to instantiate CladeTime are assigned + a datetime tzinfo of UTC. + """ if as_of is None: - as_of_date = datetime.now() + as_of_date = datetime.now(timezone.utc) elif isinstance(as_of, datetime): - as_of_date = as_of + as_of_date = as_of.replace(tzinfo=timezone.utc) elif isinstance(as_of, str): try: - as_of_date = datetime.strptime(as_of, "%Y-%m-%d") + as_of_date = datetime.strptime(as_of, "%Y-%m-%d").replace(tzinfo=timezone.utc) except ValueError as e: raise CladeTimeInvalidDateError(f"Invalid date string: {as_of} (should be in YYYY-MM-DD format)") from e - as_of_date = as_of_date.replace(microsecond=0, tzinfo=timezone.utc) + as_of_date = as_of_date.replace(microsecond=0) if as_of_date < self._config.nextstrain_min_seq_date: raise CladeTimeInvalidDateError(f"Date must be after May 1, 2023: {as_of_date}")