Skip to content

Commit

Permalink
Add a tree property to Tree
Browse files Browse the repository at this point in the history
Also move the tree tests to the integration suite, since we're doing
network connections.
  • Loading branch information
bsweger committed Oct 22, 2024
1 parent 1a2d7a7 commit dd824e5
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 12 deletions.
1 change: 1 addition & 0 deletions src/cladetime/util/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class Config:
nextclade_data_url = "https://data.clades.nextstrain.org"
nextclade_data_url_version = "v3"
nextclade_base_url: str = "https://nextstrain.org/nextclade/sars-cov-2"
nextclade_input_tree_name: str = "tree.json"
reference_tree_file: AnyPath = None
root_sequence_file: AnyPath = None
assignment_no_metadata_file: AnyPath = None
Expand Down
59 changes: 47 additions & 12 deletions src/cladetime/util/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
from urllib.parse import urljoin

import structlog
from requests import Session

from cladetime import CladeTime
from cladetime.exceptions import TreeNotAvailableError
from cladetime.util.reference import _get_s3_object_url
from cladetime.util.sequence import _get_ncov_metadata
from cladetime.util.session import _check_response

logger = structlog.get_logger()

Expand All @@ -33,8 +35,16 @@ def __init__(self, clade_time: CladeTime):
self.as_of = self._clade_time.tree_as_of
self._nextclade_data_url = self._clade_time._config.nextclade_data_url
self._nextclade_data_url_version = self._clade_time._config.nextclade_data_url_version
self._tree_name = self._clade_time._config.nextclade_input_tree_name
self._url = self.url

def __repr__(self):
    """Return a debugging representation of this object.

    The result contains the class name, the ``as_of`` value formatted as
    YYYY-MM-DD, and the ``updated`` entry found under the ``meta`` key of
    ``self.tree``. ``self.tree["meta"]`` is indexed directly, so a tree
    without a ``meta`` entry raises instead of producing a string.
    """
    class_name = self.__class__.__name__
    as_of_text = self.as_of.strftime("%Y-%m-%d")
    # .get() yields None (rendered as "None") when "updated" is absent.
    tree_updated = self.tree["meta"].get("updated")
    return f"{class_name}(as_of={as_of_text}, tree_updated={tree_updated})"

def __str__(self):
    """Return a short human-readable description of this object.

    The description names the Nextclade reference tree data and includes
    the ``as_of`` value formatted as YYYY-MM-DD.
    """
    # Spelling fix: the message previously read "Nexclade".
    as_of_text = self.as_of.strftime("%Y-%m-%d")
    return f"Represents Nextclade reference tree data as of {as_of_text}"

@property
def url(self) -> str:
"""
Expand All @@ -46,11 +56,13 @@ def url(self) -> str:
except TreeNotAvailableError as err:
raise err

def __repr__(self):
return f"Tree(as_of={self.as_of})"

def __str__(self):
return f"Represents Nexclade reference tree data as of {self.as_of}"
@property
def tree(self) -> dict:
    """
    dict : A SARS-CoV-2 reference tree in `Nextstrain Auspice JSON format
    <https://docs.nextstrain.org/projects/auspice/en/stable/releases/v2.html#new-dataset-json-format>`_.

    Every access delegates to ``_get_reference_tree()``; nothing is
    stored on the instance by this property.
    """
    reference_tree = self._get_reference_tree()
    return reference_tree

def _get_tree_url(self):
"""Get the URL to a Nextclade SARS-CoV-2 reference tree.
Expand All @@ -73,7 +85,7 @@ def _get_tree_url(self):
If there is no ncov metadata available for the specified date.
"""

# We can only reliably retrieve the a past reference tree if we
# we can only reliably retrieve a past reference tree if we
# have access to the ncov metadata for that date
min_tree_as_of = self._clade_time._config.nextstrain_min_ncov_metadata_date
if min_tree_as_of > self.as_of:
Expand All @@ -93,10 +105,10 @@ def _get_tree_url(self):
nextclade_dataset_name = ncov_metadata.get("nextclade_dataset_name_full")
nextclade_dataset_version = ncov_metadata.get("nextclade_dataset_version")

# nextclade_data_url = "https://data.clades.nextstrain.org/v3"
# nextclade_data_url = "https://data.clades.nextstrain.org/v3/"
tree_url = urljoin(
self._nextclade_data_url,
f"{self._nextclade_data_url_version}/{nextclade_dataset_name}/{nextclade_dataset_version}/tree.json",
f"{self._nextclade_data_url_version}/{nextclade_dataset_name}/{nextclade_dataset_version}/{self._tree_name}",
)
return tree_url

Expand All @@ -108,16 +120,39 @@ def _get_url_ncov_metadata(self):
self.as_of,
)[1]

def get_reference_tree(self) -> dict:
def _get_reference_tree(self, session: Session | None = None) -> dict:
    """Return a reference tree used for SARS-CoV-2 clade assignments.

    Retrieves the reference tree that was current as of
    :any:`tree_as_of<tree_as_of>`. The reference tree is expressed in
    `Nextstrain Auspice JSON format
    <https://docs.nextstrain.org/projects/auspice/en/stable/releases/v2.html#new-dataset-json-format>`_.

    Parameters
    ----------
    session : requests.Session, optional
        A requests session object to use when downloading the
        reference tree. When not provided, a new session is created
        for this call and closed before returning. In both cases the
        request is sent with headers that `specify Nextstrain's media types
        <https://docs.nextstrain.org/projects/auspice/en/stable/usage/api.html#media-types>`_.

    Returns
    -------
    dict
        A Python dictionary that represents the reference tree.

    Notes
    -----
    The response is passed to ``_check_response`` before it is decoded;
    presumably that helper raises on an unsuccessful response (confirm
    against ``cladetime.util.session``).
    """
    # Built unconditionally so a caller-supplied session gets the same
    # media-type headers as a session created here.
    headers = {
        "Accept": "application/vnd.nextstrain.dataset.main+json",
        "Content-Type": "application/vnd.nextstrain.dataset.main+json",
    }

    # Only close sessions this method created; a caller-supplied session
    # stays open for the caller to reuse.
    owns_session = session is None
    if owns_session:
        session = Session()

    try:
        resp = session.get(self.url, headers=headers)
        _check_response(resp)
        return resp.json()
    finally:
        if owns_session:
            session.close()
26 changes: 26 additions & 0 deletions tests/integration/test_tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from urllib.parse import urlparse

import pytest
from cladetime import CladeTime, Tree
from cladetime.exceptions import TreeNotAvailableError
from freezegun import freeze_time


def test__get_tree_url():
    """Check the reference tree URL built with the clock frozen at 2024-08-13.

    The URL path is expected to contain ``2024-07-17--12-57-03Z``
    (presumably the Nextclade dataset version in effect on that date)
    and the tree file name ``tree.json``.
    """
    with freeze_time("2024-08-13 16:21:34"):
        reference_tree = Tree(CladeTime())
        url_path = urlparse(reference_tree.url).path
        assert "2024-07-17--12-57-03Z" in url_path
        assert "tree.json" in url_path


def test__get_tree_url_bad_date():
    """Check that a too-early ``tree_as_of`` raises ``TreeNotAvailableError``."""
    # we cannot get reference trees prior to 2024-08-01
    with pytest.raises(TreeNotAvailableError):
        too_early = CladeTime(tree_as_of="2024-07-13")
        Tree(too_early)


def test__get_reference_tree():
    """Check the title of the reference tree fetched with the clock frozen.

    The tree's ``meta.title`` is compared case-insensitively with
    "sars-cov-2 phylogeny".
    """
    with freeze_time("2024-08-13 16:21:34"):
        tree = Tree(CladeTime())
        # Default to an empty dict (not ""), so a missing "meta" key fails
        # the assertion below instead of raising AttributeError on str.get.
        title = tree.tree.get("meta", {}).get("title", "")
    assert title.lower() == "sars-cov-2 phylogeny"

0 comments on commit dd824e5

Please sign in to comment.