Skip to content

Commit

Permalink
Add tests for new as_of functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
bsweger committed Sep 27, 2024
1 parent 4b0abcd commit a1fe96f
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 5 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ dependencies = [
[project.optional-dependencies]
dev = [
"coverage",
"freezegun",
"moto",
"mypy",
"pytest",
"ruff",
Expand Down
39 changes: 36 additions & 3 deletions requirements/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,19 @@
awscli==1.32.116
# via virus-clade-utils (pyproject.toml)
boto3==1.34.116
# via virus-clade-utils (pyproject.toml)
# via
# virus-clade-utils (pyproject.toml)
# moto
botocore==1.34.116
# via
# awscli
# boto3
# moto
# s3transfer
certifi==2024.2.2
# via requests
cffi==1.17.1
# via cryptography
charset-normalizer==3.3.2
# via requests
click==8.1.7
Expand All @@ -23,22 +28,34 @@ colorama==0.4.6
# via awscli
coverage==7.5.3
# via virus-clade-utils (pyproject.toml)
cryptography==43.0.1
# via moto
docutils==0.16
# via awscli
freezegun==1.5.1
# via virus-clade-utils (pyproject.toml)
idna==3.7
# via requests
iniconfig==2.0.0
# via pytest
jellyfish==1.1.0
# via us
jinja2==3.1.4
# via moto
jmespath==1.0.1
# via
# boto3
# botocore
markdown-it-py==3.0.0
# via rich
markupsafe==2.1.5
# via
# jinja2
# werkzeug
mdurl==0.1.2
# via markdown-it-py
moto==5.0.15
# via virus-clade-utils (pyproject.toml)
mypy==1.10.1
# via virus-clade-utils (pyproject.toml)
mypy-extensions==1.0.0
Expand All @@ -59,20 +76,31 @@ pyarrow==16.1.0
# via virus-clade-utils (pyproject.toml)
pyasn1==0.6.0
# via rsa
pycparser==2.22
# via cffi
pygments==2.18.0
# via rich
pytest==8.2.1
# via virus-clade-utils (pyproject.toml)
python-dateutil==2.9.0.post0
# via
# botocore
# freezegun
# moto
# pandas
pytz==2024.1
# via pandas
pyyaml==6.0.1
# via awscli
# via
# awscli
# responses
requests==2.32.3
# via virus-clade-utils (pyproject.toml)
# via
# virus-clade-utils (pyproject.toml)
# moto
# responses
responses==0.25.3
# via moto
rich==13.7.1
# via
# virus-clade-utils (pyproject.toml)
Expand Down Expand Up @@ -101,5 +129,10 @@ urllib3==2.2.1
# via
# botocore
# requests
# responses
us==3.2.0
# via virus-clade-utils (pyproject.toml)
werkzeug==3.0.4
# via moto
xmltodict==0.13.0
# via moto
43 changes: 43 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import boto3
import pytest
import requests
from freezegun import freeze_time
from moto import mock_aws


@pytest.fixture
def mock_session(mocker):
    """Replace ``requests.Session`` with an autospec'd mock and return the patched object.

    The patched object is also wired up as the value produced by entering a
    session instance as a context manager, so code written as
    ``with requests.Session() as session:`` receives the same mock.
    """
    patched_session = mocker.patch.object(requests, "Session", autospec=True)
    session_instance = patched_session.return_value
    session_instance.__enter__.return_value = patched_session
    return patched_session


@pytest.fixture
def s3_setup():
    """Yield a mocked S3 client plus the bucket name and key of a versioned object.

    A versioning-enabled bucket is created inside moto's ``mock_aws`` context and
    three versions of one object are written to it. The fixture yields the tuple
    ``(s3_client, bucket_name, object_key)`` while the mock is still active.
    """
    bucket_name = "versioned-bucket"
    object_key = "metadata/object-key/metadata.tsv.zst"

    # (timestamp to freeze the clock at, body of the object version)
    object_versions = (
        ("2023-01-01 03:05:01", "object version 1"),
        ("2023-02-05 14:33:06", "object version 2"),
        ("2023-03-22 22:55:12", "object version 3"),
    )

    with mock_aws():
        s3_client = boto3.client("s3", region_name="us-east-1")
        s3_client.create_bucket(Bucket=bucket_name)
        s3_client.put_bucket_versioning(
            Bucket=bucket_name,
            VersioningConfiguration={"Status": "Enabled"},
        )

        object_location = {"Bucket": bucket_name, "Key": object_key}
        for frozen_at, body in object_versions:
            # freezegun overrides the system date, which in turn determines
            # the LastModified date recorded for the S3 object version
            with freeze_time(frozen_at):
                s3_client.put_object(Body=body, **object_location)

        yield s3_client, bucket_name, object_key
19 changes: 18 additions & 1 deletion tests/unit/util/test_reference.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import datetime, timezone
from unittest import mock

from virus_clade_utils.util.reference import get_nextclade_dataset
from virus_clade_utils.util.reference import get_nextclade_dataset, get_s3_object_url


@mock.patch("subprocess.run")
Expand All @@ -11,3 +12,19 @@ def test_get_nextclade_dataset(tmp_path):
# dataset version, as determined by the as_of_date being passed
# (returned version is temporarily hard-coded until Nextstrain provides the info we need)
assert "2024-07-17--12-57-03Z" in str(dataset_path)


def test_get_s3_object_url(s3_setup):
    """Check that get_s3_object_url picks the newest object version not later than the target date."""
    s3_client, bucket_name, object_key = s3_setup

    utc = timezone.utc
    target_date = datetime.strptime("2023-02-15", "%Y-%m-%d").replace(tzinfo=utc)
    # of the three versions the fixture uploads, this is the latest one on or before the target
    expected_last_modified = datetime.strptime("2023-02-05 14:33:06", "%Y-%m-%d %H:%M:%S").replace(tzinfo=utc)

    version_id, version_url = get_s3_object_url(bucket_name, object_key, target_date)
    assert version_id is not None

    # look the returned version up again to inspect its timestamp
    response = s3_client.get_object(Bucket=bucket_name, Key=object_key, VersionId=version_id)
    last_modified = response["LastModified"]

    assert last_modified <= target_date
    assert last_modified == expected_last_modified
    assert version_url == f"https://{bucket_name}.s3.amazonaws.com/{object_key}?versionId={version_id}"
24 changes: 23 additions & 1 deletion tests/unit/util/test_sequence.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from collections import Counter
from datetime import datetime
from pathlib import Path

import polars as pl
import pytest
from virus_clade_utils.util.sequence import (
download_covid_genome_metadata,
filter_covid_genome_metadata,
get_covid_genome_metadata,
parse_sequence_assignments,
Expand Down Expand Up @@ -49,10 +51,30 @@ def test_get_covid_genome_metadata(test_file_path, metadata_file):
"genbank_accession",
"genbank_accession_rev",
}

assert expected_cols.issubset(metadata_cols)


@pytest.mark.parametrize(
    "as_of, filename",
    [
        # filename=None means "expect today's date". It is resolved inside the
        # test rather than here because parametrize arguments are evaluated at
        # collection time, which would bake the date into the test ID and
        # break runs that cross midnight.
        (None, None),
        ("2023-03-20", "2023-03-20-metadata.tsv.zst"),
    ],
)
def test_download_covid_genome_metadata(s3_setup, tmp_path, mock_session, as_of, filename):
    """Test filenames saved by covid genome metadata download.

    With an explicit ``as_of`` the saved file must carry that date. With
    ``as_of=None`` it must carry the current date, sampled immediately before
    and after the call so that a date rollover during the call is tolerated.
    """
    _, bucket_name, object_key = s3_setup

    date_before = datetime.now().strftime("%Y-%m-%d")
    actual_filename = download_covid_genome_metadata(mock_session, bucket_name, object_key, tmp_path, as_of=as_of)
    date_after = datetime.now().strftime("%Y-%m-%d")

    if filename is None:
        acceptable_names = {f"{date_before}-metadata.tsv.zst", f"{date_after}-metadata.tsv.zst"}
    else:
        acceptable_names = {filename}

    assert actual_filename.name in acceptable_names


def test_download_covid_genome_metadata_no_history(s3_setup, tmp_path, mock_session):
    """Expect a ValueError when as_of is earlier than every object version in the bucket."""
    _, bucket_name, object_key = s3_setup
    # the fixture's oldest object version is from 2023, so nothing exists as of this date
    as_of_before_history = "2000-01-01"

    with pytest.raises(ValueError):
        download_covid_genome_metadata(
            mock_session,
            bucket_name,
            object_key,
            tmp_path,
            as_of=as_of_before_history,
        )


def test_filter_covid_genome_metadata():
test_genome_metadata = {
"date": ["2022-01-01", "2022-01-02", "2022-01-03", "2023-12-25", None, "2023-12-27"],
Expand Down

0 comments on commit a1fe96f

Please sign in to comment.