diff --git a/pyproject.toml b/pyproject.toml index 3212de1..5c5e282 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ dependencies = [ [project.optional-dependencies] dev = [ "coverage", + "freezegun", + "moto", "mypy", "pytest", "ruff", diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index c2b4edd..97efebf 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -3,14 +3,19 @@ awscli==1.32.116 # via virus-clade-utils (pyproject.toml) boto3==1.34.116 - # via virus-clade-utils (pyproject.toml) + # via + # virus-clade-utils (pyproject.toml) + # moto botocore==1.34.116 # via # awscli # boto3 + # moto # s3transfer certifi==2024.2.2 # via requests +cffi==1.17.1 + # via cryptography charset-normalizer==3.3.2 # via requests click==8.1.7 @@ -23,22 +28,34 @@ colorama==0.4.6 # via awscli coverage==7.5.3 # via virus-clade-utils (pyproject.toml) +cryptography==43.0.1 + # via moto docutils==0.16 # via awscli +freezegun==1.5.1 + # via virus-clade-utils (pyproject.toml) idna==3.7 # via requests iniconfig==2.0.0 # via pytest jellyfish==1.1.0 # via us +jinja2==3.1.4 + # via moto jmespath==1.0.1 # via # boto3 # botocore markdown-it-py==3.0.0 # via rich +markupsafe==2.1.5 + # via + # jinja2 + # werkzeug mdurl==0.1.2 # via markdown-it-py +moto==5.0.15 + # via virus-clade-utils (pyproject.toml) mypy==1.10.1 # via virus-clade-utils (pyproject.toml) mypy-extensions==1.0.0 @@ -59,6 +76,8 @@ pyarrow==16.1.0 # via virus-clade-utils (pyproject.toml) pyasn1==0.6.0 # via rsa +pycparser==2.22 + # via cffi pygments==2.18.0 # via rich pytest==8.2.1 @@ -66,13 +85,22 @@ pytest==8.2.1 python-dateutil==2.9.0.post0 # via # botocore + # freezegun + # moto # pandas pytz==2024.1 # via pandas pyyaml==6.0.1 - # via awscli + # via + # awscli + # responses requests==2.32.3 - # via virus-clade-utils (pyproject.toml) + # via + # virus-clade-utils (pyproject.toml) + # moto + # responses +responses==0.25.3 + # via moto 
rich==13.7.1 # via # virus-clade-utils (pyproject.toml) @@ -101,5 +129,10 @@ urllib3==2.2.1 # via # botocore # requests + # responses us==3.2.0 # via virus-clade-utils (pyproject.toml) +werkzeug==3.0.4 + # via moto +xmltodict==0.13.0 + # via moto
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..9763dc7
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,60 @@
+from unittest import mock
+
+import boto3
+import pytest
+import requests
+from freezegun import freeze_time
+from moto import mock_aws
+
+
+@pytest.fixture
+def mock_session():
+    """Session mock for testing functions that use requests.Session.
+
+    Patches ``requests.Session`` with an autospec'd mock for the lifetime of
+    the requesting test. Using the session as a context manager
+    (``with requests.Session() as session``) hands back this same mock.
+
+    The patch uses the standard library's ``unittest.mock`` rather than the
+    ``mocker`` fixture, because pytest-mock is not declared in the project's
+    dev dependencies.
+    """
+    with mock.patch.object(requests, "Session", autospec=True) as session_cls:
+        session_cls.return_value.__enter__.return_value = session_cls
+        yield session_cls
+
+
+@pytest.fixture
+def s3_setup():
+    """Setup mock S3 bucket with versioned objects.
+
+    Yields:
+        A ``(s3_client, bucket_name, object_key)`` tuple. The bucket has
+        versioning enabled and holds three versions of ``object_key``. The
+        moto mock stays active until the requesting test finishes.
+    """
+    with mock_aws():
+        bucket_name = "versioned-bucket"
+        object_key = "metadata/object-key/metadata.tsv.zst"
+
+        s3_client = boto3.client("s3", region_name="us-east-1")
+        s3_client.create_bucket(Bucket=bucket_name)
+        s3_client.put_bucket_versioning(Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"})
+
+        # Upload multiple versions of the object
+        versions = [
+            ("2023-01-01 03:05:01", "object version 1"),
+            ("2023-02-05 14:33:06", "object version 2"),
+            ("2023-03-22 22:55:12", "object version 3"),
+        ]
+
+        for version_date, content in versions:
+            # use freezegun to override system date, which in
+            # turn sets S3 object version LastModified date
+            with freeze_time(version_date):
+                s3_client.put_object(
+                    Bucket=bucket_name,
+                    Key=object_key,
+                    Body=content,
+                )
+        yield s3_client, bucket_name, object_key
diff --git a/tests/unit/util/test_reference.py b/tests/unit/util/test_reference.py index 95d868f..5e95f62 100644 --- a/tests/unit/util/test_reference.py +++ b/tests/unit/util/test_reference.py @@ -1,6 +1,7 @@ +from datetime import datetime, timezone from unittest import mock -from
virus_clade_utils.util.reference import get_nextclade_dataset +from virus_clade_utils.util.reference import get_nextclade_dataset, get_s3_object_url @mock.patch("subprocess.run")
@@ -11,3 +12,22 @@ def test_get_nextclade_dataset(tmp_path):
     # datasetset version, as determined by the as_of_date being passed
     # (returned version is temporarily hard-coded until Nextstrain provides the info we need)
     assert "2024-07-17--12-57-03Z" in str(dataset_path)
+
+
+def test_get_s3_object_url(s3_setup):
+    """Expect the newest object version that is not later than the target date."""
+    s3_client, bucket_name, object_key = s3_setup
+    utc = timezone.utc
+    target_date = datetime.strptime("2023-02-15", "%Y-%m-%d").replace(tzinfo=utc)
+    # of the three versions uploaded by s3_setup, this is the latest one on or before target_date
+    expected_modified = datetime.strptime("2023-02-05 14:33:06", "%Y-%m-%d %H:%M:%S").replace(tzinfo=utc)
+
+    version_id, version_url = get_s3_object_url(bucket_name, object_key, target_date)
+    assert version_id is not None
+
+    # fetch the returned version and confirm its timestamp
+    returned_version = s3_client.get_object(Bucket=bucket_name, Key=object_key, VersionId=version_id)
+    last_modified = returned_version["LastModified"]
+    assert last_modified <= target_date
+    assert last_modified == expected_modified
+    assert version_url == f"https://{bucket_name}.s3.amazonaws.com/{object_key}?versionId={version_id}"
diff --git a/tests/unit/util/test_sequence.py b/tests/unit/util/test_sequence.py index 4bb7c17..3a2485a 100644 --- a/tests/unit/util/test_sequence.py +++ b/tests/unit/util/test_sequence.py @@ -1,9 +1,11 @@ from collections import Counter +from datetime import datetime from pathlib import Path import polars as pl import pytest from virus_clade_utils.util.sequence import ( + download_covid_genome_metadata, filter_covid_genome_metadata, get_covid_genome_metadata, parse_sequence_assignments,
@@ -49,10 +51,35 @@ def test_get_covid_genome_metadata(test_file_path, metadata_file):
         "genbank_accession",
         "genbank_accession_rev",
     }
-
     assert expected_cols.issubset(metadata_cols)
 
 
+@pytest.mark.parametrize(
+    "as_of, filename",
+    [
+        # no as_of: the saved file is expected to carry today's date
+        # NOTE(review): datetime.now() is evaluated at collection time in local time -- confirm
+        # this agrees with how download_covid_genome_metadata derives its default date
+        (None, f"{datetime.now().strftime('%Y-%m-%d')}-metadata.tsv.zst"),
+        # explicit as_of: the saved file is expected to carry that date
+        ("2023-03-20", "2023-03-20-metadata.tsv.zst"),
+    ],
+)
+def test_download_covid_genome_metadata(s3_setup, tmp_path, mock_session, as_of, filename):
+    """Check the name of the file reported by the covid genome metadata download."""
+    _, bucket_name, object_key = s3_setup
+    saved_path = download_covid_genome_metadata(mock_session, bucket_name, object_key, tmp_path, as_of=as_of)
+    assert saved_path.name == filename
+
+
+def test_download_covid_genome_metadata_no_history(s3_setup, tmp_path, mock_session):
+    """Expect a ValueError when the genome metadata download is given as_of="2000-01-01"."""
+    _, bucket_name, object_key = s3_setup
+    # as_of predates every version uploaded by the s3_setup fixture
+    with pytest.raises(ValueError):
+        download_covid_genome_metadata(mock_session, bucket_name, object_key, tmp_path, as_of="2000-01-01")
+
+
 def test_filter_covid_genome_metadata():
     test_genome_metadata = {
         "date": ["2022-01-01", "2022-01-02", "2022-01-03", "2023-12-25", None, "2023-12-27"],