diff --git a/src/nplinker/metabolomics/gnps/gnps_downloader.py b/src/nplinker/metabolomics/gnps/gnps_downloader.py
index c3df4527..4f2af084 100644
--- a/src/nplinker/metabolomics/gnps/gnps_downloader.py
+++ b/src/nplinker/metabolomics/gnps/gnps_downloader.py
@@ -18,7 +18,7 @@ class GNPSDownloader:
     Attributes:
         GNPS_DATA_DOWNLOAD_URL: URL template for downloading GNPS data.
         GNPS_DATA_DOWNLOAD_URL_FBMN: URL template for downloading GNPS data for FBMN.
-        gnps_format: GNPS workflow type.
+        GNPS2_DATA_DOWNLOAD_URL: URL template for downloading GNPS2 data.
     """
 
     GNPS_DATA_DOWNLOAD_URL: str = (
@@ -27,52 +27,56 @@ class GNPSDownloader:
     GNPS_DATA_DOWNLOAD_URL_FBMN: str = (
         "https://gnps.ucsd.edu/ProteoSAFe/DownloadResult?task={}&view=download_cytoscape_data"
     )
+    GNPS2_DATA_DOWNLOAD_URL: str = "https://gnps2.org/taskzip?task={}"
 
-    def __init__(self, task_id: str, download_root: str | PathLike):
+    def __init__(
+        self, task_id: str, download_root: str | PathLike, gnps_version: str = "1"
+    ) -> None:
         """Initialize the GNPSDownloader.
 
         Args:
             task_id: GNPS task id, identifying the data to be downloaded.
             download_root: Path where to store the downloaded archive.
+            gnps_version: Version of GNPS platform that has been used to run the task.
+                Available values are "1" and "2". Choose "1" if the platform https://gnps.ucsd.edu/
+                has been used; or "2" for the platform https://gnps2.org/.
 
         Raises:
-            ValueError: If the given task id does not correspond to a supported
-                GNPS workflow.
+            ValueError: If the given task id does not correspond to a supported GNPS workflow.
+            ValueError: If the given GNPS version is not valid.
 
         Examples:
             >>> GNPSDownloader("c22f44b14a3d450eb836d607cb9521bb", "~/downloads")
         """
-        gnps_format = gnps_format_from_gnps1_task_id(task_id)
-        if gnps_format == GNPSFormat.Unknown:
+        if gnps_version == "1":
+            gnps_format = gnps_format_from_gnps1_task_id(task_id)
+            if gnps_format == GNPSFormat.Unknown:
+                raise ValueError(
+                    f"Unknown workflow type for GNPS task '{task_id}'. "
+                    f"Supported GNPS workflows are described in the GNPSFormat enum, "
+                    f"including such as 'METABOLOMICS-SNETS', 'METABOLOMICS-SNETS-V2' "
+                    f"and 'FEATURE-BASED-MOLECULAR-NETWORKING'."
+                )
+            self._gnps_format = gnps_format
+            self._file_name = gnps_format.value + "-" + task_id + ".zip"
+        elif gnps_version == "2":
+            self._file_name = task_id + ".tar"
+        else:
             raise ValueError(
-                f"Unknown workflow type for GNPS task '{task_id}'."
-                f"Supported GNPS workflows are described in the GNPSFormat enum, "
-                f"including such as 'METABOLOMICS-SNETS', 'METABOLOMICS-SNETS-V2' "
-                f"and 'FEATURE-BASED-MOLECULAR-NETWORKING'."
+                f"Invalid GNPS version '{gnps_version}'. Supported versions are '1' and '2'."
             )
 
         self._task_id = task_id
         self._download_root: Path = Path(download_root)
-        self._gnps_format = gnps_format
-        self._file_name = gnps_format.value + "-" + self._task_id + ".zip"
-
-    @property
-    def gnps_format(self) -> GNPSFormat:
-        """Get the GNPS workflow type.
-
-        Returns:
-            GNPS workflow type.
-        """
-        return self._gnps_format
+        self._gnps_version = gnps_version
 
     def download(self) -> Self:
-        """Download GNPS data.
-
-        Note: GNPS data is downloaded using the POST method (empty payload is OK).
-        """
-        download_url(
-            self.get_url(), self._download_root, filename=self._file_name, http_method="POST"
-        )
+        """Download GNPS data."""
+        # GNPS1 data is downloaded using POST (empty payload is OK); GNPS2 data using GET.
+        http_method = "POST" if self._gnps_version == "1" else "GET"
+        download_url(
+            self.get_url(), self._download_root, filename=self._file_name, http_method=http_method
+        )
         return self
 
     def get_download_file(self) -> str:
@@ -97,6 +101,9 @@ def get_url(self) -> str:
         Returns:
             URL pointing to the GNPS data to be downloaded.
         """
-        if self.gnps_format == GNPSFormat.FBMN:
-            return GNPSDownloader.GNPS_DATA_DOWNLOAD_URL_FBMN.format(self._task_id)
-        return GNPSDownloader.GNPS_DATA_DOWNLOAD_URL.format(self._task_id)
+        if self._gnps_version == "2":
+            return GNPSDownloader.GNPS2_DATA_DOWNLOAD_URL.format(self._task_id)
+        # `_gnps_format` is only set for GNPS1 tasks, hence the GNPS2 check comes first.
+        if self._gnps_format == GNPSFormat.FBMN:
+            return GNPSDownloader.GNPS_DATA_DOWNLOAD_URL_FBMN.format(self._task_id)
+        return GNPSDownloader.GNPS_DATA_DOWNLOAD_URL.format(self._task_id)
diff --git a/tests/unit/metabolomics/conftest.py b/tests/unit/metabolomics/conftest.py
index 81637457..2ea042ca 100644
--- a/tests/unit/metabolomics/conftest.py
+++ b/tests/unit/metabolomics/conftest.py
@@ -145,6 +145,17 @@ def gnps_annotations_files(tmp_gnps_dir) -> dict[GNPSFormat, PathLike]:
 #
 
 
+@pytest.fixture(scope="session")
+def gnps2_website_is_down():
+    """Return True if the GNPS2 website cannot be reached or does not respond successfully."""
+    gnps_url = "https://gnps2.org"
+    try:
+        r = httpx.get(gnps_url, follow_redirects=True)
+    except httpx.HTTPError:
+        return True
+    return not r.is_success
+
+
 @pytest.fixture(scope="session")
 def gnps2_tar_files() -> dict[GNPSFormat, PathLike]:
     """Get the paths of the GNPS2 tar archives as a dict.
diff --git a/tests/unit/metabolomics/test_gnps_downloader.py b/tests/unit/metabolomics/test_gnps_downloader.py
index 0ff24836..9f2524f8 100644
--- a/tests/unit/metabolomics/test_gnps_downloader.py
+++ b/tests/unit/metabolomics/test_gnps_downloader.py
@@ -1,15 +1,13 @@
+import tarfile
 import zipfile
 import pytest
 from nplinker.metabolomics.gnps import GNPSDownloader
 from nplinker.metabolomics.gnps import GNPSFormat
 
 
-@pytest.fixture(scope="module", autouse=True)
-def setup_with_fixture(gnps_website_is_down):
-    if gnps_website_is_down:
-        pytest.skip(
-            "GNPS website is down, skipping all tests in this module!", allow_module_level=True
-        )
+def test_invalid_gnps_version(tmpdir):
+    with pytest.raises(ValueError, match="Invalid GNPS version '3'"):
+        GNPSDownloader("0ad6535e34d449788f297e712f43068a", tmpdir, "3")
 
 
 def test_unknown_workflow(tmpdir):
@@ -18,37 +16,33 @@ def test_unknown_workflow(tmpdir):
 
 
 @pytest.mark.parametrize(
-    "task_id, expected",
-    [
-        ["92036537c21b44c29e509291e53f6382", GNPSFormat.FBMN],
-        ["c22f44b14a3d450eb836d607cb9521bb", GNPSFormat.SNETS],
-        ["189e8bf16af145758b0a900f1c44ff4a", GNPSFormat.SNETSV2],
-    ],
-)
-def test_supported_workflows(task_id, expected, tmpdir):
-    downloader = GNPSDownloader(task_id, tmpdir)
-    assert downloader.gnps_format == expected
-
-
-@pytest.mark.parametrize(
-    "task_id, filename",
+    "gnps_version, task_id, filename",
     [
         [
+            "1",
             "92036537c21b44c29e509291e53f6382",
             GNPSFormat.FBMN.value + "-92036537c21b44c29e509291e53f6382.zip",
         ],
         [
+            "1",
             "c22f44b14a3d450eb836d607cb9521bb",
             GNPSFormat.SNETS.value + "-c22f44b14a3d450eb836d607cb9521bb.zip",
         ],
         [
+            "1",
             "189e8bf16af145758b0a900f1c44ff4a",
             GNPSFormat.SNETSV2.value + "-189e8bf16af145758b0a900f1c44ff4a.zip",
         ],
+        [
+            "2",
+            "206a7b40b7ed41c1ae6b4fbd2def3636",
+            "206a7b40b7ed41c1ae6b4fbd2def3636.tar",
+        ],
+        ["2", "2014f321d72542afb5216c932e0d5079", "2014f321d72542afb5216c932e0d5079.tar"],
     ],
 )
-def test_get_download_file(task_id, filename, tmpdir):
-    downloader = GNPSDownloader(task_id, tmpdir)
+def test_get_download_file(gnps_version, task_id, filename, tmpdir):
+    downloader = GNPSDownloader(task_id, tmpdir, gnps_version)
     assert downloader.get_download_file() == tmpdir / filename
 
 
@@ -66,24 +60,37 @@ def test_get_task_id(task_id, tmpdir):
 
 
 @pytest.mark.parametrize(
-    "task_id, url",
+    "gnps_version, task_id, url",
     [
         [
+            "1",
             "92036537c21b44c29e509291e53f6382",
             GNPSDownloader.GNPS_DATA_DOWNLOAD_URL_FBMN.format("92036537c21b44c29e509291e53f6382"),
         ],
         [
+            "1",
             "c22f44b14a3d450eb836d607cb9521bb",
             GNPSDownloader.GNPS_DATA_DOWNLOAD_URL.format("c22f44b14a3d450eb836d607cb9521bb"),
         ],
         [
+            "1",
             "189e8bf16af145758b0a900f1c44ff4a",
             GNPSDownloader.GNPS_DATA_DOWNLOAD_URL.format("189e8bf16af145758b0a900f1c44ff4a"),
         ],
+        [
+            "2",
+            "206a7b40b7ed41c1ae6b4fbd2def3636",
+            GNPSDownloader.GNPS2_DATA_DOWNLOAD_URL.format("206a7b40b7ed41c1ae6b4fbd2def3636"),
+        ],
+        [
+            "2",
+            "2014f321d72542afb5216c932e0d5079",
+            GNPSDownloader.GNPS2_DATA_DOWNLOAD_URL.format("2014f321d72542afb5216c932e0d5079"),
+        ],
     ],
 )
-def test_get_url(task_id, url, tmpdir):
-    downloader = GNPSDownloader(task_id, tmpdir)
+def test_get_url(gnps_version, task_id, url, tmpdir):
+    downloader = GNPSDownloader(task_id, tmpdir, gnps_version)
     assert downloader.get_url() == url
 
 
@@ -95,11 +102,32 @@ def test_get_url(task_id, url, tmpdir):
         ["189e8bf16af145758b0a900f1c44ff4a", GNPSFormat.SNETSV2],
     ],
 )
-def test_downloads_file(task_id, workflow, tmpdir, gnps_zip_files):
-    downloader = GNPSDownloader(task_id, tmpdir)
+def test_download_gnps1(task_id, workflow, tmpdir, gnps_zip_files, gnps_website_is_down):
+    if gnps_website_is_down:
+        pytest.skip("GNPS website is down: https://gnps.ucsd.edu")
+    downloader = GNPSDownloader(task_id, tmpdir, gnps_version="1")
     downloader.download()
     actual = zipfile.ZipFile(downloader.get_download_file())
     actual_names = actual.namelist()
     expected = zipfile.ZipFile(gnps_zip_files[workflow])
-    expected_names = [x.filename for x in expected.filelist if x.compress_size > 0]
-    assert all(item in actual_names for item in expected_names)
+    expected_names = expected.namelist()
+    assert actual_names == expected_names
+
+
+@pytest.mark.parametrize(
+    "task_id, workflow",
+    [
+        ["2014f321d72542afb5216c932e0d5079", GNPSFormat.GNPS2FBMN],
+        ["206a7b40b7ed41c1ae6b4fbd2def3636", GNPSFormat.GNPS2CN],
+    ],
+)
+def test_download_gnps2(task_id, workflow, tmpdir, gnps2_tar_files, gnps2_website_is_down):
+    if gnps2_website_is_down:
+        pytest.skip("GNPS2 website is down: https://gnps2.org")
+    downloader = GNPSDownloader(task_id, tmpdir, gnps_version="2")
+    downloader.download()
+    with tarfile.open(downloader.get_download_file()) as actual:
+        actual_names = actual.getnames()
+    with tarfile.open(gnps2_tar_files[workflow]) as expected:
+        expected_names = expected.getnames()
+    assert actual_names == expected_names