diff --git a/repo2docker/contentproviders/doi.py b/repo2docker/contentproviders/doi.py index 03fa0171c..065602b7a 100644 --- a/repo2docker/contentproviders/doi.py +++ b/repo2docker/contentproviders/doi.py @@ -51,9 +51,15 @@ def doi2url(self, doi): try: resp = self._request(f"https://doi.org/{doi}") resp.raise_for_status() - # If the DOI doesn't resolve, just return URL - except HTTPError: - return doi + except HTTPError as e: + # If the DOI doesn't exist, just return URL + if e.response.status_code == 404: + return doi + # Reraise any other errors because if the DOI service is down (or + # we hit a rate limit) we don't want to silently continue to the + # default Git provider as this leads to a misleading error. + logging.error(f"DOI {doi} does not resolve: {e}") + raise return resp.url else: # Just return what is actulally just a URL diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 5d02f723b..6982c3a7c 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -22,19 +22,26 @@ def __init__(self): "hostname": [ "https://sandbox.zenodo.org/record/", "http://sandbox.zenodo.org/record/", + "http://sandbox.zenodo.org/records/", ], "api": "https://sandbox.zenodo.org/api/records/", - "filepath": "files", - "filename": "filename", - "download": "links.download", + "files": "links.files", + "filepath": "entries", + "filename": "key", + "download": "links.content", "type": "metadata.upload_type", }, { - "hostname": ["https://zenodo.org/record/", "http://zenodo.org/record/"], + "hostname": [ + "https://zenodo.org/record/", + "http://zenodo.org/record/", + "https://zenodo.org/records/", + ], "api": "https://zenodo.org/api/records/", - "filepath": "files", - "filename": "filename", - "download": "links.download", + "files": "links.files", + "filepath": "entries", + "filename": "key", + "download": "links.content", "type": "metadata.upload_type", }, { @@ -43,6 +50,7 @@ def __init__(self): "http://data.caltech.edu/records/", ], "api": "https://data.caltech.edu/api/record/", + "files": "", "filepath": "metadata.electronic_location_and_access", "filename": "electronic_name.0", "download": "uniform_resource_identifier", @@ -69,9 +77,17 @@ def fetch(self, spec, output_dir, yield_output=False): f'{host["api"]}{record_id}', headers={"accept": "application/json"}, ) - record = resp.json() + if host["files"]: + yield f"Fetching Zenodo record {record_id} files.\n" + files_url = deep_get(record, host["files"]) + resp = self.urlopen( + files_url, + headers={"accept": "application/json"}, + ) + record = resp.json() + files = deep_get(record, host["filepath"]) only_one_file = len(files) == 1 for file_ref in files: diff --git a/tests/unit/contentproviders/test_doi.py b/tests/unit/contentproviders/test_doi.py index dab3d7fcd..301b50beb 100644 --- a/tests/unit/contentproviders/test_doi.py +++ b/tests/unit/contentproviders/test_doi.py @@ -30,8 +30,14 @@ def test_url_headers(requests_mock): assert result.request.headers["User-Agent"] == f"repo2docker {__version__}" -def test_unresolving_doi(): +@pytest.mark.parametrize( + "requested_doi, expected", + [ + ("10.5281/zenodo.3242074", "https://zenodo.org/records/3242074"), + # Unresolving DOI: + ("10.1/1234", "10.1/1234"), + ], +) +def test_doi2url(requested_doi, expected): doi = DoiProvider() - - fakedoi = "10.1/1234" - assert doi.doi2url(fakedoi) is fakedoi + assert doi.doi2url(requested_doi) == expected diff --git a/tests/unit/contentproviders/test_zenodo.py b/tests/unit/contentproviders/test_zenodo.py index 0755163da..38737ae1a 100644 --- a/tests/unit/contentproviders/test_zenodo.py +++ b/tests/unit/contentproviders/test_zenodo.py @@ -92,16 +92,31 @@ def test_fetch_software_from_github_archive(requests_mock): # we "fetch" a local ZIP file to simulate a Zenodo record created from a # GitHub repository via the Zenodo-GitHub integration with zenodo_archive() as zen_path: - mock_response = { + mock_record = { "files": [ { "filename": "some_dir/afake.zip", - "links": {"download": f"file://{zen_path}"}, } ], + "links": { + "files": "https://zenodo.org/api/records/1234/files", + }, "metadata": {"upload_type": "other"}, } - requests_mock.get("https://zenodo.org/api/records/1234", json=mock_response) + requests_mock.get("https://zenodo.org/api/records/1234", json=mock_record) + + mock_record_files = { + "entries": [ + { + "key": "some_dir/afake.zip", + "links": {"content": f"file://{zen_path}"}, + } + ], + } + requests_mock.get( + "https://zenodo.org/api/records/1234/files", json=mock_record_files + ) + requests_mock.get(f"file://{zen_path}", content=open(zen_path, "rb").read()) zen = Zenodo() @@ -121,18 +136,33 @@ def test_fetch_software(requests_mock): # we "fetch" a local ZIP file to simulate a Zenodo software record with a # ZIP file in it with zenodo_archive() as zen_path: - mock_response = { + mock_record = { "files": [ { # this is the difference to the GitHub generated one, # the ZIP file isn't in a directory "filename": "afake.zip", - "links": {"download": f"file://{zen_path}"}, } ], + "links": { + "files": "https://zenodo.org/api/records/1234/files", + }, "metadata": {"upload_type": "software"}, } - requests_mock.get("https://zenodo.org/api/records/1234", json=mock_response) + requests_mock.get("https://zenodo.org/api/records/1234", json=mock_record) + + mock_record_files = { + "entries": [ + { + "key": "afake.zip", + "links": {"content": f"file://{zen_path}"}, + } + ], + } + requests_mock.get( + "https://zenodo.org/api/records/1234/files", json=mock_record_files + ) + requests_mock.get(f"file://{zen_path}", content=open(zen_path, "rb").read()) with TemporaryDirectory() as d: @@ -151,20 +181,38 @@ def test_fetch_data(requests_mock): # we "fetch" a local ZIP file to simulate a Zenodo data record with zenodo_archive() as a_zen_path: with zenodo_archive() as b_zen_path: - mock_response = { + mock_record = { "files": [ { "filename": "afake.zip", - "links": {"download": f"file://{a_zen_path}"}, }, { "filename": "bfake.zip", - "links": {"download": f"file://{b_zen_path}"}, }, ], + "links": { + "files": "https://zenodo.org/api/records/1234/files", + }, "metadata": {"upload_type": "data"}, } - requests_mock.get("https://zenodo.org/api/records/1234", json=mock_response) + requests_mock.get("https://zenodo.org/api/records/1234", json=mock_record) + + mock_record_files = { + "entries": [ + { + "key": "afake.zip", + "links": {"content": f"file://{a_zen_path}"}, + }, + { + "key": "bfake.zip", + "links": {"content": f"file://{b_zen_path}"}, + }, + ], + } + requests_mock.get( + "https://zenodo.org/api/records/1234/files", json=mock_record_files + ) + requests_mock.get( f"file://{a_zen_path}", content=open(a_zen_path, "rb").read() )