From 7f980f49cdf348ba91bf3039e0488741206168d7 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Fri, 13 Dec 2024 21:14:05 -0800 Subject: [PATCH 1/4] Resolve DOI more cleanly Using doi.org, we only care to find out *where* the doi is pointing to. We don't need to go fetch the contents of that page fully. --- repo2docker/contentproviders/doi.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/repo2docker/contentproviders/doi.py b/repo2docker/contentproviders/doi.py index 64b93202..5769b2ff 100644 --- a/repo2docker/contentproviders/doi.py +++ b/repo2docker/contentproviders/doi.py @@ -49,7 +49,9 @@ def doi2url(self, doi): doi = normalize_doi(doi) try: - resp = self._request(f"https://doi.org/{doi}") + # We don't need to fetch the *contents* of the page the doi resolves to - + # only need to know what it redirects to. + resp = self._request(f"https://doi.org/{doi}", allow_redirects=False) resp.raise_for_status() except HTTPError as e: # If the DOI doesn't exist, just return URL @@ -60,7 +62,7 @@ def doi2url(self, doi): # default Git provider as this leads to a misleading error. self.log.error(f"DOI {doi} does not resolve: {e}") raise - return resp.url + return resp.headers['Location'] else: # Just return what is actulally just a URL return doi From 4f287dd4245e5b8f54bacd97dc9cd8a612dfb719 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Sat, 14 Dec 2024 08:34:52 -0800 Subject: [PATCH 2/4] Update call counts in tests --- tests/unit/contentproviders/test_hydroshare.py | 4 ++-- tests/unit/contentproviders/test_zenodo.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/contentproviders/test_hydroshare.py b/tests/unit/contentproviders/test_hydroshare.py index 0c8acfd1..fba7c686 100755 --- a/tests/unit/contentproviders/test_hydroshare.py +++ b/tests/unit/contentproviders/test_hydroshare.py @@ -75,8 +75,8 @@ def test_detect_hydroshare(requests_mock): assert ( Hydroshare().detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61") == expected ) - # assert 3 calls were made, 2 to resolve the DOI (302 + 200) and another to fetch the version - assert requests_mock.call_count == 3 + # assert 2 calls were made, 1 to resolve the DOI (302) and another to fetch the version + assert requests_mock.call_count == 2 requests_mock.reset_mock() assert ( diff --git a/tests/unit/contentproviders/test_zenodo.py b/tests/unit/contentproviders/test_zenodo.py index 38737ae1..185cb737 100644 --- a/tests/unit/contentproviders/test_zenodo.py +++ b/tests/unit/contentproviders/test_zenodo.py @@ -66,8 +66,8 @@ def test_detect_zenodo(test_input, expected, requests_mock): assert Zenodo().detect(test_input[0]) == expected assert Zenodo().detect(test_input[1]) == expected assert Zenodo().detect(test_input[2]) == expected - # only two of the three calls above have to resolve a DOI (2 req per doi resolution) - assert requests_mock.call_count == 4 + # only two of the three calls above have to resolve a DOI (1 req per doi resolution) + assert requests_mock.call_count == 2 requests_mock.reset_mock() # Don't trigger the Zenodo content provider From 65a0299bd03bef726119ea6ba1caf25b0f6f9f9f Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Sat, 14 Dec 2024 08:49:55 -0800 Subject: [PATCH 3/4] Update tests to account for not following redirects --- tests/unit/contentproviders/test_dataverse.py | 4 ++-- tests/unit/contentproviders/test_doi.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/contentproviders/test_dataverse.py b/tests/unit/contentproviders/test_dataverse.py index abdfe456..54e45863 100644 --- a/tests/unit/contentproviders/test_dataverse.py +++ b/tests/unit/contentproviders/test_dataverse.py @@ -70,8 +70,8 @@ def doi_resolver(req, context): assert requests_mock.call_count == 0 # valid Dataverse DOIs trigger this content provider assert Dataverse().detect(test_input[0]) == expected[0] - # 4: doi resolution (302), File, doi resolution (302), then dataset - assert requests_mock.call_count == 4 + # 4: doi resolution (302), doi resolution (302) + assert requests_mock.call_count == 2 requests_mock.reset_mock() assert Dataverse().detect(test_input[1]) == expected[0] diff --git a/tests/unit/contentproviders/test_doi.py b/tests/unit/contentproviders/test_doi.py index 301b50be..58cae5f5 100644 --- a/tests/unit/contentproviders/test_doi.py +++ b/tests/unit/contentproviders/test_doi.py @@ -33,7 +33,7 @@ def test_url_headers(requests_mock): @pytest.mark.parametrize( "requested_doi, expected", [ - ("10.5281/zenodo.3242074", "https://zenodo.org/records/3242074"), + ("10.5281/zenodo.3242074", "https://zenodo.org/record/3242074"), # Unresolving DOI: ("10.1/1234", "10.1/1234"), ], From 0e6f3b87e453594e71ac27e4ecae9f2742ee9ab1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 14 Dec 2024 16:50:21 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- repo2docker/contentproviders/doi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repo2docker/contentproviders/doi.py b/repo2docker/contentproviders/doi.py index 5769b2ff..ee51a83e 100644 --- a/repo2docker/contentproviders/doi.py +++ b/repo2docker/contentproviders/doi.py @@ -62,7 +62,7 @@ def doi2url(self, doi): # default Git provider as this leads to a misleading error. self.log.error(f"DOI {doi} does not resolve: {e}") raise - return resp.headers['Location'] + return resp.headers["Location"] else: # Just return what is actulally just a URL return doi