From 7f980f49cdf348ba91bf3039e0488741206168d7 Mon Sep 17 00:00:00 2001
From: YuviPanda
Date: Fri, 13 Dec 2024 21:14:05 -0800
Subject: [PATCH] Resolve DOI more cleanly

Using doi.org, we only care to find out *where* the doi is
pointing to. We don't need to go fetch the contents of that page
fully.
---
 repo2docker/contentproviders/doi.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/repo2docker/contentproviders/doi.py b/repo2docker/contentproviders/doi.py
index 64b93202..5769b2ff 100644
--- a/repo2docker/contentproviders/doi.py
+++ b/repo2docker/contentproviders/doi.py
@@ -49,7 +49,9 @@ def doi2url(self, doi):
             doi = normalize_doi(doi)
 
             try:
-                resp = self._request(f"https://doi.org/{doi}")
+                # We don't need to fetch the *contents* of the page the doi resolves to -
+                # only need to know what it redirects to.
+                resp = self._request(f"https://doi.org/{doi}", allow_redirects=False)
                 resp.raise_for_status()
             except HTTPError as e:
                 # If the DOI doesn't exist, just return URL
@@ -60,7 +62,7 @@ def doi2url(self, doi):
                 # default Git provider as this leads to a misleading error.
                 self.log.error(f"DOI {doi} does not resolve: {e}")
                 raise
-            return resp.url
+            return resp.headers['Location']
         else:
             # Just return what is actulally just a URL
             return doi