From aec0e02b9739e3b11437c59d6a18abc57190a141 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Fri, 20 Dec 2024 15:46:49 -0800 Subject: [PATCH] Only use requests, not a mix of requests & urlopen We were: 1. In some cases, directly using requests 2. In some cases, directly using the stdlib's urlopen 3. In some cases, had a method named urlopen that simply passed things through to requests This is unnecessarily confusing, and seems to primarily be done for the benefit of mocking the network calls. However, as described in the recently merged https://github.com/jupyterhub/repo2docker/pull/1390, I don't think mocking is appropriate here as it means we don't actually catch problems. This PR mostly focuses on getting unifying to only using requests directly with as little indirection as possible. If any tests were directly using mocks here, they will be replaced with something that is testing things more directly as appropriate --- repo2docker/contentproviders/ckan.py | 8 +++----- repo2docker/contentproviders/doi.py | 15 --------------- repo2docker/contentproviders/figshare.py | 2 +- repo2docker/contentproviders/hydroshare.py | 6 +++--- repo2docker/contentproviders/zenodo.py | 4 ++-- setup.py | 7 +++---- tests/unit/contentproviders/test_doi.py | 2 +- tests/unit/contentproviders/test_figshare.py | 1 - tests/unit/contentproviders/test_zenodo.py | 1 - 9 files changed, 13 insertions(+), 33 deletions(-) diff --git a/repo2docker/contentproviders/ckan.py b/repo2docker/contentproviders/ckan.py index 2618fd06e..7f0dec775 100644 --- a/repo2docker/contentproviders/ckan.py +++ b/repo2docker/contentproviders/ckan.py @@ -25,7 +25,7 @@ def _fetch_version(self, api_url): Borrowed from the Hydroshare provider. """ package_show_url = f"{api_url}package_show?id={self.dataset_id}" - resp = self.urlopen(package_show_url).json() + resp = self.session.get(package_show_url).json() date = resp["result"]["metadata_modified"] parsed_date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%f") epoch = parsed_date.replace(tzinfo=timezone(timedelta(0))).timestamp() @@ -35,8 +35,6 @@ def _fetch_version(self, api_url): def _request(self, url, **kwargs): return self.session.get(url, **kwargs) - urlopen = _request - def detect(self, source, ref=None, extra_args=None): """Trigger this provider for things that resolve to a CKAN dataset.""" parsed_url = urlparse(source) @@ -58,7 +56,7 @@ def detect(self, source, ref=None, extra_args=None): ).geturl() status_show_url = f"{api_url}status_show" - resp = self.urlopen(status_show_url) + resp = self.session.get(status_show_url) if resp.status_code == 200: # Activity ID may be present either as a query parameter, activity_id @@ -97,7 +95,7 @@ def fetch(self, spec, output_dir, yield_output=False): {"id": dataset_id} ) - resp = self.urlopen( + resp = self.session.get( fetch_url, headers={"accept": "application/json"}, ) diff --git a/repo2docker/contentproviders/doi.py b/repo2docker/contentproviders/doi.py index cf43a1578..2dc34b561 100644 --- a/repo2docker/contentproviders/doi.py +++ b/repo2docker/contentproviders/doi.py @@ -27,21 +27,6 @@ def __init__(self): def _request(self, url, **kwargs): return self.session.get(url, **kwargs) - urlopen = _request - - def _urlopen(self, req, headers=None): - """A urlopen() helper""" - # someone passed a string, not a request - if not isinstance(req, request.Request): - req = request.Request(req) - - req.add_header("User-Agent", f"repo2docker {__version__}") - if headers is not None: - for key, value in headers.items(): - req.add_header(key, value) - - return request.urlopen(req) - def doi2url(self, doi): # Transform a DOI to a URL # If not a doi, assume we have a URL and return diff --git a/repo2docker/contentproviders/figshare.py b/repo2docker/contentproviders/figshare.py index 6c7b26c86..e8fec3fe7 100644 --- a/repo2docker/contentproviders/figshare.py +++ b/repo2docker/contentproviders/figshare.py @@ -74,7 +74,7 @@ def fetch(self, spec, output_dir, yield_output=False): host = spec["host"] yield f"Fetching Figshare article {article_id} in version {article_version}.\n" - resp = self.urlopen( + resp = self.session.get( f'{host["api"]}{article_id}/versions/{article_version}', headers={"accept": "application/json"}, ) diff --git a/repo2docker/contentproviders/hydroshare.py b/repo2docker/contentproviders/hydroshare.py index 4104d97a6..90c41914f 100755 --- a/repo2docker/contentproviders/hydroshare.py +++ b/repo2docker/contentproviders/hydroshare.py @@ -15,7 +15,7 @@ class Hydroshare(DoiProvider): def _fetch_version(self, host): """Fetch resource modified date and convert to epoch""" - json_response = self.urlopen(host["version"].format(self.resource_id)).json() + json_response = self.session.get(host["version"].format(self.resource_id)).json() date = next( item for item in json_response["dates"] if item["type"] == "modified" )["start_date"] @@ -63,7 +63,7 @@ def fetch(self, spec, output_dir, yield_output=False, timeout=120): yield f"Downloading {bag_url}.\n" # bag downloads are prepared on demand and may need some time - conn = self.urlopen(bag_url) + conn = self.session.get(bag_url) total_wait_time = 0 while ( conn.status_code == 200 @@ -77,7 +77,7 @@ def fetch(self, spec, output_dir, yield_output=False, timeout=120): raise ContentProviderException(msg) yield f"Bag is being prepared, requesting again in {wait_time} seconds.\n" time.sleep(wait_time) - conn = self.urlopen(bag_url) + conn = self.session.get(bag_url) if conn.status_code != 200: msg = f"Failed to download bag. status code {conn.status_code}.\n" yield msg diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 6982c3a7c..31ea17415 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -73,7 +73,7 @@ def fetch(self, spec, output_dir, yield_output=False): host = spec["host"] yield f"Fetching Zenodo record {record_id}.\n" - resp = self.urlopen( + resp = self.session.get( f'{host["api"]}{record_id}', headers={"accept": "application/json"}, ) @@ -82,7 +82,7 @@ def fetch(self, spec, output_dir, yield_output=False): if host["files"]: yield f"Fetching Zenodo record {record_id} files.\n" files_url = deep_get(record, host["files"]) - resp = self.urlopen( + resp = self.session.get( files_url, headers={"accept": "application/json"}, ) diff --git a/setup.py b/setup.py index 58abff7c0..d37aa17cc 100644 --- a/setup.py +++ b/setup.py @@ -26,11 +26,10 @@ def finalize_options(self): def run(self): import json - from urllib.request import urlopen + import requests - resp = urlopen(self.url, timeout=5) - resp_body = resp.read() - data = json.loads(resp_body.decode("utf-8")) + resp = requests.get(self.url) + data = resp.json() if "installations" not in data: raise ValueError("Malformed installation map.") diff --git a/tests/unit/contentproviders/test_doi.py b/tests/unit/contentproviders/test_doi.py index 58cae5f5b..f2de2b50b 100644 --- a/tests/unit/contentproviders/test_doi.py +++ b/tests/unit/contentproviders/test_doi.py @@ -24,7 +24,7 @@ def test_url_headers(requests_mock): doi = DoiProvider() headers = {"test1": "value1", "Test2": "value2"} - result = doi.urlopen("https://mybinder.org", headers=headers) + result = doi.session.get("https://mybinder.org", headers=headers) assert "test1" in result.request.headers assert "Test2" in result.request.headers assert result.request.headers["User-Agent"] == f"repo2docker {__version__}" diff --git a/tests/unit/contentproviders/test_figshare.py b/tests/unit/contentproviders/test_figshare.py index 760c0e487..881c8513b 100644 --- a/tests/unit/contentproviders/test_figshare.py +++ b/tests/unit/contentproviders/test_figshare.py @@ -5,7 +5,6 @@ from io import BytesIO from tempfile import NamedTemporaryFile, TemporaryDirectory from unittest.mock import patch -from urllib.request import Request, urlopen from zipfile import ZipFile import pytest diff --git a/tests/unit/contentproviders/test_zenodo.py b/tests/unit/contentproviders/test_zenodo.py index a1e1226fb..1152ce2cf 100644 --- a/tests/unit/contentproviders/test_zenodo.py +++ b/tests/unit/contentproviders/test_zenodo.py @@ -5,7 +5,6 @@ from io import BytesIO from tempfile import NamedTemporaryFile, TemporaryDirectory from unittest.mock import patch -from urllib.request import Request, urlopen from zipfile import ZipFile import pytest