Skip to content

Commit

Permalink
Only use requests, not a mix of requests & urlopen
Browse files Browse the repository at this point in the history
We were:

1. In some cases, directly using requests
2. In some cases, directly using the stdlib's urlopen
3. In some cases, had a method named urlopen that simply passed
   things through to requests

This is unnecessarily confusing, and seems to primarily be done
for the benefit of mocking the network calls. However, as described
in the recently merged #1390,
I don't think mocking is appropriate here as it means we don't actually
catch problems.

This PR mostly focuses on getting unifying to only using requests
directly with as little indirection as possible. If any tests were
directly using mocks here, they will be replaced with something that
is testing things more directly as appropriate
  • Loading branch information
yuvipanda committed Dec 20, 2024
1 parent b7c1515 commit aec0e02
Show file tree
Hide file tree
Showing 9 changed files with 13 additions and 33 deletions.
8 changes: 3 additions & 5 deletions repo2docker/contentproviders/ckan.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def _fetch_version(self, api_url):
Borrowed from the Hydroshare provider.
"""
package_show_url = f"{api_url}package_show?id={self.dataset_id}"
resp = self.urlopen(package_show_url).json()
resp = self.session.get(package_show_url).json()
date = resp["result"]["metadata_modified"]
parsed_date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
epoch = parsed_date.replace(tzinfo=timezone(timedelta(0))).timestamp()
Expand All @@ -35,8 +35,6 @@ def _fetch_version(self, api_url):
def _request(self, url, **kwargs):
return self.session.get(url, **kwargs)

urlopen = _request

def detect(self, source, ref=None, extra_args=None):
"""Trigger this provider for things that resolve to a CKAN dataset."""
parsed_url = urlparse(source)
Expand All @@ -58,7 +56,7 @@ def detect(self, source, ref=None, extra_args=None):
).geturl()

status_show_url = f"{api_url}status_show"
resp = self.urlopen(status_show_url)
resp = self.session.get(status_show_url)
if resp.status_code == 200:

# Activity ID may be present either as a query parameter, activity_id
Expand Down Expand Up @@ -97,7 +95,7 @@ def fetch(self, spec, output_dir, yield_output=False):
{"id": dataset_id}
)

resp = self.urlopen(
resp = self.session.get(
fetch_url,
headers={"accept": "application/json"},
)
Expand Down
15 changes: 0 additions & 15 deletions repo2docker/contentproviders/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,6 @@ def __init__(self):
def _request(self, url, **kwargs):
return self.session.get(url, **kwargs)

urlopen = _request

def _urlopen(self, req, headers=None):
"""A urlopen() helper"""
# someone passed a string, not a request
if not isinstance(req, request.Request):
req = request.Request(req)

req.add_header("User-Agent", f"repo2docker {__version__}")
if headers is not None:
for key, value in headers.items():
req.add_header(key, value)

return request.urlopen(req)

def doi2url(self, doi):
# Transform a DOI to a URL
# If not a doi, assume we have a URL and return
Expand Down
2 changes: 1 addition & 1 deletion repo2docker/contentproviders/figshare.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def fetch(self, spec, output_dir, yield_output=False):
host = spec["host"]

yield f"Fetching Figshare article {article_id} in version {article_version}.\n"
resp = self.urlopen(
resp = self.session.get(
f'{host["api"]}{article_id}/versions/{article_version}',
headers={"accept": "application/json"},
)
Expand Down
6 changes: 3 additions & 3 deletions repo2docker/contentproviders/hydroshare.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Hydroshare(DoiProvider):

def _fetch_version(self, host):
"""Fetch resource modified date and convert to epoch"""
json_response = self.urlopen(host["version"].format(self.resource_id)).json()
json_response = self.session.get(host["version"].format(self.resource_id)).json()
date = next(
item for item in json_response["dates"] if item["type"] == "modified"
)["start_date"]
Expand Down Expand Up @@ -63,7 +63,7 @@ def fetch(self, spec, output_dir, yield_output=False, timeout=120):
yield f"Downloading {bag_url}.\n"

# bag downloads are prepared on demand and may need some time
conn = self.urlopen(bag_url)
conn = self.session.get(bag_url)
total_wait_time = 0
while (
conn.status_code == 200
Expand All @@ -77,7 +77,7 @@ def fetch(self, spec, output_dir, yield_output=False, timeout=120):
raise ContentProviderException(msg)
yield f"Bag is being prepared, requesting again in {wait_time} seconds.\n"
time.sleep(wait_time)
conn = self.urlopen(bag_url)
conn = self.session.get(bag_url)
if conn.status_code != 200:
msg = f"Failed to download bag. status code {conn.status_code}.\n"
yield msg
Expand Down
4 changes: 2 additions & 2 deletions repo2docker/contentproviders/zenodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def fetch(self, spec, output_dir, yield_output=False):
host = spec["host"]

yield f"Fetching Zenodo record {record_id}.\n"
resp = self.urlopen(
resp = self.session.get(
f'{host["api"]}{record_id}',
headers={"accept": "application/json"},
)
Expand All @@ -82,7 +82,7 @@ def fetch(self, spec, output_dir, yield_output=False):
if host["files"]:
yield f"Fetching Zenodo record {record_id} files.\n"
files_url = deep_get(record, host["files"])
resp = self.urlopen(
resp = self.session.get(
files_url,
headers={"accept": "application/json"},
)
Expand Down
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@ def finalize_options(self):

def run(self):
import json
from urllib.request import urlopen
import requests

resp = urlopen(self.url, timeout=5)
resp_body = resp.read()
data = json.loads(resp_body.decode("utf-8"))
resp = requests.get(self.url)
data = resp.json()
if "installations" not in data:
raise ValueError("Malformed installation map.")

Expand Down
2 changes: 1 addition & 1 deletion tests/unit/contentproviders/test_doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_url_headers(requests_mock):
doi = DoiProvider()

headers = {"test1": "value1", "Test2": "value2"}
result = doi.urlopen("https://mybinder.org", headers=headers)
result = doi.session.get("https://mybinder.org", headers=headers)
assert "test1" in result.request.headers
assert "Test2" in result.request.headers
assert result.request.headers["User-Agent"] == f"repo2docker {__version__}"
Expand Down
1 change: 0 additions & 1 deletion tests/unit/contentproviders/test_figshare.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from io import BytesIO
from tempfile import NamedTemporaryFile, TemporaryDirectory
from unittest.mock import patch
from urllib.request import Request, urlopen
from zipfile import ZipFile

import pytest
Expand Down
1 change: 0 additions & 1 deletion tests/unit/contentproviders/test_zenodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from io import BytesIO
from tempfile import NamedTemporaryFile, TemporaryDirectory
from unittest.mock import patch
from urllib.request import Request, urlopen
from zipfile import ZipFile

import pytest
Expand Down

0 comments on commit aec0e02

Please sign in to comment.