diff --git a/pyodide_http/__init__.py b/pyodide_http/__init__.py
index bad7237..c71be3b 100644
--- a/pyodide_http/__init__.py
+++ b/pyodide_http/__init__.py
@@ -5,7 +5,7 @@
 except ImportError:
     _SHOULD_PATCH = False
 
-__version__ = '0.1.0'
+__version__ = "0.1.0"
 
 
 def patch_requests(continue_on_import_error: bool = False):
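For orientation, the entry point reformatted above is the one end users call from inside Pyodide. A minimal usage sketch (assumes the wheel is already installed in the Pyodide environment, e.g. via micropip; the URL is illustrative):

```python
# Run inside a Pyodide session (browser console, web worker, or JupyterLite).
import pyodide_http

# Patch before creating any Session, so that every new session gets the
# XMLHttpRequest-backed adapter mounted for http:// and https://.
pyodide_http.patch_requests()

import requests

print(requests.get("https://example.com/").status_code)
```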
diff --git a/pyodide_http/_core.py b/pyodide_http/_core.py
index 2a9cda5..e05b52d 100644
--- a/pyodide_http/_core.py
+++ b/pyodide_http/_core.py
@@ -2,30 +2,32 @@
 from dataclasses import dataclass, field
 from typing import Optional, Dict
 from email.parser import Parser
+
 # need to import streaming here so that the web-worker is setup
 from ._streaming import send_streaming_request
 
+
 class _RequestError(Exception):
-    def __init__(self,message=None,*,request=None,response=None):
-        self.request=request
-        self.response=response
-        self.message=message
+    def __init__(self, message=None, *, request=None, response=None):
+        self.request = request
+        self.response = response
+        self.message = message
         super().__init__(self.message)
 
 
 class _StreamingError(_RequestError):
     pass
 
+
 class _StreamingTimeout(_StreamingError):
     pass
 
 
-
 @dataclass
 class Request:
     method: str
     url: str
-    params: Optional[Dict[str,str]]=None
+    params: Optional[Dict[str, str]] = None
     body: Optional[bytes] = None
     headers: Dict[str, str] = field(default_factory=dict)
     timeout: int = 0
@@ -38,7 +40,7 @@ def set_body(self, body: bytes):
 
     def set_json(self, body: dict):
         self.set_header("Content-Type", "application/json; charset=utf-8")
-        self.set_body(json.dumps(body).encode('utf-8'))
+        self.set_body(json.dumps(body).encode("utf-8"))
 
 
 @dataclass
@@ -48,27 +50,34 @@ class Response:
     body: bytes
 
 
-_SHOWN_WARNING=False
+_SHOWN_WARNING = False
+
 
 def show_streaming_warning():
     global _SHOWN_WARNING
     if not _SHOWN_WARNING:
-        _SHOWN_WARNING=True
+        _SHOWN_WARNING = True
         from js import console
-        console.warn("requests can't stream data in the main thread, using non-streaming fallback")
+
+        console.warn(
+            "requests can't stream data in the main thread, using non-streaming fallback"
+        )
 
 
 def send(request: Request, stream: bool = False) -> Response:
     if request.params:
         from js import URLSearchParams
+
         params = URLSearchParams.new()
         for k, v in request.params.items():
             params.append(k, v)
-        request.url += "?"+params.toString()
+        request.url += "?" + params.toString()
 
     from js import XMLHttpRequest
+
     try:
         from js import importScripts
+
         _IN_WORKER = True
     except ImportError:
         _IN_WORKER = False
@@ -88,13 +97,13 @@ def send(request: Request, stream: bool = False) -> Response:
     # set timeout only if pyodide is in a worker, because
     # there is a warning not to set timeout on synchronous main thread
     # XMLHttpRequest https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/timeout
-    if _IN_WORKER and request.timeout!=0:
-        xhr.timeout=int(request.timeout*1000)
+    if _IN_WORKER and request.timeout != 0:
+        xhr.timeout = int(request.timeout * 1000)
 
     if _IN_WORKER:
         xhr.responseType = "arraybuffer"
     else:
-        xhr.overrideMimeType('text/plain; charset=ISO-8859-15')
+        xhr.overrideMimeType("text/plain; charset=ISO-8859-15")
 
     xhr.open(request.method, request.url, False)
     for name, value in request.headers.items():
@@ -107,10 +116,6 @@ def send(request: Request, stream: bool = False) -> Response:
     if _IN_WORKER:
         body = xhr.response.to_py().tobytes()
     else:
-        body = xhr.response.encode('ISO-8859-15')
+        body = xhr.response.encode("ISO-8859-15")
 
-    return Response(
-        status_code=xhr.status,
-        headers=headers,
-        body=body
-    )
+    return Response(status_code=xhr.status, headers=headers, body=body)
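The main-thread branch of `send()` above relies on a classic trick: a synchronous `XMLHttpRequest` whose body is decoded with a single-byte charset so the text can be re-encoded back into the original bytes. A condensed sketch of just that trick, only runnable inside Pyodide where the `js` module exists (the helper name is made up):

```python
from js import XMLHttpRequest  # available only inside Pyodide

def fetch_bytes_sync(url: str) -> bytes:
    """Hypothetical helper mirroring the main-thread path of _core.send()."""
    xhr = XMLHttpRequest.new()
    # ISO-8859-15 assigns a distinct character to every byte value, so
    # decoding the body as text and re-encoding it round-trips binary data.
    xhr.overrideMimeType("text/plain; charset=ISO-8859-15")
    xhr.open("GET", url, False)  # third argument False = synchronous request
    xhr.send(None)
    return xhr.response.encode("ISO-8859-15")
```

The worker branch sidesteps the charset dance entirely by requesting an `arraybuffer` response, which is why `send()` checks `_IN_WORKER` before choosing a decoding strategy.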
diff --git a/pyodide_http/_requests.py b/pyodide_http/_requests.py
index fdb4c41..9ff53f2 100644
--- a/pyodide_http/_requests.py
+++ b/pyodide_http/_requests.py
@@ -4,7 +4,7 @@
 from requests.utils import get_encoding_from_headers, CaseInsensitiveDict
 
 from ._core import Request, send
-from ._core import _StreamingError,_StreamingTimeout
+from ._core import _StreamingError, _StreamingTimeout
 
 _IS_PATCHED = False
 
@@ -15,9 +15,7 @@ class PyodideHTTPAdapter(BaseAdapter):
     def __init__(self):
         super().__init__()
 
-    def send(
-        self, request, **kwargs
-    ):
+    def send(self, request, **kwargs):
         """Sends PreparedRequest object. Returns Response object.
         :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
         :param stream: (optional) Whether to stream the request content.
@@ -31,12 +29,12 @@ def send(
         :param cert: (optional) Any user-provided SSL certificate to be trusted.
         :param proxies: (optional) The proxies dictionary to apply to the request.
         """
-        stream = kwargs.get('stream', False)
+        stream = kwargs.get("stream", False)
         pyodide_request = Request(request.method, request.url)
-        pyodide_request.timeout=kwargs.get('timeout',0)
+        pyodide_request.timeout = kwargs.get("timeout", 0)
         if not pyodide_request.timeout:
-            pyodide_request.timeout=0
-        pyodide_request.params=None # this is done in preparing request now
+            pyodide_request.timeout = 0
+        pyodide_request.params = None  # this is done in preparing request now
         pyodide_request.headers = dict(request.headers)
         if request.body:
             pyodide_request.set_body(request.body)
@@ -44,11 +42,14 @@ def send(
             resp = send(pyodide_request, stream)
         except _StreamingTimeout:
             from requests import ConnectTimeout
+
             raise ConnectTimeout(request=pyodide_request)
         except _StreamingError:
             from requests import ConnectionError
+
             raise ConnectionError(request=pyodide_request)
         import requests
+
         response = requests.Response()
         # Fallback to None if there's no status_code, for whatever reason.
         response.status_code = getattr(resp, "status_code", None)
@@ -63,20 +64,17 @@ def send(
             # non-streaming response, make it look like a stream
             response.raw = BytesIO(resp.body)
 
-        def new_read(self,amt=None,decode_content=False,cache_content=False):
+        def new_read(self, amt=None, decode_content=False, cache_content=False):
             return self.old_read(amt)
 
         # make the response stream look like a urllib3 stream
-        response.raw.old_read=response.raw.read
-        response.raw.read=new_read.__get__(response.raw,type(response.raw))
-
+        response.raw.old_read = response.raw.read
+        response.raw.read = new_read.__get__(response.raw, type(response.raw))
 
-        response.reason = ''
+        response.reason = ""
         response.url = request.url
         return response
 
-
-
     def close(self):
         """Cleans up adapter specific items."""
         pass
@@ -94,13 +92,15 @@ def patch():
         return
 
     import requests
-    requests.sessions.Session._old_init=requests.sessions.Session.__init__
+
+    requests.sessions.Session._old_init = requests.sessions.Session.__init__
+
     def new_init(self):
         self._old_init()
         self.mount("https://", PyodideHTTPAdapter())
-        self.mount("http://", PyodideHTTPAdapter()) 
+        self.mount("http://", PyodideHTTPAdapter())
 
-    requests.sessions.Session._old_init=requests.sessions.Session.__init__
-    requests.sessions.Session.__init__=new_init
+    requests.sessions.Session._old_init = requests.sessions.Session.__init__
+    requests.sessions.Session.__init__ = new_init
 
     _IS_PATCHED = True
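The adapter being reformatted here plugs into requests' standard transport extension point: `Session.mount` picks the adapter with the longest matching URL prefix and hands it every `PreparedRequest`. A toy adapter showing the same mounting pattern in plain CPython (the `EchoAdapter` class is invented for illustration; assumes requests is installed):

```python
import requests
from requests.adapters import BaseAdapter

class EchoAdapter(BaseAdapter):
    """Toy transport: fabricates a response instead of opening a socket."""

    def send(self, request, **kwargs):
        response = requests.Response()
        response.status_code = 200
        response.url = request.url
        response._content = f"{request.method} {request.url}".encode()
        return response

    def close(self):
        pass

session = requests.Session()
session.mount("https://", EchoAdapter())  # longest prefix match wins
print(session.get("https://example.invalid/ping").text)
```

This is exactly the hook `patch()` uses: it wraps `Session.__init__` so that every new session mounts `PyodideHTTPAdapter` for both schemes.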
diff --git a/pyodide_http/_streaming.py b/pyodide_http/_streaming.py
index 2930a45..8d3141d 100644
--- a/pyodide_http/_streaming.py
+++ b/pyodide_http/_streaming.py
@@ -25,6 +25,7 @@
 from js import crossOriginIsolated
 from pyodide.ffi import to_js
 from urllib.request import Request
+
 SUCCESS_HEADER = -1
 SUCCESS_EOF = -2
 ERROR_TIMEOUT = -3
@@ -137,14 +138,14 @@ def _obj_from_dict(dict_val: dict) -> any:
 
 
 class _ReadStream(io.RawIOBase):
-    def __init__(self, int_buffer, byte_buffer,timeout,worker,connection_id):
+    def __init__(self, int_buffer, byte_buffer, timeout, worker, connection_id):
         self.int_buffer = int_buffer
         self.byte_buffer = byte_buffer
         self.read_pos = 0
         self.read_len = 0
-        self.connection_id=connection_id
-        self.worker=worker
-        self.timeout=int(1000*timeout) if timeout>0 else None
+        self.connection_id = connection_id
+        self.worker = worker
+        self.timeout = int(1000 * timeout) if timeout > 0 else None
 
     def __del__(self):
         self.worker.postMessage(_obj_from_dict({"close": self.connection_id}))
@@ -165,15 +166,17 @@ def readinto(self, byte_obj) -> bool:
         # wait for the worker to send something
         js.Atomics.store(self.int_buffer, 0, 0)
         self.worker.postMessage(_obj_from_dict({"getMore": self.connection_id}))
-        if js.Atomics.wait(self.int_buffer, 0, 0, self.timeout)=='timed-out':
+        if js.Atomics.wait(self.int_buffer, 0, 0, self.timeout) == "timed-out":
             from ._core import _StreamingTimeout
-            raise _StreamingTimeout
+
+            raise _StreamingTimeout
         data_len = self.int_buffer[0]
         if data_len > 0:
             self.read_len = data_len
             self.read_pos = 0
-        elif data_len==ERROR_EXCEPTION:
+        elif data_len == ERROR_EXCEPTION:
             from ._core import _StreamingError
+
             raise _StreamingError
         else:
             # EOF, free the buffers and return zero
@@ -184,8 +187,9 @@ def readinto(self, byte_obj) -> bool:
             return 0
         # copy from int32array to python bytes
         ret_length = min(self.read_len, len(byte_obj))
-        self.byte_buffer.subarray(
-            self.read_pos, self.read_pos+ret_length).assign_to(byte_obj[0:ret_length])
+        self.byte_buffer.subarray(self.read_pos, self.read_pos + ret_length).assign_to(
+            byte_obj[0:ret_length]
+        )
         self.read_len -= ret_length
         self.read_pos += ret_length
         return ret_length
@@ -194,18 +198,20 @@ def readinto(self, byte_obj) -> bool:
 class _StreamingFetcher:
     def __init__(self):
         # make web-worker and data buffer on startup
-        dataBlob = js.Blob.new([_STREAMING_WORKER_CODE], _obj_from_dict(
-            {"type": 'application/javascript'}))
+        dataBlob = js.Blob.new(
+            [_STREAMING_WORKER_CODE], _obj_from_dict({"type": "application/javascript"})
+        )
         dataURL = js.URL.createObjectURL(dataBlob)
         self._worker = js.Worker.new(dataURL)
 
     def send(self, request):
         from ._core import Response
+
         headers = request.headers
         body = request.body
         fetch_data = {"headers": headers, "body": body, "method": request.method}
         # start the request off in the worker
-        timeout=int(1000*request.timeout) if request.timeout>0 else None
+        timeout = int(1000 * request.timeout) if request.timeout > 0 else None
         shared_buffer = js.SharedArrayBuffer.new(1048576)
         int_buffer = js.Int32Array.new(shared_buffer)
         byte_buffer = js.Uint8Array.new(shared_buffer, 8)
@@ -213,15 +219,34 @@ def send(self, request):
         js.Atomics.store(int_buffer, 0, 0)
         js.Atomics.notify(int_buffer, 0)
         absolute_url = js.URL.new(request.url, js.location).href
-        js.console.log(_obj_from_dict(
-            {"buffer": shared_buffer, "url": absolute_url, "fetchParams": fetch_data}))
-        self._worker.postMessage(_obj_from_dict(
-            {"buffer": shared_buffer, "url": absolute_url, "fetchParams": fetch_data}))
+        js.console.log(
+            _obj_from_dict(
+                {
+                    "buffer": shared_buffer,
+                    "url": absolute_url,
+                    "fetchParams": fetch_data,
+                }
+            )
+        )
+        self._worker.postMessage(
+            _obj_from_dict(
+                {
+                    "buffer": shared_buffer,
+                    "url": absolute_url,
+                    "fetchParams": fetch_data,
+                }
+            )
+        )
         # wait for the worker to send something
         js.Atomics.wait(int_buffer, 0, 0, timeout)
         if int_buffer[0] == 0:
             from ._core import _StreamingTimeout
-            raise _StreamingTimeout("Timeout connecting to streaming request",request=request, response=None)
+
+            raise _StreamingTimeout(
+                "Timeout connecting to streaming request",
+                request=request,
+                response=None,
+            )
         if int_buffer[0] == SUCCESS_HEADER:
             # got response
             # header length is in second int of intBuffer
@@ -236,8 +261,16 @@ def send(self, request):
             return Response(
                 status_code=response_obj["status"],
                 headers=response_obj["headers"],
-                body=io.BufferedReader(_ReadStream(
-                    int_buffer, byte_buffer,request.timeout,self._worker,response_obj["connectionID"]), buffer_size=1048576)
+                body=io.BufferedReader(
+                    _ReadStream(
+                        int_buffer,
+                        byte_buffer,
+                        request.timeout,
+                        self._worker,
+                        response_obj["connectionID"],
+                    ),
+                    buffer_size=1048576,
+                ),
             )
         if int_buffer[0] == ERROR_EXCEPTION:
             string_len = int_buffer[1]
@@ -245,7 +278,10 @@ def send(self, request):
             decoder = js.TextDecoder.new()
             json_str = decoder.decode(byte_buffer.slice(0, string_len))
             from ._core import _StreamingError
-            raise _StreamingError(f"Exception thrown in fetch: {json_str}",request=request, response=None)
+
+            raise _StreamingError(
+                f"Exception thrown in fetch: {json_str}", request=request, response=None
+            )
 
 
 if crossOriginIsolated:
@@ -253,10 +289,12 @@ def send(self, request):
 else:
     _fetcher = None
 
+
 def send_streaming_request(request: Request):
     if _fetcher:
        return _fetcher.send(request)
     else:
         from ._core import show_streaming_warning
+
         show_streaming_warning()
         return False
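The synchronous read loop above is a SharedArrayBuffer handshake: the Python side zeroes a flag, asks the worker for more data, blocks in `Atomics.wait`, then reads however many bytes the worker reported. A stripped-down sketch of that consumer side (Pyodide-only, and it must itself run in a web worker since browsers forbid `Atomics.wait` on the main thread; the message shape and names are assumptions modeled on the buffer layout above):

```python
import js
from pyodide.ffi import to_js

# Same layout as _streaming.py: control ints at the front of the shared
# buffer, payload bytes starting at offset 8.
shared = js.SharedArrayBuffer.new(1024 * 1024)
flag = js.Int32Array.new(shared)
payload = js.Uint8Array.new(shared, 8)

def next_chunk(worker, connection_id, timeout_ms=None) -> int:
    js.Atomics.store(flag, 0, 0)  # reset before asking, to avoid lost wakeups
    worker.postMessage(
        to_js({"getMore": connection_id}, dict_converter=js.Object.fromEntries)
    )
    # Blocks this worker until the producer stores a value and notifies;
    # a timeout of None (undefined on the JS side) means wait forever.
    if js.Atomics.wait(flag, 0, 0, timeout_ms) == "timed-out":
        raise TimeoutError("streaming worker did not answer in time")
    return flag[0]  # >0: that many bytes are now readable from `payload`
```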
diff --git a/pyodide_http/_urllib.py b/pyodide_http/_urllib.py
index 46f7ae5..223a2d4 100644
--- a/pyodide_http/_urllib.py
+++ b/pyodide_http/_urllib.py
@@ -18,7 +18,7 @@ def makefile(self, mode):
 
 def urlopen(url):
-    method = 'GET'
+    method = "GET"
     data = None
     headers = {}
 
     if isinstance(url, urllib.request.Request):
@@ -36,16 +36,17 @@ def urlopen(url):
     # data itself is uncompressed. This will cause problems while decoding our
     # fake response.
     headers_without_content_length = {
-        k: v
-        for k, v in resp.headers.items()
-        if k != 'content-length'
+        k: v for k, v in resp.headers.items() if k != "content-length"
     }
     response_data = (
-        b'HTTP/1.1 ' + str(resp.status_code).encode('ascii') + b"\n" +
-        "\n".join(
+        b"HTTP/1.1 "
+        + str(resp.status_code).encode("ascii")
+        + b"\n"
+        + "\n".join(
             f"{key}: {value}"
             for key, value in headers_without_content_length.items()
-        ).encode('ascii') + b"\n\n" +
-        resp.body
+        ).encode("ascii")
+        + b"\n\n"
+        + resp.body
     )
     response = HTTPResponse(FakeSock(response_data))
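The `response_data` assembly above is the heart of the urllib shim: it forges a raw HTTP/1.1 byte stream so that `http.client.HTTPResponse` does the status and header parsing. The same trick works in plain CPython, which makes it easy to verify; a self-contained sketch (header values are made up):

```python
import io
from http.client import HTTPResponse

class FakeSock:
    """The minimal socket surface HTTPResponse needs: makefile()."""

    def __init__(self, data: bytes):
        self._data = data

    def makefile(self, mode):
        return io.BytesIO(self._data)

raw = (
    b"HTTP/1.1 200 OK\n"
    b"content-type: text/plain\n"
    b"x-demo: 1\n"
    b"\n"
    b"hello"
)

response = HTTPResponse(FakeSock(raw))
response.begin()  # parse status line and headers
assert response.status == 200
assert response.getheader("x-demo") == "1"
assert response.read() == b"hello"  # no content-length, so body reads to EOF
```

Dropping `content-length`, as the hunk above does, matters because the body bytes are already decompressed; a stale length from the original response would truncate or over-read the fake stream.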
diff --git a/setup.py b/setup.py
index e9bf91a..e3ef1ef 100644
--- a/setup.py
+++ b/setup.py
@@ -5,6 +5,7 @@
 
 def setup_package():
     from pyodide_http import __version__
+
     setup(
         name="pyodide_http",
         version=__version__,
@@ -14,7 +15,7 @@ def setup_package():
         packages=setuptools.find_packages(exclude=["tests"]),
         license="MIT",
         description="Patch requests, urllib and urllib3 to make them work in Pyodide",
-        long_description='Patch requests, urllib and urllib3 to make them work in Pyodide',
+        long_description="Patch requests, urllib and urllib3 to make them work in Pyodide",
         long_description_content_type="text/markdown",
         classifiers=[],
         install_requires=[],
diff --git a/tests/test_non_streaming.py b/tests/test_non_streaming.py
index 5196a5b..78a48d5 100644
--- a/tests/test_non_streaming.py
+++ b/tests/test_non_streaming.py
@@ -4,23 +4,26 @@
 from pytest_pyodide import run_in_pyodide, spawn_web_server
 from pytest import fixture
 
+
 @fixture(scope="module")
 def dist_dir(request):
     # return pyodide dist dir relative to top level of webserver
-    p=Path(request.config.getoption('--dist-dir')).resolve()
-    p=p.relative_to(Path(__file__).parent.parent)
+    p = Path(request.config.getoption("--dist-dir")).resolve()
+    p = p.relative_to(Path(__file__).parent.parent)
     return p
 
+
 @fixture(scope="module")
 def web_server_base():
-    server_folder=Path(__file__).parent.parent
+    server_folder = Path(__file__).parent.parent
     with spawn_web_server(server_folder) as server:
         server_hostname, server_port, _ = server
-        base_url=f"http://{server_hostname}:{server_port}/"
+        base_url = f"http://{server_hostname}:{server_port}/"
         yield base_url
 
-def _install_package(selenium,base_url):
-    wheel_folder=Path(__file__).parent.parent / "dist"
+
+def _install_package(selenium, base_url):
+    wheel_folder = Path(__file__).parent.parent / "dist"
     selenium.run_js(
         f"""
 
@@ -30,7 +33,7 @@ def _install_package(selenium, base_url):
     selenium.run_async(f'import micropip\nawait micropip.install("requests")')
 
     for wheel in wheel_folder.glob("*.whl"):
-        url = base_url +"dist/" + str(wheel.name)
+        url = base_url + "dist/" + str(wheel.name)
         selenium.run_async(f'await micropip.install("{url}")')
 
     selenium.run(
@@ -42,32 +45,37 @@ def _install_package(selenium, base_url):
     )
 
 
-def test_install_package(selenium_standalone,web_server_base):
-    _install_package(selenium_standalone,web_server_base)
+def test_install_package(selenium_standalone, web_server_base):
+    _install_package(selenium_standalone, web_server_base)
 
 
-def test_requests_get(selenium_standalone,dist_dir,web_server_base):
-    _install_package(selenium_standalone,web_server_base)
+def test_requests_get(selenium_standalone, dist_dir, web_server_base):
+    _install_package(selenium_standalone, web_server_base)
 
     @run_in_pyodide
-    def test_fn(selenium_standalone,base_url):
+    def test_fn(selenium_standalone, base_url):
         import requests
-        print("get:",base_url)
-        resp=requests.get(f"{base_url}/yt-4.0.4-cp310-cp310-emscripten_3_1_14_wasm32.whl")
-        data=resp.content
+
+        print("get:", base_url)
+        resp = requests.get(
+            f"{base_url}/yt-4.0.4-cp310-cp310-emscripten_3_1_14_wasm32.whl"
+        )
+        data = resp.content
         return len(data)
 
-    assert test_fn(selenium_standalone,f"{web_server_base}{dist_dir}/")==11373926
+    assert test_fn(selenium_standalone, f"{web_server_base}{dist_dir}/") == 11373926
+
 
-def test_requests_404(selenium_standalone,dist_dir,web_server_base):
-    _install_package(selenium_standalone,web_server_base)
+def test_requests_404(selenium_standalone, dist_dir, web_server_base):
+    _install_package(selenium_standalone, web_server_base)
 
     @run_in_pyodide
-    def test_fn(selenium_standalone,base_url):
+    def test_fn(selenium_standalone, base_url):
         import requests
-        print("get:",base_url)
-        resp=requests.get(f"{base_url}/surely_this_file_does_not_exist.hopefully.")
-        response=resp.status_code
+
+        print("get:", base_url)
+        resp = requests.get(f"{base_url}/surely_this_file_does_not_exist.hopefully.")
+        response = resp.status_code
         return response
 
-    assert test_fn(selenium_standalone,f"{web_server_base}{dist_dir}/")==404
+    assert test_fn(selenium_standalone, f"{web_server_base}{dist_dir}/") == 404
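These tests follow the pytest-pyodide pattern: the outer function runs in the host pytest process, while the `@run_in_pyodide` inner function is shipped to the browser and executed inside the Pyodide interpreter, with arguments and return values serialized across the boundary. A minimal sketch of that shape (assumes a configured pytest-pyodide environment providing the `selenium_standalone` fixture):

```python
from pytest_pyodide import run_in_pyodide

def test_json_roundtrip(selenium_standalone):
    @run_in_pyodide
    def inner(selenium):
        # This body executes inside Pyodide in the browser, not in pytest.
        import json

        return json.loads(json.dumps({"answer": 42}))["answer"]

    assert inner(selenium_standalone) == 42
```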
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
index bf7f39a..193611b 100644
--- a/tests/test_streaming.py
+++ b/tests/test_streaming.py
@@ -21,8 +21,9 @@ def spawn_web_server_custom(server_folder, custom_headers):
     tmp_dir = tempfile.mkdtemp()
     log_path = Path(tmp_dir) / "http-server.log"
     q = multiprocessing.Queue()
-    p = multiprocessing.Process(target=_run_web_server, args=(
-        q, log_path, server_folder, custom_headers))
+    p = multiprocessing.Process(
+        target=_run_web_server, args=(q, log_path, server_folder, custom_headers)
+    )
 
     try:
         p.start()
@@ -92,13 +93,15 @@ def service_actions():
 @fixture(scope="session", params=["isolated", "non-isolated"])
 def web_server_main(request):
     if request.param == "isolated":
-        headers = {"x-mylovelyheader": "123", "Cross-Origin-Opener-Policy": "same-origin",
-                   "Cross-Origin-Embedder-Policy": "require-corp"}
+        headers = {
+            "x-mylovelyheader": "123",
+            "Cross-Origin-Opener-Policy": "same-origin",
+            "Cross-Origin-Embedder-Policy": "require-corp",
+        }
     else:
         headers = {"x-mylovelyheader": "123"}
     # this header is needed otherwise chrome strips the headers in a cross origin request (which all selenium requests are)
-    headers["Access-Control-Expose-Headers"] = ",".join(
-        [x for x in headers.keys()])
+    headers["Access-Control-Expose-Headers"] = ",".join([x for x in headers.keys()])
     """Web server that serves files in the dist directory"""
     with spawn_web_server_custom(request.config.option.dist_dir, headers) as output:
         yield output
@@ -107,7 +110,7 @@ def web_server_main(request):
 
 @fixture(scope="module")
 def dist_dir(request):
     # return pyodide dist dir relative to top level of webserver
-    p = Path(request.config.getoption('--dist-dir')).resolve()
+    p = Path(request.config.getoption("--dist-dir")).resolve()
     p = p.relative_to(Path(__file__).parent.parent)
     return p
@@ -116,8 +119,7 @@ def dist_dir(request):
 def web_server_dist(request):
     server_folder = Path(__file__).parent.parent
     headers = {"x-mylovelyheader": "123"}
-    headers["Access-Control-Expose-Headers"] = ",".join(
-        [x for x in headers.keys()])
+    headers["Access-Control-Expose-Headers"] = ",".join([x for x in headers.keys()])
     with spawn_web_server_custom(server_folder, headers) as server:
         server_hostname, server_port, _ = server
         base_url = f"http://{server_hostname}:{server_port}/"
@@ -184,6 +186,7 @@ def get_install_package_code(base_url):
     """
     return code
 
+
 # def test_requests_hango(selenium_standalone,web_server_dist,big_file_path):
 #     import time
 #     while True:
@@ -194,8 +197,8 @@ def test_requests_stream_worker(selenium_standalone, web_server_dist, big_file_p
     test_filename, test_size = big_file_path
     fetch_url = f"{web_server_dist}{test_filename}"
     resp = selenium_standalone.run_webworker(
-        get_install_package_code(web_server_dist) +
-        f"""
+        get_install_package_code(web_server_dist)
+        + f"""
 import js
 #assert(js.crossOriginIsolated==True)
 import requests
@@ -213,7 +216,8 @@ def test_requests_stream_worker(selenium_standalone, web_server_dist, big_file_p
 # check streaming is really happening
 assert (data_count>1)
 data_len
-    """)
+    """
+    )
     assert resp == big_file_path[1]
 
 
@@ -224,8 +228,8 @@ def test_requests_404(selenium_standalone, dist_dir, web_server_dist):
     @run_in_pyodide
     def test_fn(selenium_standalone, base_url):
         import requests
-        resp = requests.get(
-            f"{base_url}/surely_this_file_does_not_exist.hopefully.")
+
+        resp = requests.get(f"{base_url}/surely_this_file_does_not_exist.hopefully.")
         response = resp.status_code
         return response
 
@@ -236,18 +240,23 @@ def test_install_package_isolated(selenium_standalone, web_server_dist):
     _install_package(selenium_standalone, web_server_dist)
 
 
-def test_requests_stream_main_thread(selenium_standalone, dist_dir, web_server_dist, big_file_path):
+def test_requests_stream_main_thread(
+    selenium_standalone, dist_dir, web_server_dist, big_file_path
+):
     _install_package(selenium_standalone, web_server_dist)
     test_filename, test_size = big_file_path
 
     @run_in_pyodide
     def test_fn(selenium_standalone, fetch_url):
         import requests
+
         resp = requests.get(fetch_url, stream=True)
         data = resp.content
         return len(data)
-    assert test_fn(selenium_standalone,
-                   f"{web_server_dist}{test_filename}") == test_size
+
+    assert (
+        test_fn(selenium_standalone, f"{web_server_dist}{test_filename}") == test_size
+    )
 
 
 def test_response_headers(selenium_standalone, web_server_dist, big_file_path):
@@ -257,19 +266,24 @@ def test_response_headers(selenium_standalone, web_server_dist, big_file_path):
     @run_in_pyodide
     def test_fn(selenium_standalone, fetch_url):
         import requests
+
         resp = requests.get(fetch_url, stream=True)
-        assert (resp.headers["X-MyLovelyHeader"] == "123")
+        assert resp.headers["X-MyLovelyHeader"] == "123"
+
     test_fn(selenium_standalone, f"{web_server_dist}{test_filename}")
 
+
 # test if two requests in parallel actually stream together rather than one at a time
-def test_requests_parallel_stream_workers(request,selenium_standalone, web_server_dist, big_file_path):
-    if str(request.node.name).find("non-isolated")!=-1:
+def test_requests_parallel_stream_workers(
+    request, selenium_standalone, web_server_dist, big_file_path
+):
+    if str(request.node.name).find("non-isolated") != -1:
         return
     test_filename, _ = big_file_path
     fetch_url = f"{web_server_dist}{test_filename}"
     resp = selenium_standalone.run_webworker(
-        get_install_package_code(web_server_dist) +
-        f"""
+        get_install_package_code(web_server_dist)
+        + f"""
 import js
 # parallel streaming only works if isolated
 assert(js.crossOriginIsolated)
@@ -301,5 +315,6 @@ def test_requests_parallel_stream_workers(request,selenium_standalone, web_serve
 # check parallel streaming is happening
 assert (data_minimum_non_zero!=None and data_minimum_non_zero[0]
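A closing note on the "isolated" parametrization above: `SharedArrayBuffer`, and with it the whole streaming path, is only available when the page is cross-origin isolated, which is exactly what the two extra response headers in `web_server_main` switch on. A standard-library sketch of serving files that way (host and port are illustrative):

```python
from functools import partial
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer

class IsolatedHandler(SimpleHTTPRequestHandler):
    def end_headers(self):
        # These two headers make the page crossOriginIsolated, the
        # precondition for SharedArrayBuffer and Atomics-based streaming.
        self.send_header("Cross-Origin-Opener-Policy", "same-origin")
        self.send_header("Cross-Origin-Embedder-Policy", "require-corp")
        super().end_headers()

handler = partial(IsolatedHandler, directory=".")
ThreadingHTTPServer(("127.0.0.1", 8000), handler).serve_forever()
```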