From d877cd28895e3592286d6d4948bd26f46a44a8b9 Mon Sep 17 00:00:00 2001 From: TAHRI Ahmed R Date: Thu, 21 Sep 2023 13:35:25 +0200 Subject: [PATCH] :bug: Fix unattended crash when server specify a non-compliant Location header on redirection (#20) --- HISTORY.md | 3 +++ src/niquests/sessions.py | 12 ++++++++++-- tests/test_testserver.py | 15 +++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 865c0a040f..073992840f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -55,6 +55,9 @@ Release History - An invalid content-type definition would cause the charset being evaluated to `True`, thus making the program crash. - Given `proxies` could be mutated when environment proxies were evaluated and injected. This package should not modify your inputs. For context see https://github.com/psf/requests/issues/6118 +- A server could specify a `Location` header that does not comply to HTTP specifications and could lead to an unexpected exception. + We try to fall back to Unicode decoding if the typical and expected Latin-1 would fail. If that fails too, a proper exception is raised. + For context see https://github.com/psf/requests/issues/6026 2.32.1 (2023-09-12) ------------------- diff --git a/src/niquests/sessions.py b/src/niquests/sessions.py index 9671ceaf8f..0e172e43e8 100644 --- a/src/niquests/sessions.py +++ b/src/niquests/sessions.py @@ -47,6 +47,7 @@ from .exceptions import ( ChunkedEncodingError, ContentDecodingError, + HTTPError, InvalidSchema, TooManyRedirects, ) @@ -828,8 +829,15 @@ def get_redirect_target(self, resp: Response) -> str | None: # It is more likely to get UTF8 header rather than latin1. # This causes incorrect handling of UTF8 encoded location headers. # To solve this, we re-encode the location in latin1. - location = location.encode("latin1") - return to_native_string(location, "utf8") + try: + return to_native_string(location.encode("latin1"), "utf8") + except UnicodeDecodeError: + try: + return to_native_string(location.encode("utf-8"), "utf8") + except (UnicodeDecodeError, UnicodeEncodeError) as e: + raise HTTPError( + "Response specify a Location header but is unreadable. This is a violation." + ) from e return None def should_strip_auth(self, old_url: str, new_url: str) -> bool: diff --git a/tests/test_testserver.py b/tests/test_testserver.py index 041bcf9469..28aa67e990 100644 --- a/tests/test_testserver.py +++ b/tests/test_testserver.py @@ -52,6 +52,21 @@ def test_text_response(self): assert r.text == "roflol" assert r.headers["Content-Length"] == "6" + def test_invalid_location_response(self): + server = Server.text_response_server( + "HTTP/1.1 302 PERMANENT-REDIRECTION\r\n" + "Location: http://localhost:1/search/?q=ïðåçèäåíòû+ÑØÀ\r\n\r\n" + ) + + with server as (host, port): + with pytest.raises(niquests.exceptions.ConnectionError) as exc: + niquests.get(f"http://{host}:{port}") + msg = exc.value.args[0].args[0] + assert ( + "/search/?q=%C3%AF%C3%B0%C3%A5%C3%A7%C3%A8%C3%A4%C3%A5%C3%AD%C3%B2%C3%BB+%C3%91%C3%98%C3%80" + in msg + ) + def test_basic_response(self): """the basic response server returns an empty http response""" with Server.basic_response_server() as (host, port):