From ea979987e7073aee9496840c697b544b7b5baf21 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 1 Apr 2024 12:25:54 -0400 Subject: [PATCH] WIP: First attempt at using JSON values in query string --- pywb/warcserver/inputrequest.py | 7 ++++++- pywb/warcserver/test/test_inputreq.py | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pywb/warcserver/inputrequest.py b/pywb/warcserver/inputrequest.py index 654610f50..6b6a441ca 100644 --- a/pywb/warcserver/inputrequest.py +++ b/pywb/warcserver/inputrequest.py @@ -244,6 +244,7 @@ def handle_binary(query): try: query = to_native_str(query.decode('utf-8')) query = unquote_plus(query) + # TODO: Convert Pythonic values to JSON values except UnicodeDecodeError: query = handle_binary(query) @@ -270,13 +271,16 @@ def handle_binary(query): values.append((item.name, item.value)) query = urlencode(values, True) + # TODO: Convert Pythonic values to JSON values elif mime.startswith('application/x-amf'): query = self.amf_parse(query, environ) + # TODO: Convert Pythonic values to JSON values? elif mime.startswith('application/json'): try: query = self.json_parse(query) + # TODO: Convert Pythonic values to JSON values except Exception as e: sys.stderr.write("Ignoring query, error parsing as json: " + query.decode("utf-8") + "\n") query = '' @@ -284,6 +288,7 @@ def handle_binary(query): elif mime.startswith('text/plain'): try: query = self.json_parse(query) + # TODO: Convert Pythonic values to JSON values except Exception as e: query = handle_binary(query) @@ -328,7 +333,7 @@ def _parser(json_obj, name=""): _parser(v, name) elif name: - data[get_key(name)] = str(json_obj) + data[get_key(name)] = json.dumps(json_obj) _parser(json.loads(string)) return urlencode(data) diff --git a/pywb/warcserver/test/test_inputreq.py b/pywb/warcserver/test/test_inputreq.py index a7cc01cac..d4e957b52 100644 --- a/pywb/warcserver/test/test_inputreq.py +++ b/pywb/warcserver/test/test_inputreq.py @@ -82,44 +82,44 @@ def test_post_req(self): class TestPostQueryExtract(object): @classmethod def setup_class(cls): - cls.post_data = b'foo=bar&dir=%2Fbaz' + cls.post_data = b'foo=bar&dir=%2Fbaz&do=true&re=false&re=null' cls.binary_post_data = b'\x816l`L\xa04P\x0e\xe0r\x02\xb5\x89\x19\x00fP\xdb\x0e\xb0\x02,' def test_post_extract_1(self): mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded', len(self.post_data), BytesIO(self.post_data)) - assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz' + assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re.2=null' - assert mq.append_query('http://example.com/?123=ABC') == 'http://example.com/?123=ABC&__wb_method=POST&foo=bar&dir=/baz' + assert mq.append_query('http://example.com/?123=ABC') == 'http://example.com/?123=ABC&__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re.1=null' def test_post_extract_json(self): - post_data = b'{"a": "b", "c": {"a": 2}, "d": "e"}' + post_data = b'{"a": "b", "c": {"a": 2}, "d": "e", "f": true, "g": [false, null]}' mq = MethodQueryCanonicalizer('POST', 'application/json', len(post_data), BytesIO(post_data)) - assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&a=b&a.2_=2&d=e' + assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&a=b&a.2_=2&d=e&f=true&g=false&g.2=null' def test_put_extract_method(self): mq = MethodQueryCanonicalizer('PUT', 'application/x-www-form-urlencoded', len(self.post_data), BytesIO(self.post_data)) - assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=PUT&foo=bar&dir=/baz' + assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=PUT&foo=bar&dir=/baz&do=true&re=false&re.2=null' def test_post_extract_non_form_data_1(self): mq = MethodQueryCanonicalizer('POST', 'application/octet-stream', len(self.post_data), BytesIO(self.post_data)) #base64 encoded data - assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6' + assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6JmRvPXRydWUmcmU9ZmFsc2UmcmU9bnVsbA==' def test_post_extract_non_form_data_2(self): mq = MethodQueryCanonicalizer('POST', 'text/plain', len(self.post_data), BytesIO(self.post_data)) #base64 encoded data - assert mq.append_query('http://example.com/pathbar?id=123') == 'http://example.com/pathbar?id=123&__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6' + assert mq.append_query('http://example.com/pathbar?id=123') == 'http://example.com/pathbar?id=123&__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6JmRvPXRydWUmcmU9ZmFsc2UmcmU9bnVsbA==' def test_post_extract_length_invalid_ignore(self): mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded', @@ -136,13 +136,13 @@ def test_post_extract_length_too_short(self): mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded', len(self.post_data) - 4, BytesIO(self.post_data)) - assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=%2' + assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=%2&do=true&re=false' def test_post_extract_length_too_long(self): mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded', len(self.post_data) + 4, BytesIO(self.post_data)) - assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz' + assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re.2=null' def test_post_extract_malformed_form_data(self): mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded', @@ -155,7 +155,7 @@ def test_post_extract_no_boundary_in_multipart_form_mimetype(self): mq = MethodQueryCanonicalizer('POST', 'multipart/form-data', len(self.post_data), BytesIO(self.post_data)) - assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6' + assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=gTZsYEygNFAO4HICtYkZAGZQ2w6wAiw=' def test_options(self):