# Patch summary (descriptive text placed before the first "diff --git" header).
#
# pywb/warcserver/inputrequest.py (hunks inside class MethodQueryCanonicalizer):
#   * Adds a local helper, handle_binary(query), that base64-encodes the
#     request body, converts it with to_native_str, and prefixes it with
#     '__wb_post_data='. The final `else:` branch now calls this helper
#     instead of repeating those three statements inline.
#   * For 'application/x-www-form-urlencoded' bodies, the body is first decoded
#     as UTF-8 and then passed through unquote_plus. If decoding raises
#     UnicodeDecodeError, the body is passed to handle_binary instead, so it
#     ends up in the same '__wb_post_data=<base64>' form as the `else:` branch.
#
# pywb/warcserver/test/test_inputreq.py (hunks inside class TestPostQueryExtract):
#   * Adds a binary_post_data fixture (bytes that are not valid UTF-8) and
#     test_post_extract_malformed_form_data, which expects append_query to
#     yield '?__wb_post_data=' followed by the base64 encoding of that fixture.
#
# NOTE(review): the line breaks, leading indentation, and blank context lines
# below were reconstructed from Python syntax and from the line counts in each
# "@@" hunk header; confirm them against the target files before applying.
diff --git a/pywb/warcserver/inputrequest.py b/pywb/warcserver/inputrequest.py
index 2ee3cd7d..f910d2e3 100644
--- a/pywb/warcserver/inputrequest.py
+++ b/pywb/warcserver/inputrequest.py
@@ -228,9 +228,18 @@ class MethodQueryCanonicalizer(object):
         if not mime:
             mime = ''
 
-        if mime.startswith('application/x-www-form-urlencoded'):
+        def handle_binary(query):
+            query = base64.b64encode(query)
             query = to_native_str(query)
-            query = unquote_plus(query)
+            query = '__wb_post_data=' + query
+            return query
+
+        if mime.startswith('application/x-www-form-urlencoded'):
+            try:
+                query = to_native_str(query.decode('utf-8'))
+                query = unquote_plus(query)
+            except UnicodeDecodeError:
+                query = handle_binary(query)
 
         elif mime.startswith('multipart/'):
             env = {'REQUEST_METHOD': 'POST',
@@ -256,9 +265,7 @@ class MethodQueryCanonicalizer(object):
             query = self.amf_parse(query, environ)
 
         else:
-            query = base64.b64encode(query)
-            query = to_native_str(query)
-            query = '__wb_post_data=' + query
+            query = handle_binary(query)
 
         self.query = query
 
diff --git a/pywb/warcserver/test/test_inputreq.py b/pywb/warcserver/test/test_inputreq.py
index b4f275ac..fe5cf67c 100644
--- a/pywb/warcserver/test/test_inputreq.py
+++ b/pywb/warcserver/test/test_inputreq.py
@@ -83,6 +83,7 @@ class TestPostQueryExtract(object):
     @classmethod
     def setup_class(cls):
         cls.post_data = b'foo=bar&dir=%2Fbaz'
+        cls.binary_post_data = b'\x816l`L\xa04P\x0e\xe0r\x02\xb5\x89\x19\x00fP\xdb\x0e\xb0\x02,'
 
     def test_post_extract_1(self):
         mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
@@ -135,6 +136,13 @@ class TestPostQueryExtract(object):
 
         assert mq.append_query('http://example.com/') == 'http://example.com/?foo=bar&dir=/baz'
 
+    def test_post_extract_malformed_form_data(self):
+        mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
+                                      len(self.binary_post_data), BytesIO(self.binary_post_data))
+
+        #base64 encoded data
+        assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_post_data=gTZsYEygNFAO4HICtYkZAGZQ2w6wAiw='
+
     def test_options(self):
         mq = MethodQueryCanonicalizer('OPTIONS', '', 0, BytesIO())
         assert mq.append_query('http://example.com/') == 'http://example.com/?__pywb_method=options'