1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

inputrequest/indexing: Fix #471: failed playback due to encoding issue (#480)

* Handle incorrectly formatted form data; address #471.

* Always attempt to decode application/x-www-form-urlencoded form data as UTF-8; if decoding fails, treat the body as binary POST data (base64-encode it and add it to the query as __wb_post_data=).
This commit is contained in:
Rebecca Lynn Cremona 2019-06-21 12:41:35 -04:00 committed by Ilya Kreymer
parent 56fc26333e
commit 193607eed8
2 changed files with 20 additions and 5 deletions

View File

@ -228,9 +228,18 @@ class MethodQueryCanonicalizer(object):
if not mime:
mime = ''
if mime.startswith('application/x-www-form-urlencoded'):
def handle_binary(query):
    """Turn a raw POST body into a ``__wb_post_data=<base64>`` query string.

    :param query: the raw request body; passed directly to
        ``base64.b64encode``, so it must be a bytes-like object.
    :return: ``'__wb_post_data='`` followed by the base64 text of the body,
        after conversion with ``to_native_str``.
    """
    query = base64.b64encode(query)
    query = to_native_str(query)
    # Deliberately no unquote_plus() here: the standard base64 alphabet
    # contains '+', which unquote_plus() would rewrite to a space and thereby
    # corrupt the encoded payload.
    query = '__wb_post_data=' + query
    return query
if mime.startswith('application/x-www-form-urlencoded'):
try:
query = to_native_str(query.decode('utf-8'))
query = unquote_plus(query)
except UnicodeDecodeError:
query = handle_binary(query)
elif mime.startswith('multipart/'):
env = {'REQUEST_METHOD': 'POST',
@ -256,9 +265,7 @@ class MethodQueryCanonicalizer(object):
query = self.amf_parse(query, environ)
else:
query = base64.b64encode(query)
query = to_native_str(query)
query = '__wb_post_data=' + query
query = handle_binary(query)
self.query = query

View File

@ -83,6 +83,7 @@ class TestPostQueryExtract(object):
# Class-level fixtures: request bodies stored on the class for the tests.
# post_data is a well-formed urlencoded form ('%2F' is an escaped '/').
# binary_post_data starts with the byte 0x81, so it is not valid UTF-8.
@classmethod
def setup_class(cls):
cls.post_data = b'foo=bar&dir=%2Fbaz'
cls.binary_post_data = b'\x816l`L\xa04P\x0e\xe0r\x02\xb5\x89\x19\x00fP\xdb\x0e\xb0\x02,'
def test_post_extract_1(self):
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
@ -135,6 +136,13 @@ class TestPostQueryExtract(object):
assert mq.append_query('http://example.com/') == 'http://example.com/?foo=bar&dir=/baz'
# A body declared as application/x-www-form-urlencoded but containing
# undecodable binary bytes must still produce a query: the whole body is
# expected to appear base64-encoded under the __wb_post_data key.
def test_post_extract_malformed_form_data(self):
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
len(self.binary_post_data), BytesIO(self.binary_post_data))
# the expected __wb_post_data value is the base64 encoding of binary_post_data
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_post_data=gTZsYEygNFAO4HICtYkZAGZQ2w6wAiw='
# An OPTIONS request with an empty mime type and an empty body is expected
# to be tagged in the URL with __pywb_method=options.
def test_options(self):
mq = MethodQueryCanonicalizer('OPTIONS', '', 0, BytesIO())
assert mq.append_query('http://example.com/') == 'http://example.com/?__pywb_method=options'