mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Handle Content-Type multipart/form-data without boundary (#599)
* Handle Content-Type multipart/form-data without boundary
* Add tests for multipart/form-data change
This commit is contained in:
parent
de81efac78
commit
b66608c5f3
@ -463,6 +463,104 @@ com,example)/xyz.pdf 20140401052011 http://example.com/xyz.pdf application/http
|
||||
"""
|
||||
|
||||
|
||||
def test_multipart_form():
    """Index a response/request WARC pair whose POST body is multipart/form-data.

    The request's Content-Type declares a ``boundary`` and the body holds a
    single form field ``q``.  With ``include_all=True`` and
    ``append_post=True`` the expected CDX lines for both records have
    ``q=...`` appended to the URL key.
    """
    test_data = b'\
WARC/1.0\r\n\
WARC-Type: response\r\n\
WARC-Record-ID: <urn:uuid:073fac44-c383-4a2b-980d-76fec83bd20d>\r\n\
WARC-Date: 2020-11-19T19:54:34Z\r\n\
WARC-Target-URI: https://example.com/ajax/bz?foo=bar\r\n\
Content-Type: application/http;msgtype=response\r\n\
WARC-Payload-Digest: sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ\r\n\
Content-Length: 48\r\n\
WARC-Block-Digest: sha1:XN45YTSBLG5PLJ4HA7DRDYGJBM5VW4UO\r\n\
\r\n\
Content-Type: text/html; charset="utf-8"\r\n\
\r\n\
ABCD\r\n\
\r\n\
\r\n\
\r\n\
WARC/1.0\r\n\
WARC-Type: request\r\n\
WARC-Record-ID: <urn:uuid:3084e79c-ae58-4bfd-8590-fcf2830fe896>\r\n\
WARC-Date: 2020-11-19T19:54:34Z\r\n\
WARC-Target-URI: https://example.com/ajax/bz?foo=bar\r\n\
WARC-Concurrent-To: <urn:uuid:073fac44-c383-4a2b-980d-76fec83bd20d>\r\n\
WARC-Block-Digest: sha1:LNYP3X3NWXQLUGDI745P4L4FK27XGP24\r\n\
Content-Type: application/http;msgtype=request\r\n\
Content-Length: 321\r\n\
\r\n\
POST /ajax/bz?foo=bar HTTP/1.1\r\n\
Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryWUBf9liofZK0nuJd\r\n\
content-Length: 199\r\n\
\r\n\
------WebKitFormBoundaryWUBf9liofZK0nuJd\r\n\
Content-Disposition: form-data; name="q"\r\n\
\r\n\
[{"webSessionId":"pb2tr7:vx83uz:fdi8ta","user":"0"}]\r\n\
------WebKitFormBoundaryWUBf9liofZK0nuJd--\r\n\
\r\n\
'
    buff = BytesIO()
    write_cdx_index(buff, BytesIO(test_data), 'test.warc.gz',
                    include_all=True, append_post=True)

    # NOTE(review): a CDX header line normally starts with the field
    # delimiter (a space) before "CDX"; none is visible in this copy --
    # confirm the expected bytes against real indexer output.
    assert buff.getvalue() == b"""\
CDX N b a m s k r M S V g
com,example)/ajax/bz?foo=bar&q=[{"websessionid":"pb2tr7:vx83uz:fdi8ta","user":"0"}] 20201119195434 https://example.com/ajax/bz?foo=bar unk text/html; 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 420 0 test.warc.gz
com,example)/ajax/bz?foo=bar&q=[{"websessionid":"pb2tr7:vx83uz:fdi8ta","user":"0"}] 20201119195434 https://example.com/ajax/bz?foo=bar multipart/form-data - - - - 701 428 test.warc.gz
"""
|
||||
|
||||
|
||||
def test_multipart_form_no_boundary():
    """Index a WARC pair whose POST says multipart/form-data but gives no boundary.

    The request body is a small JSON document rather than real multipart
    content.  The expected CDX lines for both records carry the body in a
    ``__wb_post_data`` query argument on the URL key.
    """
    warc_bytes = b'\
WARC/1.0\r\n\
WARC-Type: response\r\n\
WARC-Record-ID: <urn:uuid:3bc1606a-d517-487e-a6d5-bfeaebda2ec3>\r\n\
WARC-Date: 2020-11-19T14:02:52Z\r\n\
WARC-Target-URI: https://capi.connatix.com/core/story?v=77797\r\n\
WARC-IP-Address: 18.221.6.219\r\n\
Content-Type: application/http;msgtype=response\r\n\
WARC-Payload-Digest: sha1:SIGZ3RJW5J7DUKEZ4R7RSYUZNGLETIS5\r\n\
Content-Length: 41\r\n\
WARC-Block-Digest: sha1:JXKKZNALIPOW7J2FX5XUTGQZXKBSGZLU\r\n\
\r\n\
Content-Type: multipart/form-data\r\n\
\r\n\
ABCD\r\n\
\r\n\
\r\n\
\r\n\
WARC/1.0\r\n\
WARC-Type: request\r\n\
WARC-Record-ID: <urn:uuid:d5e7186f-5725-4ed1-b199-56fbdf4bd805>\r\n\
WARC-Date: 2020-11-19T14:02:52Z\r\n\
WARC-Target-URI: https://capi.connatix.com/core/story?v=77797\r\n\
WARC-Concurrent-To: <urn:uuid:3bc1606a-d517-487e-a6d5-bfeaebda2ec3>\r\n\
WARC-Block-Digest: sha1:QJ2YUIKEWDSCLK5A2DHGLQ7WWEKYMO3W\r\n\
Content-Type: application/http;msgtype=request\r\n\
Content-Length: 111\r\n\
\r\n\
POST /core/story?v=77797 HTTP/1.1\r\n\
Content-Length: 19\r\n\
Content-Type: multipart/form-data\r\n\
\r\n\
{"text": "default"}\r\n\
\r\n\
'
    cdx_out = BytesIO()
    warc_stream = BytesIO(warc_bytes)
    write_cdx_index(cdx_out, warc_stream, 'test.warc.gz',
                    include_all=True, append_post=True)

    # NOTE(review): a CDX header line normally starts with the field
    # delimiter (a space) before "CDX"; none is visible in this copy --
    # confirm the expected bytes against real indexer output.
    assert cdx_out.getvalue() == b"""\
CDX N b a m s k r M S V g
com,connatix,capi)/core/story?__wb_post_data=eyj0zxh0ijogimrlzmf1bhqifq==&v=77797 20201119140252 https://capi.connatix.com/core/story?v=77797 unk multipart/form-data SIGZ3RJW5J7DUKEZ4R7RSYUZNGLETIS5 - - 453 0 test.warc.gz
com,connatix,capi)/core/story?__wb_post_data=eyj0zxh0ijogimrlzmf1bhqifq==&v=77797 20201119140252 https://capi.connatix.com/core/story?v=77797 multipart/form-data - - - - 500 461 test.warc.gz
"""
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When executed directly as a script, run this module's doctests.
    from doctest import testmod
    testmod()
|
||||
|
@ -259,8 +259,12 @@ class MethodQueryCanonicalizer(object):
|
||||
if PY3:
|
||||
args['encoding'] = 'utf-8'
|
||||
|
||||
try:
|
||||
data = cgi.FieldStorage(**args)
|
||||
|
||||
except ValueError:
|
||||
# Content-Type multipart/form-data may lack "boundary" info
|
||||
query = handle_binary(query)
|
||||
else:
|
||||
values = []
|
||||
for item in data.list:
|
||||
values.append((item.name, item.value))
|
||||
|
@ -143,6 +143,13 @@ class TestPostQueryExtract(object):
|
||||
#base64 encoded data
|
||||
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_post_data=gTZsYEygNFAO4HICtYkZAGZQ2w6wAiw='
|
||||
|
||||
def test_post_extract_no_boundary_in_multipart_form_mimetype(self):
    """POST typed multipart/form-data with no boundary yields ``__wb_post_data``."""
    body = self.post_data
    canonicalizer = MethodQueryCanonicalizer(
        'POST', 'multipart/form-data', len(body), BytesIO(body))

    expected = 'http://example.com/?__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
    assert canonicalizer.append_query('http://example.com/') == expected
|
||||
|
||||
|
||||
def test_options(self):
    """An OPTIONS request with an empty body adds ``__pywb_method=options`` to the URL."""
    mq = MethodQueryCanonicalizer('OPTIONS', '', 0, BytesIO())
    result_url = mq.append_query('http://example.com/')
    assert result_url == 'http://example.com/?__pywb_method=options'
|
||||
|
Loading…
x
Reference in New Issue
Block a user