mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
post request mapping improvements: work on #178, including:
- mapping multipart/form-data the same as x-www-form-urlencoded
- parsing application/x-amf with pyamf
- RewriteContentAMF for rewriting AMF responses to match the request
- default encoding of other POST data as a base64-encoded __wb_post_data param
This commit is contained in:
parent
e5e7c5a7df
commit
87da25c703
@ -184,14 +184,15 @@ class WbRequest(object):
|
|||||||
if not self.wb_url:
|
if not self.wb_url:
|
||||||
return
|
return
|
||||||
|
|
||||||
mime = self.env.get('CONTENT_TYPE', '').split(';')[0]
|
mime = self.env.get('CONTENT_TYPE', '')
|
||||||
length = self.env.get('CONTENT_LENGTH')
|
length = self.env.get('CONTENT_LENGTH')
|
||||||
stream = self.env['wsgi.input']
|
stream = self.env['wsgi.input']
|
||||||
|
|
||||||
buffered_stream = BytesIO()
|
buffered_stream = BytesIO()
|
||||||
|
|
||||||
post_query = extract_post_query('POST', mime, length, stream,
|
post_query = extract_post_query('POST', mime, length, stream,
|
||||||
buffered_stream=buffered_stream)
|
buffered_stream=buffered_stream,
|
||||||
|
environ=self.env)
|
||||||
|
|
||||||
if post_query:
|
if post_query:
|
||||||
self.env['wsgi.input'] = buffered_stream
|
self.env['wsgi.input'] = buffered_stream
|
||||||
|
@ -5,7 +5,8 @@ import yaml
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from chardet.universaldetector import UniversalDetector
|
from chardet.universaldetector import UniversalDetector
|
||||||
from io import BytesIO
|
from io import BytesIO, BufferedReader
|
||||||
|
from six.moves import zip
|
||||||
|
|
||||||
from pywb.rewrite.header_rewriter import RewrittenStatusAndHeaders
|
from pywb.rewrite.header_rewriter import RewrittenStatusAndHeaders
|
||||||
|
|
||||||
@ -21,7 +22,7 @@ from pywb.rewrite.regex_rewriters import JSNoneRewriter, JSLinkOnlyRewriter
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class RewriteContent:
|
class RewriteContent(object):
|
||||||
HEAD_REGEX = re.compile(b'<\s*head\\b[^>]*[>]+', re.I)
|
HEAD_REGEX = re.compile(b'<\s*head\\b[^>]*[>]+', re.I)
|
||||||
|
|
||||||
TAG_REGEX = re.compile(b'^\s*\<')
|
TAG_REGEX = re.compile(b'^\s*\<')
|
||||||
@ -94,7 +95,7 @@ class RewriteContent:
|
|||||||
|
|
||||||
def rewrite_content(self, urlrewriter, status_headers, stream,
|
def rewrite_content(self, urlrewriter, status_headers, stream,
|
||||||
head_insert_func=None, urlkey='',
|
head_insert_func=None, urlkey='',
|
||||||
cdx=None, cookie_rewriter=None):
|
cdx=None, cookie_rewriter=None, env=None):
|
||||||
|
|
||||||
wb_url = urlrewriter.wburl
|
wb_url = urlrewriter.wburl
|
||||||
|
|
||||||
@ -118,9 +119,12 @@ class RewriteContent:
|
|||||||
|
|
||||||
status_headers = rewritten_headers.status_headers
|
status_headers = rewritten_headers.status_headers
|
||||||
|
|
||||||
# use rewritten headers, but no further rewriting needed
|
res = self.handle_custom_rewrite(rewritten_headers.text_type,
|
||||||
if rewritten_headers.text_type is None:
|
status_headers,
|
||||||
return (status_headers, self.stream_to_gen(stream), False)
|
stream,
|
||||||
|
env)
|
||||||
|
if res:
|
||||||
|
return res
|
||||||
|
|
||||||
# Handle text content rewriting
|
# Handle text content rewriting
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
@ -237,6 +241,11 @@ class RewriteContent:
|
|||||||
|
|
||||||
return (status_headers, gen, True)
|
return (status_headers, gen, True)
|
||||||
|
|
||||||
|
def handle_custom_rewrite(self, text_type, status_headers, stream, env):
    """
    Hook called by rewrite_content() before any text rewriting is done

    Subclasses may override this to transform the stream or to fully
    handle the response themselves.

    :param text_type: text type detected for the response, or None
    :param status_headers: the already rewritten status and headers
    :param stream: the response body stream
    :param env: the request environ, may be None (unused here)
    :return: a (status_headers, generator, False) tuple when no text
             rewriting is needed, otherwise None so that the caller
             continues with text rewriting
    """
    # use rewritten headers, but no further rewriting needed
    if text_type is None:
        return (status_headers, self.stream_to_gen(stream), False)

    # a text type was detected: nothing custom to do, caller rewrites text
    return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_html_charset(buff, status_headers):
|
def _extract_html_charset(buff, status_headers):
|
||||||
charset = None
|
charset = None
|
||||||
@ -360,3 +369,57 @@ class RewriteContent:
|
|||||||
|
|
||||||
finally:
|
finally:
|
||||||
stream.close()
|
stream.close()
|
||||||
|
|
||||||
|
|
||||||
|
# =================================================================
|
||||||
|
class RewriteContentAMF(RewriteContent):
    """
    RewriteContent variant which patches AMF responses so that they
    match the AMF request that was parsed for the current POST
    """

    def handle_custom_rewrite(self, text_type, status_headers, stream, env):
        """
        Rewrite the stream if the response is application/x-amf,
        then defer to the default handling
        """
        content_type = status_headers.get_header('Content-Type')

        # prefix match, same as the request side, so that a value carrying
        # parameters (eg. '; charset=...') is still treated as AMF
        if content_type and content_type.startswith('application/x-amf'):
            stream = self.rewrite_amf(stream, env)

        return (super(RewriteContentAMF, self).
                handle_custom_rewrite(text_type, status_headers, stream, env))

    def rewrite_amf(self, stream, env):
        """
        Decode the AMF response in stream and, if the decoded request is
        available in env['pywb.inputdata'], pair request and response
        bodies by position: each response message gets the messageId of
        the request message as its correlationId and is re-labelled with
        the first element of the request body tuple (presumably the
        pyamf target name -- TODO confirm).

        :param stream: the AMF response stream
        :param env: the request environ, may be None
        :return: a new stream with the re-encoded response, or the
                 unmodified response data if rewriting is not possible
        """
        try:
            from pyamf import remoting
        except ImportError:
            # nothing has been read yet, original stream is still intact
            import traceback
            traceback.print_exc()
            return stream

        iobuff = BytesIO()

        try:
            while True:
                buff = stream.read()
                if not buff:
                    break

                iobuff.write(buff)

            iobuff.seek(0)
            res = remoting.decode(iobuff)

            # env defaults to None in rewrite_content(), check before use
            inputdata = env.get('pywb.inputdata') if env else None

            if inputdata:
                new_list = []

                for src, target in zip(inputdata.bodies, res.bodies):
                    target[1].body.correlationId = src[1].body[0].messageId

                    new_list.append((src[0], target[1]))

                res.bodies = new_list

            return BytesIO(remoting.encode(res).getvalue())

        except Exception:
            import traceback
            traceback.print_exc()

            # the original stream has been consumed at this point,
            # so replay what was buffered instead of returning it
            iobuff.seek(0)
            return iobuff
|
||||||
|
|
||||||
|
|
||||||
|
@ -9,10 +9,12 @@ import requests
|
|||||||
|
|
||||||
import six
|
import six
|
||||||
from six.moves.urllib.request import pathname2url, url2pathname
|
from six.moves.urllib.request import pathname2url, url2pathname
|
||||||
from six.moves.urllib.parse import urljoin, unquote_plus, urlsplit
|
from six.moves.urllib.parse import urljoin, unquote_plus, urlsplit, urlencode
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
|
import base64
|
||||||
|
import cgi
|
||||||
|
|
||||||
from io import open, BytesIO
|
from io import open, BytesIO
|
||||||
|
|
||||||
@ -65,19 +67,18 @@ def to_native_str(value, encoding='iso-8859-1', func=lambda x: x):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def extract_post_query(method, mime, length, stream, buffered_stream=None):
|
def extract_post_query(method, mime, length, stream,
|
||||||
|
buffered_stream=None,
|
||||||
|
environ=None):
|
||||||
"""
|
"""
|
||||||
Extract a url-encoded form POST from stream
|
Extract a url-encoded form POST from stream
|
||||||
If not a application/x-www-form-urlencoded, or no missing
|
|
||||||
content length, return None
|
content length, return None
|
||||||
|
Attempt to decode application/x-www-form-urlencoded or multipart/*,
|
||||||
|
otherwise read whole block and b64encode
|
||||||
"""
|
"""
|
||||||
if method.upper() != 'POST':
|
if method.upper() != 'POST':
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if ((not mime or
|
|
||||||
not mime.lower().startswith('application/x-www-form-urlencoded'))):
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
length = int(length)
|
length = int(length)
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
@ -101,11 +102,79 @@ def extract_post_query(method, mime, length, stream, buffered_stream=None):
|
|||||||
buffered_stream.write(post_query)
|
buffered_stream.write(post_query)
|
||||||
buffered_stream.seek(0)
|
buffered_stream.seek(0)
|
||||||
|
|
||||||
post_query = to_native_str(post_query)
|
if not mime:
|
||||||
post_query = unquote_plus(post_query)
|
mime = ''
|
||||||
|
|
||||||
|
if mime.startswith('application/x-www-form-urlencoded'):
|
||||||
|
post_query = to_native_str(post_query)
|
||||||
|
post_query = unquote_plus(post_query)
|
||||||
|
|
||||||
|
elif mime.startswith('multipart/'):
|
||||||
|
env = {'REQUEST_METHOD': 'POST',
|
||||||
|
'CONTENT_TYPE': mime,
|
||||||
|
'CONTENT_LENGTH': len(post_query)}
|
||||||
|
|
||||||
|
args = dict(fp=BytesIO(post_query),
|
||||||
|
environ=env,
|
||||||
|
keep_blank_values=True)
|
||||||
|
|
||||||
|
if six.PY3:
|
||||||
|
args['encoding'] = 'utf-8'
|
||||||
|
|
||||||
|
data = cgi.FieldStorage(**args)
|
||||||
|
|
||||||
|
values = []
|
||||||
|
for item in data.list:
|
||||||
|
values.append((item.name, item.value))
|
||||||
|
|
||||||
|
post_query = urlencode(values, True)
|
||||||
|
|
||||||
|
elif mime.startswith('application/x-amf'):
|
||||||
|
post_query = amf_parse(post_query, environ)
|
||||||
|
|
||||||
|
else:
|
||||||
|
post_query = base64.b64encode(post_query)
|
||||||
|
post_query = to_native_str(post_query)
|
||||||
|
post_query = '&__wb_post_data=' + post_query
|
||||||
|
|
||||||
return post_query
|
return post_query
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def amf_parse(string, environ):
    """
    Decode an AMF remoting request and convert it to a url-encoded query

    Only the first message of the first body of the request is used:
    its 'body', 'source' and 'operation' attributes, when present,
    become the 'body', 'source' and 'op' params, always in that order.

    If environ is not None, the decoded request is also stored in
    environ['pywb.inputdata'].

    :param string: the raw AMF request bytes
    :param environ: a dict to store the decoded request in, or None
    :return: the query string, or None if pyamf is not available or
             the request could not be decoded
    """
    try:
        from pyamf import remoting

        res = remoting.decode(BytesIO(string))

        body = res.bodies[0][1].body[0]

        # ordered (param, value) pairs instead of a dict, so the query is
        # the same on every interpreter regardless of dict ordering
        values = [(param, getattr(body, attr))
                  for attr, param in (('body', 'body'),
                                      ('source', 'source'),
                                      ('operation', 'op'))
                  if hasattr(body, attr)]

        if environ is not None:
            environ['pywb.inputdata'] = res

        return urlencode(values)

    except Exception:
        # best effort: report the problem, caller treats None as no query
        import traceback
        traceback.print_exc()
        return None
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def append_post_query(url, post_query):
|
def append_post_query(url, post_query):
|
||||||
if not post_query:
|
if not post_query:
|
||||||
|
@ -90,8 +90,9 @@ IOError: [Errno 2] No such file or directory: '_x_no_such_file_'
|
|||||||
# unsupported method
|
# unsupported method
|
||||||
>>> extract_post_query('PUT', 'application/x-www-form-urlencoded', len(post_data), BytesIO(post_data))
|
>>> extract_post_query('PUT', 'application/x-www-form-urlencoded', len(post_data), BytesIO(post_data))
|
||||||
|
|
||||||
# unsupported type
|
# base64 encode
|
||||||
>>> extract_post_query('POST', 'text/plain', len(post_data), BytesIO(post_data))
|
>>> extract_post_query('POST', 'text/plain', len(post_data), BytesIO(post_data))
|
||||||
|
'&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
|
||||||
|
|
||||||
# invalid length
|
# invalid length
|
||||||
>>> extract_post_query('POST', 'application/x-www-form-urlencoded', 'abc', BytesIO(post_data))
|
>>> extract_post_query('POST', 'application/x-www-form-urlencoded', 'abc', BytesIO(post_data))
|
||||||
|
@ -253,6 +253,7 @@ class ArchiveIndexEntryMixin(object):
|
|||||||
self['mime'] = def_mime
|
self['mime'] = def_mime
|
||||||
if mime:
|
if mime:
|
||||||
self['mime'] = self.MIME_RE.split(mime, 1)[0]
|
self['mime'] = self.MIME_RE.split(mime, 1)[0]
|
||||||
|
self['_content_type'] = mime
|
||||||
|
|
||||||
def extract_status(self, status_headers):
|
def extract_status(self, status_headers):
|
||||||
""" Extract status code only from status line
|
""" Extract status code only from status line
|
||||||
@ -390,7 +391,7 @@ class DefaultRecordParser(object):
|
|||||||
len_ = record.status_headers.get_header('Content-Length')
|
len_ = record.status_headers.get_header('Content-Length')
|
||||||
|
|
||||||
post_query = extract_post_query(method,
|
post_query = extract_post_query(method,
|
||||||
entry.get('mime'),
|
entry.get('_content_type'),
|
||||||
len_,
|
len_,
|
||||||
record.stream)
|
record.stream)
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ from pywb.utils.timeutils import timestamp_now
|
|||||||
from pywb.framework.wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
from pywb.framework.memento import MementoResponse
|
from pywb.framework.memento import MementoResponse
|
||||||
|
|
||||||
from pywb.rewrite.rewrite_content import RewriteContent
|
from pywb.rewrite.rewrite_content import RewriteContentAMF
|
||||||
from pywb.warc.recordloader import ArchiveLoadFailed
|
from pywb.warc.recordloader import ArchiveLoadFailed
|
||||||
|
|
||||||
from pywb.webapp.views import HeadInsertView
|
from pywb.webapp.views import HeadInsertView
|
||||||
@ -40,7 +40,7 @@ class ReplayView(object):
|
|||||||
self.content_loader = content_loader
|
self.content_loader = content_loader
|
||||||
|
|
||||||
framed = config.get('framed_replay')
|
framed = config.get('framed_replay')
|
||||||
self.content_rewriter = RewriteContent(is_framed_replay=framed)
|
self.content_rewriter = RewriteContentAMF(is_framed_replay=framed)
|
||||||
|
|
||||||
self.head_insert_view = HeadInsertView.init_from_config(config)
|
self.head_insert_view = HeadInsertView.init_from_config(config)
|
||||||
|
|
||||||
@ -174,7 +174,8 @@ class ReplayView(object):
|
|||||||
stream=stream,
|
stream=stream,
|
||||||
head_insert_func=head_insert_func,
|
head_insert_func=head_insert_func,
|
||||||
urlkey=cdx['urlkey'],
|
urlkey=cdx['urlkey'],
|
||||||
cdx=cdx))
|
cdx=cdx,
|
||||||
|
env=wbrequest.env))
|
||||||
|
|
||||||
(status_headers, response_iter, is_rewritten) = result
|
(status_headers, response_iter, is_rewritten) = result
|
||||||
|
|
||||||
|
3
setup.py
3
setup.py
@ -82,6 +82,7 @@ setup(
|
|||||||
'pyyaml',
|
'pyyaml',
|
||||||
'watchdog',
|
'watchdog',
|
||||||
'webencodings',
|
'webencodings',
|
||||||
|
'pyamf',
|
||||||
],
|
],
|
||||||
tests_require=[
|
tests_require=[
|
||||||
'pytest',
|
'pytest',
|
||||||
@ -91,7 +92,7 @@ setup(
|
|||||||
'mock',
|
'mock',
|
||||||
],
|
],
|
||||||
dependency_links=[
|
dependency_links=[
|
||||||
'git+https://github.com/ikreymer/fakeredis.git@zset-lex-ops#egg=fakeredis-0.6.2-zset-lex-ops',
|
'git+https://github.com/t0m/pyamf.git@python3'
|
||||||
],
|
],
|
||||||
cmdclass={'test': PyTest},
|
cmdclass={'test': PyTest},
|
||||||
test_suite='',
|
test_suite='',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user