1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Handle amf requests (#321)

* Add representation for Amf requests to index them correctly

* Rewind the stream if an error occurs during AMF decoding (pyamf seems to have a problem supporting multi-byte UTF-8).

* fix python 2.7 retrocompatibility

* update inputrequest.py

* reorganize imports and force AppVeyor to retest
This commit is contained in:
humberthardy 2018-05-21 22:29:33 -04:00 committed by Ilya Kreymer
parent f65ac7068f
commit dc883ec708
7 changed files with 171 additions and 30 deletions

View File

@ -2,6 +2,5 @@ certauth
youtube-dl
boto3
uwsgi
git+https://github.com/t0m/pyamf.git@python3
git+https://github.com/esnme/ultrajson.git
pysocks

View File

@ -43,6 +43,8 @@ class RewriteAMF(BufferedRewriter): #pragma: no cover
import traceback
traceback.print_exc()
print(e)
stream.seek(0)
return stream

64
pywb/warcserver/amf.py Normal file
View File

@ -0,0 +1,64 @@
import json
import six
from pyamf.remoting import Envelope, Request
from pyamf.flex.messaging import RemotingMessage
class Amf(object):
    """Produce a canonical string form of a decoded AMF request.

    The string is meant to be identical for requests that are semantically
    the same, so it deliberately ignores the order of the envelope bodies
    and the order of dictionary keys, and keeps only selected attributes of
    ``Request`` and ``RemotingMessage`` objects.
    """

    @staticmethod
    def get_representation(request_object, max_calls=500):
        """Return the canonical string representation of ``request_object``.

        :param request_object: a decoded AMF value; an ``Envelope``,
            ``Request``, ``RemotingMessage``, dict, list, string, ``None``,
            an arbitrary object with a ``__dict__``, or any other value.
        :param int max_calls: remaining recursion budget. It is decremented
            on entry and the decremented value is handed to every nested
            call, so it effectively bounds the nesting depth and stops
            self-referencing objects from recursing forever.
        :return: the representation as a string.
        :raises Exception: when the recursion budget is exhausted.
        """
        max_calls = max_calls - 1
        if max_calls < 0:
            raise Exception("Amf.get_representation maximum number of calls reached")

        # NOTE: the recursive branches use explicit loops on purpose. On
        # Python versions before 3.12 a comprehension runs in its own frame,
        # which would roughly double the interpreter stack used per nesting
        # level and could hit the interpreter recursion limit before the
        # ``max_calls`` budget is used up.

        if isinstance(request_object, Envelope):
            # Sorting the rendered bodies makes the order of the requests
            # inside the envelope irrelevant.
            bodies = []
            for entry in request_object.bodies:
                bodies.append(Amf.get_representation(entry[1], max_calls))
            bodies.sort()
            return "<Envelope>[{bodies}]</Envelope>".format(bodies=",".join(bodies))

        if isinstance(request_object, Request):
            # Only target and body are rendered (the original author notes
            # this avoids a cyclic reference).
            target = request_object.target
            body = Amf.get_representation(request_object.body, max_calls)
            return "<Request target={target}>{body}</Request>".format(
                target=target, body=body)

        if isinstance(request_object, RemotingMessage):
            # Only operation and body are rendered; per-message values such
            # as the message id would make equal requests look different.
            operation = request_object.operation
            body = Amf.get_representation(request_object.body, max_calls)
            return "<RemotingMessage operation={operation}>{body}</RemotingMessage>".format(
                operation=operation, body=body)

        if isinstance(request_object, dict):
            # ``default`` is only consulted for values json cannot encode
            # itself (typed objects, dates, ...). Those are rendered through
            # this method instead of aborting the whole representation with
            # a TypeError; JSON-friendly dicts produce the same output as
            # a plain ``json.dumps(..., sort_keys=True)``.
            return json.dumps(
                request_object,
                sort_keys=True,
                default=lambda value: Amf.get_representation(value, max_calls))

        if isinstance(request_object, list):
            # Element order is significant for lists, so no sorting here.
            bodies = []
            for item in request_object:
                bodies.append(Amf.get_representation(item, max_calls))
            return "[" + ",".join(bodies) + "]"

        if isinstance(request_object, six.string_types):
            return request_object

        if request_object is None:
            return ""

        if hasattr(request_object, "__dict__"):
            # Generic object: render every instance attribute, then
            # serialize the attribute map with sorted keys.
            classname = request_object.__class__.__name__
            rendered = dict()
            for prop in request_object.__dict__:
                rendered[prop] = Amf.get_representation(
                    getattr(request_object, prop), max_calls)
            bodies = Amf.get_representation(rendered, max_calls)
            return '<{classname}>{bodies}</{classname}>'.format(
                classname=classname, bodies=bodies)

        return repr(request_object)

View File

@ -1,6 +1,7 @@
from warcio.limitreader import LimitReader
from warcio.statusandheaders import StatusAndHeadersParser
from pywb.warcserver.amf import Amf
from pyamf.remoting import decode
from warcio.utils import to_native_str
from six.moves.urllib.parse import urlsplit, quote, unquote_plus, urlencode
@ -11,7 +12,6 @@ import base64
import cgi
#=============================================================================
class DirectWSGIInputRequest(object):
def __init__(self, env):
@ -264,30 +264,8 @@ class MethodQueryCanonicalizer(object):
def amf_parse(self, string, environ):
try:
from pyamf import remoting
res = remoting.decode(BytesIO(string))
#print(res)
body = res.bodies[0][1].body[0]
values = {}
if hasattr(body, 'body'):
values['body'] = body.body
if hasattr(body, 'source'):
values['source'] = body.source
if hasattr(body, 'operation'):
values['op'] = body.operation
if environ is not None:
environ['pywb.inputdata'] = res
query = urlencode(values)
#print(query)
return query
res = decode(BytesIO(string))
return urlencode({"request": Amf.get_representation(res)})
except Exception as e:
import traceback

View File

@ -0,0 +1,81 @@
from pywb.warcserver.amf import Amf
import pyamf
import uuid
from io import BytesIO
from pyamf.remoting import Envelope, Request, encode, decode
from pyamf.flex.messaging import RemotingMessage
class CustomObject:
    """Minimal typed object used as an AMF payload by the tests in this file."""

    # Class-level default; individual tests overwrite it on the instance.
    secret = None
pyamf.register_class(CustomObject, "custom.object")
def generate_amf_request(request_body=None):
    """Return the encoded bytes of an AMF3 envelope holding one request.

    The envelope contains a single ``Request`` targeting ``UserService``,
    stored under the key ``/0``, whose body is ``request_body``.
    """
    envelope = Envelope(pyamf.AMF3)
    envelope['/0'] = Request(target='UserService', body=request_body)
    encoded_stream = encode(envelope)
    return encoded_stream.getvalue()
def generate_flex_request(message_body=None):
    """Return an encoded AMF request whose body is one Flex ``RemotingMessage``.

    The message invokes the ``retrieveUser`` operation, carries
    ``message_body`` as its body, and gets a fresh upper-cased UUID as its
    message id on every call.
    """
    message_id = str(uuid.uuid4()).upper()
    remoting_message = RemotingMessage(
        operation='retrieveUser',
        messageId=message_id,
        body=message_body)
    return generate_amf_request([remoting_message])
class TestAmf(object):
    """Tests for the canonical string produced by ``Amf.get_representation``."""

    def test_can_parse_custom_object(self):
        """A typed object is rendered as its class name wrapping its attributes."""
        a = CustomObject()
        a.secret = "a"

        encoded = generate_amf_request(request_body=[a])
        decoded = decode(BytesIO(encoded))

        assert Amf.get_representation(decoded) == \
            '<Envelope>[<Request target=UserService>[<CustomObject>{"secret": "a"}</CustomObject>]</Request>]</Envelope>'

    def test_parse_amf_request_with_envelope(self):
        """A plain AMF request is rendered as Envelope > Request > body."""
        encoded = generate_amf_request([{"the": "body"}])
        decoded = decode(BytesIO(encoded))

        assert Amf.get_representation(decoded) == \
            '<Envelope>[<Request target=UserService>[{"the": "body"}]</Request>]</Envelope>'

    def test_parse_flex_request_with_envelope(self):
        """A Flex request additionally shows the RemotingMessage operation."""
        encoded = generate_flex_request([{"the": "body"}])
        decoded = decode(BytesIO(encoded))

        assert Amf.get_representation(decoded) == \
            '<Envelope>[<Request target=UserService>[<RemotingMessage operation=retrieveUser>[{"the": "body"}]</RemotingMessage>]</Request>]</Envelope>'

    def test_position_in_dict_ignored(self):
        """Key order in a dict must not matter, but the values must."""
        a = Request(target=None, body={"a": 1, "b": 2})
        b = Request(target=None, body={"b": 2, "a": 1})
        c = Request(target=None, body={"a": 2, "b": 1})

        assert Amf.get_representation(a) == Amf.get_representation(b)
        assert Amf.get_representation(a) != Amf.get_representation(c)

    def test_order_of_array_preserved(self):
        """Element order in a list is significant."""
        a = Request(target=None, body=[1, 2])
        b = Request(target=None, body=[2, 1])

        assert Amf.get_representation(a) != Amf.get_representation(b)

    def test_limit_recursive_calls(self):
        """A self-referencing object must hit the recursion budget error."""
        a = CustomObject()
        a.secret = a

        encoded = generate_amf_request(request_body=[a])
        decoded = decode(BytesIO(encoded))

        try:
            Amf.get_representation(decoded)
        except Exception as e:
            assert "maximum number of calls reached" in str(e)
        else:
            # Kept outside the ``try`` so this failure is not swallowed by
            # the ``except Exception`` clause above and reported as a
            # message mismatch instead.
            assert False, "should not be called"

View File

@ -2,9 +2,10 @@ from pywb.warcserver.inputrequest import DirectWSGIInputRequest, POSTInputReques
from werkzeug.routing import Map, Rule
import webtest
import traceback
from six.moves.urllib.parse import parse_qsl
from io import BytesIO
from pyamf import AMF3
from pyamf.remoting import Request, Envelope, encode
#=============================================================================
@ -142,4 +143,16 @@ class TestPostQueryExtract(object):
mq = MethodQueryCanonicalizer('HEAD', '', 0, BytesIO())
assert mq.append_query('http://example.com/') == 'http://example.com/?__pywb_method=head'
def test_amf_parse(self):
    """Two AMF requests that differ only in their body yield different queries."""
    canonicalizer = MethodQueryCanonicalizer('POST', 'application/x-amf', 0, BytesIO())

    # First envelope: request with an empty string body.
    empty_envelope = Envelope(AMF3)
    empty_envelope['/0'] = Request(target='t', body="")

    # Second envelope: same target, different body.
    content_envelope = Envelope(AMF3)
    content_envelope['/0'] = Request(target='t', body="alt_content")

    empty_query = canonicalizer.amf_parse(encode(empty_envelope).getvalue(), None)
    content_query = canonicalizer.amf_parse(encode(content_envelope).getvalue(), None)

    assert empty_query != content_query

View File

@ -57,8 +57,12 @@ def generate_git_hash_py(pkg, filename='git_hash.py'):
def load_requirements(filename):
    """Return the requirements listed in *filename* plus the AMF library.

    The file is read as text, trailing whitespace is stripped and the rest
    is split on newlines, one requirement per line. The AMF dependency is
    then appended: ``py3AMF`` when running on Python 3, ``pyAMF`` otherwise.

    :param filename: path of the requirements file to read.
    :return: list of requirement strings.
    """
    with open(filename, 'rt') as fh:
        requirements = fh.read().rstrip().split('\n')

    # The AMF dependency is chosen after the file has been read; an early
    # ``return`` of the raw file contents would skip this step entirely.
    if sys.version_info > (3, 0):
        requirements.append("py3AMF")
    else:
        requirements.append("pyAMF")

    return requirements
def get_package_data():
pkgs = ['static/*.*',