From dc883ec70820941955533489cfac39780c86e481 Mon Sep 17 00:00:00 2001 From: humberthardy Date: Mon, 21 May 2018 22:29:33 -0400 Subject: [PATCH] Handle amf requests (#321) * Add representation for Amf requests to index them correctly * rewind the stream in case an error happens during amf decoding (pyamf seems to have a problem supporting multi-byte UTF-8) * fix Python 2.7 backward compatibility * update inputrequest.py * reorganize imports and trigger AppVeyor to retest --- extra_requirements.txt | 1 - pywb/rewrite/rewrite_amf.py | 2 + pywb/warcserver/amf.py | 64 +++++++++++++++++++++ pywb/warcserver/inputrequest.py | 30 ++-------- pywb/warcserver/test/test_amf.py | 81 +++++++++++++++++++++++++++ pywb/warcserver/test/test_inputreq.py | 15 ++++- setup.py | 8 ++- 7 files changed, 171 insertions(+), 30 deletions(-) create mode 100644 pywb/warcserver/amf.py create mode 100644 pywb/warcserver/test/test_amf.py diff --git a/extra_requirements.txt b/extra_requirements.txt index 355c0e2c..2ec3ac8d 100644 --- a/extra_requirements.txt +++ b/extra_requirements.txt @@ -2,6 +2,5 @@ certauth youtube-dl boto3 uwsgi -git+https://github.com/t0m/pyamf.git@python3 git+https://github.com/esnme/ultrajson.git pysocks diff --git a/pywb/rewrite/rewrite_amf.py b/pywb/rewrite/rewrite_amf.py index 54a1d007..e07501bf 100644 --- a/pywb/rewrite/rewrite_amf.py +++ b/pywb/rewrite/rewrite_amf.py @@ -43,6 +43,8 @@ class RewriteAMF(BufferedRewriter): #pragma: no cover import traceback traceback.print_exc() print(e) + + stream.seek(0) return stream diff --git a/pywb/warcserver/amf.py b/pywb/warcserver/amf.py new file mode 100644 index 00000000..d872ea6a --- /dev/null +++ b/pywb/warcserver/amf.py @@ -0,0 +1,64 @@ +import json +import six +from pyamf.remoting import Envelope, Request +from pyamf.flex.messaging import RemotingMessage + + +class Amf: + + @staticmethod + def get_representation(request_object, max_calls=500): + + max_calls = max_calls - 1 + + if max_calls < 0: + raise
Exception("Amf.get_representation maximum number of calls reached") + + if isinstance(request_object, Envelope): + # Remove order of Request + bodies = [] + for i in request_object.bodies: + bodies.append(Amf.get_representation(i[1], max_calls)) + bodies = sorted(bodies) + + return "{bodies}".format(bodies="[" + ",".join(bodies) + "]") + + elif isinstance(request_object, Request): + # Remove cyclic reference + target = request_object.target + body = Amf.get_representation(request_object.body, max_calls) + return "{body}".format(**locals()) + + elif isinstance(request_object, RemotingMessage): + # Remove random properties + operation = request_object.operation + body = Amf.get_representation(request_object.body, max_calls) + return "{body}".format(**locals()) + + elif isinstance(request_object, dict): + return json.dumps(request_object, sort_keys=True) + + elif isinstance(request_object, list): + bodies = [] + for i in request_object: + bodies.append(Amf.get_representation(i, max_calls)) + return "[" + ",".join(bodies) + "]" + + elif isinstance(request_object, six.string_types): + return request_object + + elif request_object is None: + return "" + + elif isinstance(request_object, object) and hasattr(request_object, "__dict__"): + classname = request_object.__class__.__name__ + properties = request_object.__dict__ + bodies = dict() + for prop in properties: + bodies[prop] = Amf.get_representation(getattr(request_object, prop), max_calls) + bodies = Amf.get_representation(bodies, max_calls) + + return '<{classname}>{bodies}'.format(**locals()) + + else: + return repr(request_object) diff --git a/pywb/warcserver/inputrequest.py b/pywb/warcserver/inputrequest.py index 5e04a0ef..2ee3cd7d 100644 --- a/pywb/warcserver/inputrequest.py +++ b/pywb/warcserver/inputrequest.py @@ -1,6 +1,7 @@ from warcio.limitreader import LimitReader from warcio.statusandheaders import StatusAndHeadersParser - +from pywb.warcserver.amf import Amf +from pyamf.remoting import decode from 
warcio.utils import to_native_str from six.moves.urllib.parse import urlsplit, quote, unquote_plus, urlencode @@ -11,7 +12,6 @@ import base64 import cgi - #============================================================================= class DirectWSGIInputRequest(object): def __init__(self, env): @@ -264,30 +264,8 @@ class MethodQueryCanonicalizer(object): def amf_parse(self, string, environ): try: - from pyamf import remoting - - res = remoting.decode(BytesIO(string)) - - #print(res) - body = res.bodies[0][1].body[0] - - values = {} - - if hasattr(body, 'body'): - values['body'] = body.body - - if hasattr(body, 'source'): - values['source'] = body.source - - if hasattr(body, 'operation'): - values['op'] = body.operation - - if environ is not None: - environ['pywb.inputdata'] = res - - query = urlencode(values) - #print(query) - return query + res = decode(BytesIO(string)) + return urlencode({"request": Amf.get_representation(res)}) except Exception as e: import traceback diff --git a/pywb/warcserver/test/test_amf.py b/pywb/warcserver/test/test_amf.py new file mode 100644 index 00000000..c0e87203 --- /dev/null +++ b/pywb/warcserver/test/test_amf.py @@ -0,0 +1,81 @@ +from pywb.warcserver.amf import Amf + +import pyamf +import uuid + +from io import BytesIO +from pyamf.remoting import Envelope, Request, encode, decode +from pyamf.flex.messaging import RemotingMessage + + +class CustomObject: + secret = None + + +pyamf.register_class(CustomObject, "custom.object") + + +def generate_amf_request(request_body=None): + req = Request(target='UserService', body=request_body) + ev = Envelope(pyamf.AMF3) + ev['/0'] = req + + return encode(ev).getvalue() + + +def generate_flex_request(message_body=None): + msg = RemotingMessage(operation='retrieveUser', + messageId=str(uuid.uuid4()).upper(), + body=message_body) + return generate_amf_request([msg]) + + +class TestAmf(object): + + def test_can_parse_custom_object(self): + a = CustomObject() + a.secret = "a" + + encoded = 
generate_amf_request(request_body=[a]) + decoded = decode(BytesIO(encoded)) + + assert Amf.get_representation(decoded) == \ + '[[{"secret": "a"}]]' + + def test_parse_amf_request_with_envelope(self): + encoded = generate_amf_request([{"the": "body"}]) + decoded = decode(BytesIO(encoded)) + assert Amf.get_representation(decoded) == \ + '[[{"the": "body"}]]' + + def test_parse_flex_request_with_envelope(self): + encoded = generate_flex_request([{"the": "body"}]) + decoded = decode(BytesIO(encoded)) + assert Amf.get_representation(decoded) == \ + '[[[{"the": "body"}]]]' + + def test_position_in_dict_ignored(self): + a = Request(target=None, body={"a": 1, "b": 2}) + b = Request(target=None, body={"b": 2, "a": 1}) + c = Request(target=None, body={"a": 2, "b": 1}) + + assert Amf.get_representation(a) == Amf.get_representation(b) + assert Amf.get_representation(a) != Amf.get_representation(c) + + def test_order_of_array_preserved(self): + a = Request(target=None, body=[1, 2]) + b = Request(target=None, body=[2, 1]) + + assert Amf.get_representation(a) != Amf.get_representation(b) + + def test_limit_recursive_calls(self): + a = CustomObject() + a.secret = a + + encoded = generate_amf_request(request_body=[a]) + decoded = decode(BytesIO(encoded)) + try: + Amf.get_representation(decoded) + assert False, "should not be called" + except Exception as e: + assert "maximum number of calls reached" in str(e) diff --git a/pywb/warcserver/test/test_inputreq.py b/pywb/warcserver/test/test_inputreq.py index 2936e424..b4f275ac 100644 --- a/pywb/warcserver/test/test_inputreq.py +++ b/pywb/warcserver/test/test_inputreq.py @@ -2,9 +2,10 @@ from pywb.warcserver.inputrequest import DirectWSGIInputRequest, POSTInputReques from werkzeug.routing import Map, Rule import webtest -import traceback from six.moves.urllib.parse import parse_qsl from io import BytesIO +from pyamf import AMF3 +from pyamf.remoting import Request, Envelope, encode 
#============================================================================= @@ -142,4 +143,16 @@ class TestPostQueryExtract(object): mq = MethodQueryCanonicalizer('HEAD', '', 0, BytesIO()) assert mq.append_query('http://example.com/') == 'http://example.com/?__pywb_method=head' + def test_amf_parse(self): + mq = MethodQueryCanonicalizer('POST', 'application/x-amf', 0, BytesIO()) + req = Request(target='t', body="") + ev_1 = Envelope(AMF3) + ev_1['/0'] = req + + req = Request(target='t', body="alt_content") + ev_2 = Envelope(AMF3) + ev_2['/0'] = req + + assert mq.amf_parse(encode(ev_1).getvalue(), None) != \ + mq.amf_parse(encode(ev_2).getvalue(), None) diff --git a/setup.py b/setup.py index a2312fff..4574665b 100755 --- a/setup.py +++ b/setup.py @@ -57,8 +57,12 @@ def generate_git_hash_py(pkg, filename='git_hash.py'): def load_requirements(filename): with open(filename, 'rt') as fh: - return fh.read().rstrip().split('\n') - + requirements = fh.read().rstrip().split('\n') + if (sys.version_info > (3, 0)): + requirements.append("py3AMF") + else: + requirements.append("pyAMF") + return requirements def get_package_data(): pkgs = ['static/*.*',