From c45f5cb749b859c633b3ab8c8581c582f518612b Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 10 May 2016 16:31:44 -0700 Subject: [PATCH] webagg: use werkzeug routing instead of wrapping Bottle app --- webagg/app.py | 135 ++++++++++++++++++-------------- webagg/handlers.py | 9 ++- webagg/test/test_handlers.py | 2 +- webagg/test/test_memento_agg.py | 24 +++--- webagg/test/test_upstream.py | 2 +- webagg/test/testutils.py | 2 +- 6 files changed, 99 insertions(+), 75 deletions(-) diff --git a/webagg/app.py b/webagg/app.py index 595cef7f..c640b654 100644 --- a/webagg/app.py +++ b/webagg/app.py @@ -1,98 +1,115 @@ from webagg.inputrequest import DirectWSGIInputRequest, POSTInputRequest -from bottle import route, request, response, abort, Bottle, debug as bottle_debug +from werkzeug.routing import Map, Rule import requests import traceback import json +from six.moves.urllib.parse import parse_qsl + JSON_CT = 'application/json; charset=utf-8' #============================================================================= class ResAggApp(object): def __init__(self, *args, **kwargs): - self.application = Bottle() - self.application.default_error_handler = self.err_handler self.route_dict = {} self.debug = kwargs.get('debug', False) - if self.debug: - bottle_debug(True) + self.url_map = Map() - @self.application.route('/') - def list_routes(): - return self.route_dict + def list_routes(environ): + return {}, self.route_dict, {} + + self.url_map.add(Rule('/', endpoint=list_routes)) def add_route(self, path, handler): - @self.application.route([path, path + '/'], 'ANY') - @self.wrap_error - def direct_input_request(mode=''): - params = dict(request.query) + def direct_input_request(environ, mode=''): + params = self.get_query_dict(environ) params['mode'] = mode - params['_input_req'] = DirectWSGIInputRequest(request.environ) + params['_input_req'] = DirectWSGIInputRequest(environ) return handler(params) - @self.application.route([path + '/postreq', path + '//postreq'], 'POST') - @self.wrap_error - def post_fullrequest(mode=''): - params = dict(request.query) + def post_fullrequest(environ, mode=''): + params = self.get_query_dict(environ) params['mode'] = mode - params['_input_req'] = POSTInputRequest(request.environ) + params['_input_req'] = POSTInputRequest(environ) return handler(params) + self.url_map.add(Rule(path, endpoint=direct_input_request)) + self.url_map.add(Rule(path + '/', endpoint=direct_input_request)) + + self.url_map.add(Rule(path + '/postreq', endpoint=post_fullrequest)) + self.url_map.add(Rule(path + '//postreq', endpoint=post_fullrequest)) + handler_dict = handler.get_supported_modes() + self.route_dict[path] = handler_dict self.route_dict[path + '/postreq'] = handler_dict - def err_handler(self, exc): - if self.debug: - print(exc) - traceback.print_exc() - response.status = exc.status_code - response.content_type = JSON_CT - err_msg = json.dumps({'message': exc.body}) - response.headers['ResErrors'] = err_msg - return err_msg + def get_query_dict(self, environ): + query_str = environ.get('QUERY_STRING') + if query_str: + return dict(parse_qsl(query_str)) + else: + return {} - def wrap_error(self, func): - def wrap_func(*args, **kwargs): - try: - out_headers, res, errs = func(*args, **kwargs) + def __call__(self, environ, start_response): + urls = self.url_map.bind_to_environ(environ) + try: + endpoint, args = urls.match() + except HTTPException as e: + return e(environ, start_response) - if out_headers: - for n, v in out_headers.items(): - response.headers[n] = v + try: + result = endpoint(environ, **args) - if res: - if errs: - response.headers['ResErrors'] = json.dumps(errs) - return res + out_headers, res, errs = result - last_exc = errs.pop('last_exc', None) - if last_exc: - if self.debug: - traceback.print_exc() + if res: + if isinstance(res, dict): + res = json.dumps(res).encode('utf-8') + out_headers['Content-Type'] = JSON_CT + out_headers['Content-Length'] = str(len(res)) + res = [res] - response.status = last_exc.status() - message = last_exc.msg - else: - response.status = 404 - message = 'No Resource Found' - - response.content_type = JSON_CT - res = {'message': message} if errs: - res['errors'] = errs + out_headers['ResErrors'] = json.dumps(errs) - err_msg = json.dumps(res) - response.headers['ResErrors'] = err_msg - return err_msg + start_response('200 OK', list(out_headers.items())) + return res - except Exception as e: - if self.debug: - traceback.print_exc() - abort(500, 'Internal Error: ' + str(e)) + else: + return self.send_error(out_headers, errs, start_response) - return wrap_func + except Exception as e: + message = 'Internal Error: ' + str(e) + status = 500 + return self.send_error({}, {}, start_response, + message=message, + status=status) + def send_error(self, out_headers, errs, start_response, + message='No Resource Found', status=404): + last_exc = errs.pop('last_exc', None) + if last_exc: + if self.debug: + traceback.print_exc() + + status = last_exc.status() + message = last_exc.msg + + res = {'message': message} + if errs: + res['errors'] = errs + + err_msg = json.dumps(res) + + headers = [('Content-Type', JSON_CT), + ('Content-Length', str(len(err_msg))), + ('ResErrors', err_msg) + ] + + start_response(str(status) + ' ' + message, headers) + return [err_msg.encode('utf-8')] diff --git a/webagg/handlers.py b/webagg/handlers.py index d6038fb2..9385e21c 100644 --- a/webagg/handlers.py +++ b/webagg/handlers.py @@ -111,7 +111,14 @@ class IndexHandler(object): content_type, res = handler(cdx_iter, fields) out_headers = {'Content-Type': content_type} - return out_headers, res, errs + + def check_str(res): + for x in res: + if isinstance(x, str): + x = x.encode('utf-8') + yield x + + return out_headers, check_str(res), errs #============================================================================= diff --git a/webagg/test/test_handlers.py b/webagg/test/test_handlers.py index 1872e896..7c5a1aff 100644 --- a/webagg/test/test_handlers.py +++ b/webagg/test/test_handlers.py @@ -62,7 +62,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass): app.add_route('/empty', HandlerSeq([])) app.add_route('/invalid', DefaultResourceHandler([SimpleAggregator({'invalid': 'should not be a callable'})])) - cls.testapp = webtest.TestApp(app.application) + cls.testapp = webtest.TestApp(app) def _check_uri_date(self, resp, uri, dt): buff = BytesIO(resp.body) diff --git a/webagg/test/test_memento_agg.py b/webagg/test/test_memento_agg.py index 784bf785..2255b951 100644 --- a/webagg/test/test_memento_agg.py +++ b/webagg/test/test_memento_agg.py @@ -129,13 +129,13 @@ def test_handler_output_cdxj(): url = 'http://vvork.com/' headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait')) - exp = """\ + exp = b"""\ com,vvork)/ 20141006184357 {"url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"} com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"} """ assert(headers['Content-Type'] == 'text/x-cdxj') - assert(''.join(res) == exp) + assert(b''.join(res) == exp) assert(errs == {}) @@ -145,13 +145,13 @@ def test_handler_output_json(): url = 'http://vvork.com/' headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json')) - exp = """\ + exp = b"""\ {"urlkey": "com,vvork)/", "timestamp": "20141006184357", "url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"} {"urlkey": "com,vvork)/", "timestamp": "20131004231540", "url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"} """ assert(headers['Content-Type'] == 'application/x-ndjson') - assert(''.join(res) == exp) + assert(b''.join(res) == exp) assert(errs == {}) def test_handler_output_link(): @@ -160,12 +160,12 @@ def test_handler_output_link(): url = 'http://vvork.com/' headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link')) - exp = """\ + exp = b"""\ ; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT"; src="rhiz", ; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait" """ assert(headers['Content-Type'] == 'application/link') - assert(''.join(res) == exp) + assert(b''.join(res) == exp) assert(errs == {}) @@ -175,7 +175,7 @@ def test_handler_output_link_2(): url = 'http://iana.org/' headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link')) - exp = """\ + exp = b"""\ ; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT"; src="ia", ; rel="memento"; datetime="Sun, 26 Jan 2014 20:06:24 GMT"; src="local", ; rel="memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT"; src="ia", @@ -183,7 +183,7 @@ def test_handler_output_link_2(): ; rel="memento"; datetime="Tue, 07 Jan 2014 04:05:52 GMT"; src="ait" """ assert(headers['Content-Type'] == 'application/link') - assert(''.join(res) == exp) + assert(b''.join(res) == exp) exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)", 'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} @@ -198,10 +198,10 @@ def test_handler_output_link_3(): url = 'http://foo.bar.non-existent' headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link')) - exp = '' + exp = b'' assert(headers['Content-Type'] == 'application/link') - assert(''.join(res) == exp) + assert(b''.join(res) == exp) exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)", 'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)", @@ -216,12 +216,12 @@ def test_handler_output_text(): url = 'http://vvork.com/' headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text')) - exp = """\ + exp = b"""\ com,vvork)/ 20141006184357 http://www.vvork.com/ memento http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/ http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/ rhiz com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive-it.org/all/20131004231540/http://vvork.com/ http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/ ait """ assert(headers['Content-Type'] == 'text/plain') - assert(''.join(res) == exp) + assert(b''.join(res) == exp) assert(errs == {}) diff --git a/webagg/test/test_upstream.py b/webagg/test/test_upstream.py index 037b62e9..59854f90 100644 --- a/webagg/test/test_upstream.py +++ b/webagg/test/test_upstream.py @@ -31,7 +31,7 @@ class TestUpstream(LiveServerTests, BaseTestClass): ) self.base_url = base_url - self.testapp = webtest.TestApp(app.application) + self.testapp = webtest.TestApp(app) def test_live_paths(self): diff --git a/webagg/test/testutils.py b/webagg/test/testutils.py index d0fb361e..fc5a0a8a 100644 --- a/webagg/test/testutils.py +++ b/webagg/test/testutils.py @@ -99,7 +99,7 @@ class LiveServerTests(object): {'live': LiveIndexSource()}) ) ) - return app.application + return app @classmethod def teardown_class(cls):