1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

webagg: use werkzeug routing instead of wrapping Bottle app

This commit is contained in:
Ilya Kreymer 2016-05-10 16:31:44 -07:00
parent 464eca2fa0
commit c45f5cb749
6 changed files with 99 additions and 75 deletions

View File

@ -1,98 +1,115 @@
import json
import traceback

import requests
from bottle import route, request, response, abort, Bottle, debug as bottle_debug
from six.moves.urllib.parse import parse_qsl
from werkzeug.exceptions import HTTPException
from werkzeug.routing import Map, Rule

from webagg.inputrequest import DirectWSGIInputRequest, POSTInputRequest
JSON_CT = 'application/json; charset=utf-8'


#=============================================================================
class ResAggApp(object):
    """WSGI application that routes requests to registered handlers.

    Routing is performed with a werkzeug ``Map``. Every endpoint is invoked
    as ``endpoint(environ, **url_args)`` and must return a 3-tuple
    ``(out_headers, res, errs)``:

    * ``out_headers`` -- dict of response headers (only used on success)
    * ``res`` -- the response body: a dict (serialized to JSON) or an
      iterable handed back to the WSGI server as-is; a falsy value means
      "nothing found" and produces a JSON error response
    * ``errs`` -- dict of errors, reported in the ``ResErrors`` header and,
      on failure, in the JSON body
    """

    def __init__(self, *args, **kwargs):
        """Create an app with only the ``/`` route-listing endpoint.

        :param debug: keyword flag; when true, tracebacks are printed for
            errors turned into error responses.
        """
        self.route_dict = {}
        self.debug = kwargs.get('debug', False)

        self.url_map = Map()

        def list_routes(environ):
            # No extra headers, no errors: the body is the route listing
            return {}, self.route_dict, {}

        self.url_map.add(Rule('/', endpoint=list_routes))

    @property
    def application(self):
        """Backward-compatible alias for the WSGI callable.

        Callers used to pass ``app.application`` to the WSGI server; the
        instance itself is now the WSGI callable.
        """
        return self

    def add_route(self, path, handler):
        """Register ``handler`` under ``path`` and its sub-routes.

        Four rules are added: ``path`` and ``path/<mode>`` build the input
        request with ``DirectWSGIInputRequest``, while ``path/postreq`` and
        ``path/<mode>/postreq`` build it with ``POSTInputRequest``.

        :param path: route prefix, e.g. ``'/live'``
        :param handler: callable taking the params dict and returning
            ``(out_headers, res, errs)``; must also provide
            ``get_supported_modes()``, whose result is shown in the
            route listing.
        """
        def make_endpoint(input_req_cls):
            def endpoint(environ, mode=''):
                params = self.get_query_dict(environ)
                params['mode'] = mode
                params['_input_req'] = input_req_cls(environ)
                return handler(params)

            return endpoint

        direct_input_request = make_endpoint(DirectWSGIInputRequest)
        post_fullrequest = make_endpoint(POSTInputRequest)

        self.url_map.add(Rule(path, endpoint=direct_input_request))
        self.url_map.add(Rule(path + '/<path:mode>',
                              endpoint=direct_input_request))

        self.url_map.add(Rule(path + '/postreq', endpoint=post_fullrequest))
        self.url_map.add(Rule(path + '/<path:mode>/postreq',
                              endpoint=post_fullrequest))

        handler_dict = handler.get_supported_modes()

        self.route_dict[path] = handler_dict
        self.route_dict[path + '/postreq'] = handler_dict

    def get_query_dict(self, environ):
        """Return the request's query string parsed into a dict.

        For a repeated parameter the last value wins. A missing or empty
        ``QUERY_STRING`` yields an empty dict.
        """
        query_str = environ.get('QUERY_STRING')
        if not query_str:
            return {}

        return dict(parse_qsl(query_str))

    def __call__(self, environ, start_response):
        """WSGI entry point: match the URL, run the endpoint, send the result.

        Routing failures (werkzeug ``HTTPException``) are rendered by
        werkzeug itself. Any other exception raised while producing the
        response becomes a 500 JSON error.
        """
        urls = self.url_map.bind_to_environ(environ)
        try:
            endpoint, args = urls.match()
        except HTTPException as e:
            # werkzeug HTTP exceptions are themselves WSGI applications
            return e(environ, start_response)

        try:
            out_headers, res, errs = endpoint(environ, **args)

            if not res:
                return self.send_error(out_headers, errs, start_response)

            if isinstance(res, dict):
                res = json.dumps(res).encode('utf-8')
                out_headers['Content-Type'] = JSON_CT
                out_headers['Content-Length'] = str(len(res))
                res = [res]

            if errs:
                out_headers['ResErrors'] = json.dumps(errs)

            start_response('200 OK', list(out_headers.items()))
            return res

        except Exception as e:
            if self.debug:
                traceback.print_exc()

            return self.send_error({}, {}, start_response,
                                   message='Internal Error: ' + str(e),
                                   status=500)

    def send_error(self, out_headers, errs, start_response,
                   message='No Resource Found', status=404):
        """Start a JSON error response and return its body.

        If ``errs`` holds a ``'last_exc'`` entry it is removed from ``errs``
        and supplies the status (``last_exc.status()``) and message
        (``last_exc.msg``). Remaining ``errs`` are included in the body
        under ``'errors'``.

        :param out_headers: accepted for signature symmetry; not used
        :param errs: dict of errors (mutated: ``'last_exc'`` is popped)
        :param start_response: WSGI ``start_response`` callable
        :param message: fallback error message
        :param status: fallback HTTP status code
        :returns: single-element list holding the UTF-8 encoded JSON body
        """
        last_exc = errs.pop('last_exc', None)
        if last_exc:
            if self.debug:
                # Not inside an ``except`` block here, so print the stored
                # exception explicitly rather than via print_exc()
                traceback.print_exception(
                    type(last_exc), last_exc,
                    getattr(last_exc, '__traceback__', None))

            status = last_exc.status()
            message = last_exc.msg

        res = {'message': message}
        if errs:
            res['errors'] = errs

        err_msg = json.dumps(res)
        body = err_msg.encode('utf-8')

        headers = [('Content-Type', JSON_CT),
                   ('Content-Length', str(len(body))),
                   ('ResErrors', err_msg)]

        # A status line must not contain line breaks; exception messages may
        reason = ' '.join(message.splitlines())
        start_response(str(status) + ' ' + reason, headers)
        return [body]

View File

@ -111,7 +111,14 @@ class IndexHandler(object):
content_type, res = handler(cdx_iter, fields)
out_headers = {'Content-Type': content_type}
return out_headers, res, errs
def check_str(res):
    """Yield each item of ``res``, encoding text items as UTF-8 bytes.

    Items that are not text (e.g. already bytes) are passed through
    unchanged.
    """
    # ``str`` is the bytes type on Python 2, so test against the text type
    # explicitly; type(u'') is ``unicode`` on Python 2 and ``str`` on 3.
    text_type = type(u'')
    for item in res:
        if isinstance(item, text_type):
            item = item.encode('utf-8')
        yield item
return out_headers, check_str(res), errs
#=============================================================================

View File

@ -62,7 +62,7 @@ class TestResAgg(FakeRedisTests, BaseTestClass):
app.add_route('/empty', HandlerSeq([]))
app.add_route('/invalid', DefaultResourceHandler([SimpleAggregator({'invalid': 'should not be a callable'})]))
cls.testapp = webtest.TestApp(app.application)
cls.testapp = webtest.TestApp(app)
def _check_uri_date(self, resp, uri, dt):
buff = BytesIO(resp.body)

View File

@ -129,13 +129,13 @@ def test_handler_output_cdxj():
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
exp = """\
exp = b"""\
com,vvork)/ 20141006184357 {"url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
com,vvork)/ 20131004231540 {"url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
"""
assert(headers['Content-Type'] == 'text/x-cdxj')
assert(''.join(res) == exp)
assert(b''.join(res) == exp)
assert(errs == {})
@ -145,13 +145,13 @@ def test_handler_output_json():
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='json'))
exp = """\
exp = b"""\
{"urlkey": "com,vvork)/", "timestamp": "20141006184357", "url": "http://www.vvork.com/", "mem_rel": "memento", "memento_url": "http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/", "load_url": "http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/", "source": "rhiz"}
{"urlkey": "com,vvork)/", "timestamp": "20131004231540", "url": "http://vvork.com/", "mem_rel": "last memento", "memento_url": "http://wayback.archive-it.org/all/20131004231540/http://vvork.com/", "load_url": "http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/", "source": "ait"}
"""
assert(headers['Content-Type'] == 'application/x-ndjson')
assert(''.join(res) == exp)
assert(b''.join(res) == exp)
assert(errs == {})
def test_handler_output_link():
@ -160,12 +160,12 @@ def test_handler_output_link():
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='link'))
exp = """\
exp = b"""\
<http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/>; rel="memento"; datetime="Mon, 06 Oct 2014 18:43:57 GMT"; src="rhiz",
<http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/>; rel="memento"; datetime="Fri, 04 Oct 2013 23:15:40 GMT"; src="ait"
"""
assert(headers['Content-Type'] == 'application/link')
assert(''.join(res) == exp)
assert(b''.join(res) == exp)
assert(errs == {})
@ -175,7 +175,7 @@ def test_handler_output_link_2():
url = 'http://iana.org/'
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
exp = """\
exp = b"""\
<http://web.archive.org/web/20140126093743id_/http://iana.org/>; rel="memento"; datetime="Sun, 26 Jan 2014 09:37:43 GMT"; src="ia",
<file://iana.warc.gz:334:2258>; rel="memento"; datetime="Sun, 26 Jan 2014 20:06:24 GMT"; src="local",
<http://web.archive.org/web/20140123034755id_/http://iana.org/>; rel="memento"; datetime="Thu, 23 Jan 2014 03:47:55 GMT"; src="ia",
@ -183,7 +183,7 @@ def test_handler_output_link_2():
<http://wayback.archive-it.org/all/20140107040552id_/http://iana.org/>; rel="memento"; datetime="Tue, 07 Jan 2014 04:05:52 GMT"; src="ait"
"""
assert(headers['Content-Type'] == 'application/link')
assert(''.join(res) == exp)
assert(b''.join(res) == exp)
exp_errs = {'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://iana.org/',)",
'rhiz': "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
@ -198,10 +198,10 @@ def test_handler_output_link_3():
url = 'http://foo.bar.non-existent'
headers, res, errs = handler(dict(url=url, closest='20140126000000', limit=5, output='link'))
exp = ''
exp = b''
assert(headers['Content-Type'] == 'application/link')
assert(''.join(res) == exp)
assert(b''.join(res) == exp)
exp_errs = {'ait': "NotFoundException('http://wayback.archive-it.org/all/http://foo.bar.non-existent',)",
'bl': "NotFoundException('http://www.webarchive.org.uk/wayback/archive/http://foo.bar.non-existent',)",
@ -216,12 +216,12 @@ def test_handler_output_text():
url = 'http://vvork.com/'
headers, res, errs = handler(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait', output='text'))
exp = """\
exp = b"""\
com,vvork)/ 20141006184357 http://www.vvork.com/ memento http://webenact.rhizome.org/vvork/20141006184357/http://www.vvork.com/ http://webenact.rhizome.org/vvork/20141006184357id_/http://www.vvork.com/ rhiz
com,vvork)/ 20131004231540 http://vvork.com/ last memento http://wayback.archive-it.org/all/20131004231540/http://vvork.com/ http://wayback.archive-it.org/all/20131004231540id_/http://vvork.com/ ait
"""
assert(headers['Content-Type'] == 'text/plain')
assert(''.join(res) == exp)
assert(b''.join(res) == exp)
assert(errs == {})

View File

@ -31,7 +31,7 @@ class TestUpstream(LiveServerTests, BaseTestClass):
)
self.base_url = base_url
self.testapp = webtest.TestApp(app.application)
self.testapp = webtest.TestApp(app)
def test_live_paths(self):

View File

@ -99,7 +99,7 @@ class LiveServerTests(object):
{'live': LiveIndexSource()})
)
)
return app.application
return app
@classmethod
def teardown_class(cls):