From e060ea7b56c9b205cbbd6b9fdd8a4808896f9ed9 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 21 Apr 2017 15:37:21 -0700 Subject: [PATCH] frontendapp: encapsulate, don't extend rewriterapp rewriterapp: add 'Content-Location' if fuzzy match, or if using memento tests: fix test to check for Content-Location for fuzzy match instead of redirect --- pywb/urlrewrite/frontendapp.py | 27 +++++++++++++++------------ pywb/urlrewrite/rewriterapp.py | 11 +++++++++-- tests/test_integration.py | 5 +++-- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/pywb/urlrewrite/frontendapp.py b/pywb/urlrewrite/frontendapp.py index 95e5a6db..9ba9b1aa 100644 --- a/pywb/urlrewrite/frontendapp.py +++ b/pywb/urlrewrite/frontendapp.py @@ -27,13 +27,13 @@ class NewWbRequest(object): # ============================================================================ -class FrontEndApp(RewriterApp): +class FrontEndApp(object): def __init__(self, config_file='./config.yaml', custom_config=None): self.debug = True self.webagg = AutoConfigApp(config_file=config_file, custom_config=custom_config) - super(FrontEndApp, self).__init__(True, config=self.webagg.config) + self.rewriterapp = RewriterApp(True, config=self.webagg.config) self.webagg_server = GeventServer(self.webagg, port=0) @@ -46,7 +46,7 @@ class FrontEndApp(RewriterApp): self.url_map.add(Rule('/collinfo.json', endpoint=self.serve_listing)) self.url_map.add(Rule('/', endpoint=self.serve_home)) - self.paths = self.get_upstream_paths(self.webagg_server.port) + self.rewriterapp.paths = self.get_upstream_paths(self.webagg_server.port) def get_upstream_paths(self, port): return {'replay-dyn': 'http://localhost:%s/_/resource/postreq?param.coll={coll}' % port, @@ -54,7 +54,7 @@ class FrontEndApp(RewriterApp): } def serve_home(self, environ): - home_view = BaseInsertView(self.jinja_env, 'new_index.html') + home_view = BaseInsertView(self.rewriterapp.jinja_env, 'new_index.html') routes = self.webagg.list_fixed_routes() + self.webagg.list_dynamic_routes() content = home_view.render_to_string(environ, routes=routes) @@ -64,14 +64,14 @@ class FrontEndApp(RewriterApp): try: return self.static_handler(NewWbRequest(environ, filepath, '')) except: - raise NotFound(response=self._error_response(environ, 'Static File Not Found: {0}'.format(filepath))) + self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath)) def serve_coll_page(self, environ, coll): if not self.is_valid_coll(coll): - raise NotFound(response=self._error_response(environ, 'No handler for "/{0}"'.format(coll))) + self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll)) wbrequest = NewWbRequest(environ, '', '/') - view = BaseInsertView(self.jinja_env, 'search.html') + view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html') content = view.render_to_string(environ, wbrequest=wbrequest) return WbResponse.text_response(content, content_type='text/html; charset="utf-8"') @@ -87,12 +87,15 @@ class FrontEndApp(RewriterApp): return (coll in self.webagg.list_fixed_routes() or coll in self.webagg.list_dynamic_routes()) + def raise_not_found(self, environ, msg): + raise NotFound(response=self.rewriterapp._error_response(environ, msg)) + def serve_content(self, environ, coll='', url=''): if not self.is_valid_coll(coll): - raise NotFound(response=self._error_response(environ, 'No handler for "/{0}"'.format(coll))) + self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll)) pop_path_info(environ) - wb_url = self.get_wburl(environ) + wb_url = self.rewriterapp.get_wburl(environ) kwargs = {'coll': coll} @@ -102,9 +105,9 @@ class FrontEndApp(RewriterApp): kwargs['type'] = 'replay-dyn' try: - response = self.render_content(wb_url, kwargs, environ) + response = self.rewriterapp.render_content(wb_url, kwargs, environ) except UpstreamException as ue: - response = self.handle_error(environ, ue) + response = self.rewriterapp.handle_error(environ, ue) raise HTTPException(response=response) return response @@ -156,7 +159,7 @@ class FrontEndApp(RewriterApp): if self.debug: traceback.print_exc() - response = self._error_response(environ, 'Internal Error: ' + str(e), '500 Server Error') + response = self.rewriterapp._error_response(environ, 'Internal Error: ' + str(e), '500 Server Error') return response(environ, start_response) @classmethod diff --git a/pywb/urlrewrite/rewriterapp.py b/pywb/urlrewrite/rewriterapp.py index 6ce69e2c..b732cdee 100644 --- a/pywb/urlrewrite/rewriterapp.py +++ b/pywb/urlrewrite/rewriterapp.py @@ -216,8 +216,12 @@ class RewriterApp(object): cdx['timestamp'] = http_date_to_timestamp(memento_dt) cdx['url'] = target_uri - # Disable Fuzzy Match Redir - #if target_uri != wb_url.url and r.headers.get('WebAgg-Fuzzy-Match') == '1': + set_content_loc = False + + # Check if Fuzzy Match + if target_uri != wb_url.url and r.headers.get('WebAgg-Fuzzy-Match') == '1': + set_content_loc = True + # return WbResponse.redir_response(urlrewriter.rewrite(target_uri), # '307 Temporary Redirect') @@ -272,6 +276,9 @@ class RewriterApp(object): if not is_ajax and self.enable_memento: self._add_memento_links(urlrewriter, full_prefix, memento_dt, status_headers) + set_content_loc = True + + if set_content_loc: status_headers.headers.append(('Content-Location', urlrewriter.get_new_url(timestamp=cdx['timestamp'], url=cdx['url']))) #gen = buffer_iter(status_headers, gen) diff --git a/tests/test_integration.py b/tests/test_integration.py index 0abb3339..0fce3b65 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -106,8 +106,9 @@ class TestWbIntegration(BaseConfigTest): def test_replay_fuzzy_1(self): resp = self.testapp.get('/pywb/20140127171238mp_/http://www.iana.org/?_=123') - assert resp.status_int == 307 - assert resp.headers['Location'].endswith('/pywb/20140127171238mp_/http://www.iana.org/') + assert resp.status_int == 200 + assert resp.headers['Content-Location'].endswith('/pywb/20140126200624mp_/http://www.iana.org/') + #assert resp.headers['Location'].endswith('/pywb/20140127171238mp_/http://www.iana.org/') def test_replay_no_fuzzy_match(self): resp = self.testapp.get('/pywb/20140127171238mp_/http://www.iana.org/?foo=bar', status=404)