From a3294c8b251dce7bf66c7b3b7042e38b2c3c39ba Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 11 Sep 2019 09:03:55 -0700 Subject: [PATCH] fix exception handling: - don't rethrow HTTPException from WbException - catch RequestRedirect to issue 307 redirect, check referrer - tests: add referrer redirect tests with missing slash defaults: don't enable new transclusions by default --- pywb/apps/frontendapp.py | 17 +++++++---------- pywb/templates/head_insert.html | 2 +- tests/test_integration.py | 9 ++++++++- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/pywb/apps/frontendapp.py b/pywb/apps/frontendapp.py index 98b3dcff..9068b4ee 100644 --- a/pywb/apps/frontendapp.py +++ b/pywb/apps/frontendapp.py @@ -1,7 +1,6 @@ from gevent.monkey import patch_all; patch_all() -from werkzeug.routing import Map, Rule -from werkzeug.exceptions import HTTPException +from werkzeug.routing import Map, Rule, RequestRedirect from werkzeug.wsgi import pop_path_info from six.moves.urllib.parse import urljoin from six import iteritems @@ -418,12 +417,8 @@ class FrontEndApp(object): metadata['output'] = timemap_output # ensure that the timemap path information is not included wb_url_str = wb_url_str.replace('timemap/{0}/'.format(timemap_output), '') - try: - response = self.rewriterapp.render_content(wb_url_str, metadata, environ) - except WbException as wbe: - response = self.rewriterapp.handle_error(environ, wbe) - raise HTTPException(response=response) - return response + + return self.rewriterapp.render_content(wb_url_str, metadata, environ) def setup_paths(self, environ, coll, record=False): """Populates the WSGI environment dictionary with the path information necessary to perform a response for @@ -553,12 +548,14 @@ class FrontEndApp(object): response = endpoint(environ, **args) - except HTTPException as hte: + except RequestRedirect as rr: + # if werkzeug throws this, likely a missing slash redirect + # also check referrer here to avoid another redirect later redir = self._check_refer_redirect(environ) if redir: return redir(environ, start_response) - response = hte + response = WbResponse.redir_response(rr.new_url, '307 Redirect') except WbException as wbe: if wbe.status_code == 404: diff --git a/pywb/templates/head_insert.html b/pywb/templates/head_insert.html index e6deac41..924c46b1 100644 --- a/pywb/templates/head_insert.html +++ b/pywb/templates/head_insert.html @@ -54,7 +54,7 @@ {% if config.enable_flash_video_rewrite or config.transclusions_version == 1 %} -{% elif config.transclusions_version | default(2) == 2 %} +{% elif config.transclusions_version == 2 %} {% endif %} diff --git a/tests/test_integration.py b/tests/test_integration.py index 211d19a8..5670cd1e 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -389,7 +389,7 @@ class TestWbIntegration(BaseConfigTest): resp = self.post_json('/pywb/20140610001255{0}/http://httpbin.org/post?foo=bar', fmod, {'data': '^'}, status=404) assert resp.status_int == 404 - def test_post_referer_redirect(self): + def test_post_referer_redirect(self, fmod): # allowing 307 redirects resp = self.post('/post', fmod, {'foo': 'bar', 'test': 'abc'}, @@ -398,6 +398,13 @@ class TestWbIntegration(BaseConfigTest): assert resp.status_int == 307 assert resp.headers['Location'].endswith('/pywb/2014{0}/http://httpbin.org/post'.format(fmod)) + def test_get_referer_redirect(self, fmod): + resp = self.get('/get', fmod, + headers=[('Referer', 'http://localhost:80/pywb/2014{0}/http://httpbin.org/foo'.format(fmod))]) + + assert resp.status_int == 307 + assert resp.headers['Location'].endswith('/pywb/2014{0}/http://httpbin.org/get'.format(fmod)) + def _test_excluded_content(self): fmod_slash = fmod + '/' if fmod else '' resp = self.get('/pywb/{0}http://www.iana.org/_img/bookmark_icon.ico', fmod_slash, status=403)