From a301dda0fb77304a17861ce58b1a596699c523ab Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 25 Feb 2018 00:21:05 -0800 Subject: [PATCH] memento prefer header improvements: (ukwa/ukwa-pywb#12) - support Prefer on top-frame url in framed mode, Prefer check runs before custom response - update Prefer test fixtures to test framed vs frameless and no-mod vs mp_ modifier, all combinations --- pywb/apps/rewriterapp.py | 43 +++++++-------- tests/test_prefer_header.py | 104 +++++++++++++++++++++++------------- 2 files changed, 89 insertions(+), 58 deletions(-) diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index 2b93272b..5c0ca1cc 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -261,9 +261,28 @@ class RewriterApp(object): 'pywb.static_prefix', '/static/') is_proxy = ('wsgiprox.proxy_host' in environ) - response = self.handle_custom_response(environ, wb_url, - full_prefix, host_prefix, - kwargs) + # Check Prefer + pref_mod, pref_applied = self._get_prefer_mod(wb_url, environ) + + response = None + + # prefer overrides custom response? + if pref_mod is not None: + # fast-redirect to preferred + if self.redirect_to_exact and not is_timegate and pref_mod != wb_url.mod: + new_url = full_prefix + wb_url.to_str(mod=pref_mod) + headers = [('Preference-Applied', pref_applied), + ('Vary', 'Prefer')] + + return WbResponse.redir_response(new_url, + '307 Temporary Redirect', + headers=headers) + else: + wb_url.mod = pref_mod + else: + response = self.handle_custom_response(environ, wb_url, + full_prefix, host_prefix, + kwargs) if response: return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy) @@ -286,24 +305,6 @@ class RewriterApp(object): if not url_parts.path: return self.send_redirect('/', url_parts, urlrewriter) - # Check Prefer - pref_mod, pref_applied = self._get_prefer_mod(wb_url, environ) - - # fast-redirect to preferred - if pref_mod is not None: - if self.redirect_to_exact and not is_timegate and pref_mod != wb_url.mod: - new_url = urlrewriter.get_new_url(url=wb_url.url, - timestamp=wb_url.timestamp, - mod=pref_mod) - headers = [('Preference-Applied', pref_applied), - ('Vary', 'Prefer')] - - return WbResponse.redir_response(new_url, - '307 Temporary Redirect', - headers=headers) - else: - wb_url.mod = pref_mod - self.unrewrite_referrer(environ, full_prefix) urlkey = canonicalize(wb_url.url) diff --git a/tests/test_prefer_header.py b/tests/test_prefer_header.py index ff672100..22279c6f 100644 --- a/tests/test_prefer_header.py +++ b/tests/test_prefer_header.py @@ -1,30 +1,62 @@ from .base_config_test import BaseConfigTest, fmod from pywb.warcserver.index.cdxobject import CDXObject +import pytest # ============================================================================ -class TestPreferWithRedirects(BaseConfigTest): - @classmethod - def setup_class(cls): - super(TestPreferWithRedirects, cls).setup_class('config_test_redirect_classic.yaml') +@pytest.fixture(params=[('mp_', 'mp_'), + ('', 'mp_'), + ('mp_', ''), + ('', '')], + + ids=['framed-mp', + 'framed', + 'non-framed-mp', + 'non-frame']) +def fmod(request): + return request.param + + +# ============================================================================ +class BasePreferTests(BaseConfigTest): + def get(self, url, param, *args, **kwargs): + app = self.testapp if param[0] else self.testapp_non_frame + return app.get(self.format(url, param[1], with_slash=kwargs.pop('with_slash', False)), *args, **kwargs) + + def format(self, string, fmod, with_slash=False): + if with_slash and fmod: + fmod += '/' + + return string.format(fmod) def _assert_pref_headers(self, resp, pref): assert resp.headers['Preference-Applied'] == pref assert 'Prefer' in resp.headers['Vary'] - def _assert_raw_memento(self, resp): + def _assert_raw(self, resp): self._assert_pref_headers(resp, 'raw') assert '"/time-zones"' in resp.text, resp.text assert 'wombat.js' not in resp.text - def _assert_rewritten(self, resp, fmod): + def _assert_rewritten(self, resp): self._assert_pref_headers(resp, 'rewritten') assert '"20140127171238"' in resp.text assert 'wombat.js' in resp.text assert 'new _WBWombat' in resp.text, resp.text - assert '/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text + + +# ============================================================================ +class TestPreferWithRedirects(BasePreferTests): + @classmethod + def setup_class(cls): + super(TestPreferWithRedirects, cls).setup_class('config_test_redirect_classic.yaml') + + def _assert_rewritten(self, resp, fmod): + super(TestPreferWithRedirects, self)._assert_rewritten(resp) + + self.format('/20140127171238{0}/http://www.iana.org/time-zones"', fmod[0]) in resp.text def _assert_redir_to_raw(self, resp): self._assert_pref_headers(resp, 'raw') @@ -32,27 +64,25 @@ class TestPreferWithRedirects(BaseConfigTest): assert resp.location.endswith('/pywb/20140127171238id_/http://www.iana.org/') resp = resp.follow() - self._assert_raw_memento(resp) + self._assert_raw(resp) def _assert_redir_to_rewritten(self, resp, fmod): self._assert_pref_headers(resp, 'rewritten') - assert resp.location.endswith('/pywb/20140127171238{0}/http://www.iana.org/'.format(fmod)) + assert resp.location.endswith(self.format('/pywb/20140127171238{0}/http://www.iana.org/', fmod[0])) resp = resp.follow() self._assert_rewritten(resp, fmod) def test_prefer_redir_timegate_raw(self, fmod): headers = {'Prefer': 'raw'} - fmod_slash = fmod + '/' if fmod else '' - resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=307) + resp = self.get('/pywb/{0}http://www.iana.org/', fmod, with_slash=True, headers=headers, status=307) self._assert_redir_to_raw(resp) def test_prefer_redir_timegate_rewritten(self, fmod): headers = {'Prefer': 'rewritten'} - fmod_slash = fmod + '/' if fmod else '' - resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=307) + resp = self.get('/pywb/{0}http://www.iana.org/', fmod, with_slash=True, headers=headers, status=307) self._assert_redir_to_rewritten(resp, fmod) @@ -68,17 +98,28 @@ class TestPreferWithRedirects(BaseConfigTest): self._assert_redir_to_rewritten(resp, fmod) - def test_prefer_redir_memento_matches_rewritten(self, fmod): + def test_prefer_redir_memento_rewritten_matches_or_redir(self, fmod): headers = {'Prefer': 'rewritten'} - resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200) - self._assert_rewritten(resp, fmod) + url = '/pywb/20140127171238{0}/http://www.iana.org/' + + # if framed mode and mp_, or non-framed mode and blank mod, already at canonical url + # no redirect + if fmod[0] == fmod[1]: + resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200) + + self._assert_rewritten(resp, fmod) + # otherwise, if framed and blank mod, or non-framed and mp_ mod, redirect to canonical url + else: + resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=307) + + self._assert_redir_to_rewritten(resp, fmod) def test_prefer_redir_memento_matches_raw(self): headers = {'Prefer': 'raw'} resp = self.testapp.get('/pywb/20140127171238id_/http://www.iana.org/', headers=headers, status=200) - self._assert_raw_memento(resp) + self._assert_raw(resp) def test_prefer_redir_invalid(self, fmod): headers = {'Prefer': 'unknown'} @@ -86,45 +127,33 @@ class TestPreferWithRedirects(BaseConfigTest): # ============================================================================ -class TestPreferWithNoRedirects(BaseConfigTest): +class TestPreferWithNoRedirects(BasePreferTests): @classmethod def setup_class(cls): super(TestPreferWithNoRedirects, cls).setup_class('config_test.yaml', custom_config={'enable_prefer': True}) - def _assert_pref_headers(self, resp, pref): - assert resp.headers['Preference-Applied'] == pref - assert 'Prefer' in resp.headers['Vary'] - def _assert_raw(self, resp): - self._assert_pref_headers(resp, 'raw') - assert '"/time-zones"' in resp.text, resp.text - assert 'wombat.js' not in resp.text + super(TestPreferWithNoRedirects, self)._assert_raw(resp) assert resp.headers['Content-Location'].endswith('/pywb/20140127171238id_/http://www.iana.org/') def _assert_rewritten(self, resp, fmod): - self._assert_pref_headers(resp, 'rewritten') + super(TestPreferWithNoRedirects, self)._assert_rewritten(resp) - assert '"20140127171238"' in resp.text - assert 'wombat.js' in resp.text - assert 'new _WBWombat' in resp.text, resp.text - - assert resp.headers['Content-Location'].endswith('/pywb/20140127171238{0}/http://www.iana.org/'.format(fmod)) + assert resp.headers['Content-Location'].endswith(self.format('/pywb/20140127171238{0}/http://www.iana.org/', fmod[0])) def test_prefer_timegate_raw(self, fmod): headers = {'Prefer': 'raw'} - fmod_slash = fmod + '/' if fmod else '' - resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=200) + resp = self.get('/pywb/{0}http://www.iana.org/', fmod, with_slash=True, headers=headers, status=200) self._assert_raw(resp) def test_prefer_timegate_rewritten(self, fmod): headers = {'Prefer': 'rewritten'} - fmod_slash = fmod + '/' if fmod else '' - resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=200) + resp = self.get('/pywb/{0}http://www.iana.org/', fmod, with_slash=True, headers=headers, status=200) - assert '/pywb/{0}http://www.iana.org/time-zones"'.format(fmod_slash) in resp.text + assert self.format('/pywb/{0}http://www.iana.org/time-zones"', fmod[0], with_slash=True) in resp.text self._assert_rewritten(resp, fmod) def test_prefer_memento_raw(self, fmod): @@ -137,6 +166,7 @@ class TestPreferWithNoRedirects(BaseConfigTest): headers = {'Prefer': 'rewritten'} resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200) + assert self.format('/pywb/20140127171238{0}/http://www.iana.org/time-zones"', fmod[0]) in resp.text self._assert_rewritten(resp, fmod) def test_prefer_memento_raw_id_mod(self): @@ -151,7 +181,7 @@ class TestPreferWithNoRedirects(BaseConfigTest): self._assert_rewritten(resp, fmod) - def test_prefer_memento_rewritten_diff_mod(self): + def test_prefer_memento_rewritten_diff_mod(self, fmod): headers = {'Prefer': 'raw'} resp = self.get('/pywb/20140127171238js_/http://www.iana.org/', fmod, headers=headers, status=200)