mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
memento prefer header improvements: (ukwa/ukwa-pywb#12)
- support Prefer on top-frame url in framed mode, Prefer check runs before custom response - update Prefer test fixtures to test framed vs frameless and no-mod vs mp_ modifier, all combinations
This commit is contained in:
parent
5364275ef5
commit
a301dda0fb
@ -261,9 +261,28 @@ class RewriterApp(object):
|
|||||||
'pywb.static_prefix', '/static/')
|
'pywb.static_prefix', '/static/')
|
||||||
is_proxy = ('wsgiprox.proxy_host' in environ)
|
is_proxy = ('wsgiprox.proxy_host' in environ)
|
||||||
|
|
||||||
response = self.handle_custom_response(environ, wb_url,
|
# Check Prefer
|
||||||
full_prefix, host_prefix,
|
pref_mod, pref_applied = self._get_prefer_mod(wb_url, environ)
|
||||||
kwargs)
|
|
||||||
|
response = None
|
||||||
|
|
||||||
|
# prefer overrides custom response?
|
||||||
|
if pref_mod is not None:
|
||||||
|
# fast-redirect to preferred
|
||||||
|
if self.redirect_to_exact and not is_timegate and pref_mod != wb_url.mod:
|
||||||
|
new_url = full_prefix + wb_url.to_str(mod=pref_mod)
|
||||||
|
headers = [('Preference-Applied', pref_applied),
|
||||||
|
('Vary', 'Prefer')]
|
||||||
|
|
||||||
|
return WbResponse.redir_response(new_url,
|
||||||
|
'307 Temporary Redirect',
|
||||||
|
headers=headers)
|
||||||
|
else:
|
||||||
|
wb_url.mod = pref_mod
|
||||||
|
else:
|
||||||
|
response = self.handle_custom_response(environ, wb_url,
|
||||||
|
full_prefix, host_prefix,
|
||||||
|
kwargs)
|
||||||
|
|
||||||
if response:
|
if response:
|
||||||
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)
|
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)
|
||||||
@ -286,24 +305,6 @@ class RewriterApp(object):
|
|||||||
if not url_parts.path:
|
if not url_parts.path:
|
||||||
return self.send_redirect('/', url_parts, urlrewriter)
|
return self.send_redirect('/', url_parts, urlrewriter)
|
||||||
|
|
||||||
# Check Prefer
|
|
||||||
pref_mod, pref_applied = self._get_prefer_mod(wb_url, environ)
|
|
||||||
|
|
||||||
# fast-redirect to preferred
|
|
||||||
if pref_mod is not None:
|
|
||||||
if self.redirect_to_exact and not is_timegate and pref_mod != wb_url.mod:
|
|
||||||
new_url = urlrewriter.get_new_url(url=wb_url.url,
|
|
||||||
timestamp=wb_url.timestamp,
|
|
||||||
mod=pref_mod)
|
|
||||||
headers = [('Preference-Applied', pref_applied),
|
|
||||||
('Vary', 'Prefer')]
|
|
||||||
|
|
||||||
return WbResponse.redir_response(new_url,
|
|
||||||
'307 Temporary Redirect',
|
|
||||||
headers=headers)
|
|
||||||
else:
|
|
||||||
wb_url.mod = pref_mod
|
|
||||||
|
|
||||||
self.unrewrite_referrer(environ, full_prefix)
|
self.unrewrite_referrer(environ, full_prefix)
|
||||||
|
|
||||||
urlkey = canonicalize(wb_url.url)
|
urlkey = canonicalize(wb_url.url)
|
||||||
|
@ -1,30 +1,62 @@
|
|||||||
from .base_config_test import BaseConfigTest, fmod
|
from .base_config_test import BaseConfigTest, fmod
|
||||||
|
|
||||||
from pywb.warcserver.index.cdxobject import CDXObject
|
from pywb.warcserver.index.cdxobject import CDXObject
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class TestPreferWithRedirects(BaseConfigTest):
|
@pytest.fixture(params=[('mp_', 'mp_'),
|
||||||
@classmethod
|
('', 'mp_'),
|
||||||
def setup_class(cls):
|
('mp_', ''),
|
||||||
super(TestPreferWithRedirects, cls).setup_class('config_test_redirect_classic.yaml')
|
('', '')],
|
||||||
|
|
||||||
|
ids=['framed-mp',
|
||||||
|
'framed',
|
||||||
|
'non-framed-mp',
|
||||||
|
'non-frame'])
|
||||||
|
def fmod(request):
|
||||||
|
return request.param
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class BasePreferTests(BaseConfigTest):
|
||||||
|
def get(self, url, param, *args, **kwargs):
|
||||||
|
app = self.testapp if param[0] else self.testapp_non_frame
|
||||||
|
return app.get(self.format(url, param[1], with_slash=kwargs.pop('with_slash', False)), *args, **kwargs)
|
||||||
|
|
||||||
|
def format(self, string, fmod, with_slash=False):
|
||||||
|
if with_slash and fmod:
|
||||||
|
fmod += '/'
|
||||||
|
|
||||||
|
return string.format(fmod)
|
||||||
|
|
||||||
def _assert_pref_headers(self, resp, pref):
|
def _assert_pref_headers(self, resp, pref):
|
||||||
assert resp.headers['Preference-Applied'] == pref
|
assert resp.headers['Preference-Applied'] == pref
|
||||||
assert 'Prefer' in resp.headers['Vary']
|
assert 'Prefer' in resp.headers['Vary']
|
||||||
|
|
||||||
def _assert_raw_memento(self, resp):
|
def _assert_raw(self, resp):
|
||||||
self._assert_pref_headers(resp, 'raw')
|
self._assert_pref_headers(resp, 'raw')
|
||||||
assert '"/time-zones"' in resp.text, resp.text
|
assert '"/time-zones"' in resp.text, resp.text
|
||||||
assert 'wombat.js' not in resp.text
|
assert 'wombat.js' not in resp.text
|
||||||
|
|
||||||
def _assert_rewritten(self, resp, fmod):
|
def _assert_rewritten(self, resp):
|
||||||
self._assert_pref_headers(resp, 'rewritten')
|
self._assert_pref_headers(resp, 'rewritten')
|
||||||
|
|
||||||
assert '"20140127171238"' in resp.text
|
assert '"20140127171238"' in resp.text
|
||||||
assert 'wombat.js' in resp.text
|
assert 'wombat.js' in resp.text
|
||||||
assert 'new _WBWombat' in resp.text, resp.text
|
assert 'new _WBWombat' in resp.text, resp.text
|
||||||
assert '/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class TestPreferWithRedirects(BasePreferTests):
|
||||||
|
@classmethod
|
||||||
|
def setup_class(cls):
|
||||||
|
super(TestPreferWithRedirects, cls).setup_class('config_test_redirect_classic.yaml')
|
||||||
|
|
||||||
|
def _assert_rewritten(self, resp, fmod):
|
||||||
|
super(TestPreferWithRedirects, self)._assert_rewritten(resp)
|
||||||
|
|
||||||
|
self.format('/20140127171238{0}/http://www.iana.org/time-zones"', fmod[0]) in resp.text
|
||||||
|
|
||||||
def _assert_redir_to_raw(self, resp):
|
def _assert_redir_to_raw(self, resp):
|
||||||
self._assert_pref_headers(resp, 'raw')
|
self._assert_pref_headers(resp, 'raw')
|
||||||
@ -32,27 +64,25 @@ class TestPreferWithRedirects(BaseConfigTest):
|
|||||||
assert resp.location.endswith('/pywb/20140127171238id_/http://www.iana.org/')
|
assert resp.location.endswith('/pywb/20140127171238id_/http://www.iana.org/')
|
||||||
resp = resp.follow()
|
resp = resp.follow()
|
||||||
|
|
||||||
self._assert_raw_memento(resp)
|
self._assert_raw(resp)
|
||||||
|
|
||||||
def _assert_redir_to_rewritten(self, resp, fmod):
|
def _assert_redir_to_rewritten(self, resp, fmod):
|
||||||
self._assert_pref_headers(resp, 'rewritten')
|
self._assert_pref_headers(resp, 'rewritten')
|
||||||
|
|
||||||
assert resp.location.endswith('/pywb/20140127171238{0}/http://www.iana.org/'.format(fmod))
|
assert resp.location.endswith(self.format('/pywb/20140127171238{0}/http://www.iana.org/', fmod[0]))
|
||||||
resp = resp.follow()
|
resp = resp.follow()
|
||||||
|
|
||||||
self._assert_rewritten(resp, fmod)
|
self._assert_rewritten(resp, fmod)
|
||||||
|
|
||||||
def test_prefer_redir_timegate_raw(self, fmod):
|
def test_prefer_redir_timegate_raw(self, fmod):
|
||||||
headers = {'Prefer': 'raw'}
|
headers = {'Prefer': 'raw'}
|
||||||
fmod_slash = fmod + '/' if fmod else ''
|
resp = self.get('/pywb/{0}http://www.iana.org/', fmod, with_slash=True, headers=headers, status=307)
|
||||||
resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=307)
|
|
||||||
|
|
||||||
self._assert_redir_to_raw(resp)
|
self._assert_redir_to_raw(resp)
|
||||||
|
|
||||||
def test_prefer_redir_timegate_rewritten(self, fmod):
|
def test_prefer_redir_timegate_rewritten(self, fmod):
|
||||||
headers = {'Prefer': 'rewritten'}
|
headers = {'Prefer': 'rewritten'}
|
||||||
fmod_slash = fmod + '/' if fmod else ''
|
resp = self.get('/pywb/{0}http://www.iana.org/', fmod, with_slash=True, headers=headers, status=307)
|
||||||
resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=307)
|
|
||||||
|
|
||||||
self._assert_redir_to_rewritten(resp, fmod)
|
self._assert_redir_to_rewritten(resp, fmod)
|
||||||
|
|
||||||
@ -68,17 +98,28 @@ class TestPreferWithRedirects(BaseConfigTest):
|
|||||||
|
|
||||||
self._assert_redir_to_rewritten(resp, fmod)
|
self._assert_redir_to_rewritten(resp, fmod)
|
||||||
|
|
||||||
def test_prefer_redir_memento_matches_rewritten(self, fmod):
|
def test_prefer_redir_memento_rewritten_matches_or_redir(self, fmod):
|
||||||
headers = {'Prefer': 'rewritten'}
|
headers = {'Prefer': 'rewritten'}
|
||||||
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200)
|
|
||||||
|
|
||||||
self._assert_rewritten(resp, fmod)
|
url = '/pywb/20140127171238{0}/http://www.iana.org/'
|
||||||
|
|
||||||
|
# if framed mode and mp_, or non-framed mode and blank mod, already at canonical url
|
||||||
|
# no redirect
|
||||||
|
if fmod[0] == fmod[1]:
|
||||||
|
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200)
|
||||||
|
|
||||||
|
self._assert_rewritten(resp, fmod)
|
||||||
|
# otherwise, if framed and blank mod, or non-framed and mp_ mod, redirect to canonical url
|
||||||
|
else:
|
||||||
|
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=307)
|
||||||
|
|
||||||
|
self._assert_redir_to_rewritten(resp, fmod)
|
||||||
|
|
||||||
def test_prefer_redir_memento_matches_raw(self):
|
def test_prefer_redir_memento_matches_raw(self):
|
||||||
headers = {'Prefer': 'raw'}
|
headers = {'Prefer': 'raw'}
|
||||||
resp = self.testapp.get('/pywb/20140127171238id_/http://www.iana.org/', headers=headers, status=200)
|
resp = self.testapp.get('/pywb/20140127171238id_/http://www.iana.org/', headers=headers, status=200)
|
||||||
|
|
||||||
self._assert_raw_memento(resp)
|
self._assert_raw(resp)
|
||||||
|
|
||||||
def test_prefer_redir_invalid(self, fmod):
|
def test_prefer_redir_invalid(self, fmod):
|
||||||
headers = {'Prefer': 'unknown'}
|
headers = {'Prefer': 'unknown'}
|
||||||
@ -86,45 +127,33 @@ class TestPreferWithRedirects(BaseConfigTest):
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class TestPreferWithNoRedirects(BaseConfigTest):
|
class TestPreferWithNoRedirects(BasePreferTests):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
super(TestPreferWithNoRedirects, cls).setup_class('config_test.yaml',
|
super(TestPreferWithNoRedirects, cls).setup_class('config_test.yaml',
|
||||||
custom_config={'enable_prefer': True})
|
custom_config={'enable_prefer': True})
|
||||||
|
|
||||||
def _assert_pref_headers(self, resp, pref):
|
|
||||||
assert resp.headers['Preference-Applied'] == pref
|
|
||||||
assert 'Prefer' in resp.headers['Vary']
|
|
||||||
|
|
||||||
def _assert_raw(self, resp):
|
def _assert_raw(self, resp):
|
||||||
self._assert_pref_headers(resp, 'raw')
|
super(TestPreferWithNoRedirects, self)._assert_raw(resp)
|
||||||
assert '"/time-zones"' in resp.text, resp.text
|
|
||||||
assert 'wombat.js' not in resp.text
|
|
||||||
|
|
||||||
assert resp.headers['Content-Location'].endswith('/pywb/20140127171238id_/http://www.iana.org/')
|
assert resp.headers['Content-Location'].endswith('/pywb/20140127171238id_/http://www.iana.org/')
|
||||||
|
|
||||||
def _assert_rewritten(self, resp, fmod):
|
def _assert_rewritten(self, resp, fmod):
|
||||||
self._assert_pref_headers(resp, 'rewritten')
|
super(TestPreferWithNoRedirects, self)._assert_rewritten(resp)
|
||||||
|
|
||||||
assert '"20140127171238"' in resp.text
|
assert resp.headers['Content-Location'].endswith(self.format('/pywb/20140127171238{0}/http://www.iana.org/', fmod[0]))
|
||||||
assert 'wombat.js' in resp.text
|
|
||||||
assert 'new _WBWombat' in resp.text, resp.text
|
|
||||||
|
|
||||||
assert resp.headers['Content-Location'].endswith('/pywb/20140127171238{0}/http://www.iana.org/'.format(fmod))
|
|
||||||
|
|
||||||
def test_prefer_timegate_raw(self, fmod):
|
def test_prefer_timegate_raw(self, fmod):
|
||||||
headers = {'Prefer': 'raw'}
|
headers = {'Prefer': 'raw'}
|
||||||
fmod_slash = fmod + '/' if fmod else ''
|
resp = self.get('/pywb/{0}http://www.iana.org/', fmod, with_slash=True, headers=headers, status=200)
|
||||||
resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=200)
|
|
||||||
|
|
||||||
self._assert_raw(resp)
|
self._assert_raw(resp)
|
||||||
|
|
||||||
def test_prefer_timegate_rewritten(self, fmod):
|
def test_prefer_timegate_rewritten(self, fmod):
|
||||||
headers = {'Prefer': 'rewritten'}
|
headers = {'Prefer': 'rewritten'}
|
||||||
fmod_slash = fmod + '/' if fmod else ''
|
resp = self.get('/pywb/{0}http://www.iana.org/', fmod, with_slash=True, headers=headers, status=200)
|
||||||
resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=200)
|
|
||||||
|
|
||||||
assert '/pywb/{0}http://www.iana.org/time-zones"'.format(fmod_slash) in resp.text
|
assert self.format('/pywb/{0}http://www.iana.org/time-zones"', fmod[0], with_slash=True) in resp.text
|
||||||
self._assert_rewritten(resp, fmod)
|
self._assert_rewritten(resp, fmod)
|
||||||
|
|
||||||
def test_prefer_memento_raw(self, fmod):
|
def test_prefer_memento_raw(self, fmod):
|
||||||
@ -137,6 +166,7 @@ class TestPreferWithNoRedirects(BaseConfigTest):
|
|||||||
headers = {'Prefer': 'rewritten'}
|
headers = {'Prefer': 'rewritten'}
|
||||||
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200)
|
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200)
|
||||||
|
|
||||||
|
assert self.format('/pywb/20140127171238{0}/http://www.iana.org/time-zones"', fmod[0]) in resp.text
|
||||||
self._assert_rewritten(resp, fmod)
|
self._assert_rewritten(resp, fmod)
|
||||||
|
|
||||||
def test_prefer_memento_raw_id_mod(self):
|
def test_prefer_memento_raw_id_mod(self):
|
||||||
@ -151,7 +181,7 @@ class TestPreferWithNoRedirects(BaseConfigTest):
|
|||||||
|
|
||||||
self._assert_rewritten(resp, fmod)
|
self._assert_rewritten(resp, fmod)
|
||||||
|
|
||||||
def test_prefer_memento_rewritten_diff_mod(self):
|
def test_prefer_memento_rewritten_diff_mod(self, fmod):
|
||||||
headers = {'Prefer': 'raw'}
|
headers = {'Prefer': 'raw'}
|
||||||
resp = self.get('/pywb/20140127171238js_/http://www.iana.org/', fmod, headers=headers, status=200)
|
resp = self.get('/pywb/20140127171238js_/http://www.iana.org/', fmod, headers=headers, status=200)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user