mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
memento prefer header: add support for Prefer header for specifying 'raw' or 'rewritten' mementos (ukwa/ukwa-pywb#12, based on mementoweb/rfc-extensions#6)
- 'enable_prefer: true' in config can be used to enable experimental Memento Prefer behavior
- the Prefer header supports both redirect and non-redirect style negotiation, extending existing Memento patterns
- the Prefer header can be applied on both memento and timegate endpoints
- for redirect style negotiation, Prefer results in a redirect to the final memento (if needed), both on Timegate and URL-M (Memento Pattern 2.3)
- for non-redirect style negotiation (Memento Pattern 2.2), the Prefer header affects the content being served and changes the Content-Location to the canonical representation
- 'Vary: Prefer' and 'Preference-Applied' headers are always added to URL-M and Timegate responses
This commit is contained in:
parent
0d68f67049
commit
5364275ef5
@ -60,6 +60,11 @@ class RewriterApp(object):
|
||||
self.frame_mod = None
|
||||
self.replay_mod = ''
|
||||
|
||||
self.enable_prefer = self.config.get('enable_prefer', False)
|
||||
self.prefs = {'raw': 'id_',
|
||||
'rewritten': self.replay_mod
|
||||
}
|
||||
|
||||
self.default_rw = DefaultRewriter(replay_mod=self.replay_mod,
|
||||
config=config)
|
||||
|
||||
@ -139,6 +144,19 @@ class RewriterApp(object):
|
||||
|
||||
return is_timegate
|
||||
|
||||
def _get_prefer_mod(self, wb_url, environ):
    """Map the request's ``Prefer`` header to a replay modifier.

    :param wb_url: requested archival url; ``is_identity`` and ``url``
        are the only attributes read here
    :param dict environ: WSGI environ; only ``HTTP_PREFER`` is read
    :return: a ``(mod, preference)`` tuple:

        * ``(None, None)`` when ``self.enable_prefer`` is false
        * ``(None, 'raw')`` or ``(None, 'rewritten')`` when no Prefer
          header was sent; the preference then mirrors
          ``wb_url.is_identity``
        * ``(self.prefs[prefer], prefer)`` when the header value is a
          key of ``self.prefs``
    :raises UpstreamException: with status 400 when the header value is
        not a key of ``self.prefs``
    """
    if not self.enable_prefer:
        return None, None

    prefer = environ.get('HTTP_PREFER')
    if not prefer:
        # No explicit preference: report what the url itself asked for,
        # but return no modifier so the caller leaves the url untouched.
        return None, ('raw' if wb_url.is_identity else 'rewritten')

    try:
        return self.prefs[prefer], prefer
    except KeyError:
        # Only an unrecognized preference is a client error. Any other
        # failure must propagate instead of being reported as a 400.
        raise UpstreamException(400, url=wb_url.url, details='Invalid Prefer: ' + prefer)
|
||||
|
||||
def _check_range(self, inputreq, wb_url):
|
||||
skip_record = False
|
||||
range_start = None
|
||||
@ -268,6 +286,24 @@ class RewriterApp(object):
|
||||
if not url_parts.path:
|
||||
return self.send_redirect('/', url_parts, urlrewriter)
|
||||
|
||||
# Check Prefer
|
||||
pref_mod, pref_applied = self._get_prefer_mod(wb_url, environ)
|
||||
|
||||
# fast-redirect to preferred
|
||||
if pref_mod is not None:
|
||||
if self.redirect_to_exact and not is_timegate and pref_mod != wb_url.mod:
|
||||
new_url = urlrewriter.get_new_url(url=wb_url.url,
|
||||
timestamp=wb_url.timestamp,
|
||||
mod=pref_mod)
|
||||
headers = [('Preference-Applied', pref_applied),
|
||||
('Vary', 'Prefer')]
|
||||
|
||||
return WbResponse.redir_response(new_url,
|
||||
'307 Temporary Redirect',
|
||||
headers=headers)
|
||||
else:
|
||||
wb_url.mod = pref_mod
|
||||
|
||||
self.unrewrite_referrer(environ, full_prefix)
|
||||
|
||||
urlkey = canonicalize(wb_url.url)
|
||||
@ -357,7 +393,9 @@ class RewriterApp(object):
|
||||
self._add_memento_links(target_uri, full_prefix,
|
||||
memento_dt, cdx['timestamp'],
|
||||
resp.status_headers,
|
||||
is_timegate, is_proxy)
|
||||
is_timegate, is_proxy,
|
||||
pref_applied=pref_applied,
|
||||
mod=pref_mod)
|
||||
|
||||
else:
|
||||
resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original')
|
||||
@ -421,13 +459,15 @@ class RewriterApp(object):
|
||||
if not is_ajax and self.enable_memento:
|
||||
self._add_memento_links(cdx['url'], full_prefix,
|
||||
memento_dt, cdx['timestamp'], status_headers,
|
||||
is_timegate, is_proxy, cdx.get('source-coll'))
|
||||
is_timegate, is_proxy, cdx.get('source-coll'),
|
||||
mod=pref_mod, pref_applied=pref_applied)
|
||||
|
||||
set_content_loc = True
|
||||
|
||||
if set_content_loc and not self.redirect_to_exact:
|
||||
status_headers.headers.append(('Content-Location', urlrewriter.get_new_url(timestamp=cdx['timestamp'],
|
||||
url=cdx['url'])))
|
||||
|
||||
if not is_proxy:
|
||||
self.add_csp_header(wb_url, status_headers)
|
||||
|
||||
@ -454,7 +494,10 @@ class RewriterApp(object):
|
||||
return response
|
||||
|
||||
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
||||
status_headers, is_timegate, is_proxy, coll=None):
|
||||
status_headers, is_timegate, is_proxy, coll=None,
|
||||
pref_applied=None, mod=None):
|
||||
|
||||
mod = mod or self.replay_mod
|
||||
|
||||
# memento url + header
|
||||
if not memento_dt and memento_ts:
|
||||
@ -466,12 +509,12 @@ class RewriterApp(object):
|
||||
if is_proxy:
|
||||
memento_url = url
|
||||
else:
|
||||
memento_url = full_prefix + memento_ts + self.replay_mod
|
||||
memento_url = full_prefix + memento_ts + mod
|
||||
memento_url += '/' + url
|
||||
else:
|
||||
memento_url = None
|
||||
|
||||
timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix)
|
||||
timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix, mod)
|
||||
|
||||
link = []
|
||||
if not is_proxy:
|
||||
@ -486,14 +529,22 @@ class RewriterApp(object):
|
||||
|
||||
status_headers.headers.append(('Link', link_str))
|
||||
|
||||
vary = ''
|
||||
if is_timegate:
|
||||
status_headers.headers.append(('Vary', 'accept-datetime'))
|
||||
vary = 'accept-datetime'
|
||||
|
||||
def _get_timegate_timemap(self, url, full_prefix):
|
||||
if pref_applied:
|
||||
vary = 'Prefer' if not vary else vary + ', Prefer'
|
||||
status_headers.headers.append(('Preference-Applied', pref_applied))
|
||||
|
||||
if vary:
|
||||
status_headers.headers.append(('Vary', vary))
|
||||
|
||||
def _get_timegate_timemap(self, url, full_prefix, mod):
|
||||
# timegate url
|
||||
timegate_url = full_prefix
|
||||
if self.replay_mod:
|
||||
timegate_url += self.replay_mod + '/'
|
||||
if mod:
|
||||
timegate_url += mod + '/'
|
||||
|
||||
timegate_url += url
|
||||
|
||||
@ -585,7 +636,7 @@ class RewriterApp(object):
|
||||
status = str(res.status_code) + ' ' + res.reason
|
||||
|
||||
if res.status_code == 200 and output == 'link':
|
||||
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix)
|
||||
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix, self.replay_mod)
|
||||
|
||||
text = MementoUtils.wrap_timemap_header(wb_url.url,
|
||||
timegate,
|
||||
|
@ -14,5 +14,6 @@ enable_flash_video_rewrite: true
|
||||
redirect_to_exact: true
|
||||
|
||||
enable_memento: true
|
||||
enable_prefer: true
|
||||
|
||||
debug: true
|
||||
|
164
tests/test_prefer_header.py
Normal file
164
tests/test_prefer_header.py
Normal file
@ -0,0 +1,164 @@
|
||||
from .base_config_test import BaseConfigTest, fmod
|
||||
|
||||
from pywb.warcserver.index.cdxobject import CDXObject
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestPreferWithRedirects(BaseConfigTest):
    """Prefer-header tests run against 'config_test_redirect_classic.yaml'.

    Each scenario expects either a 307 redirect to the preferred memento,
    or a direct 200 when the requested url already matches the preference.
    """

    @classmethod
    def setup_class(cls):
        config_name = 'config_test_redirect_classic.yaml'
        super(TestPreferWithRedirects, cls).setup_class(config_name)

    def _assert_pref_headers(self, resp, pref):
        """Check Preference-Applied equals ``pref`` and Vary lists Prefer."""
        resp_headers = resp.headers
        assert resp_headers['Preference-Applied'] == pref
        assert 'Prefer' in resp_headers['Vary']

    def _assert_raw_memento(self, resp):
        """Check ``resp`` is the raw capture: original link, no wombat.js."""
        self._assert_pref_headers(resp, 'raw')

        body = resp.text
        assert '"/time-zones"' in body, body
        assert 'wombat.js' not in body

    def _assert_rewritten(self, resp, fmod):
        """Check ``resp`` is the rewritten capture with wombat injected."""
        self._assert_pref_headers(resp, 'rewritten')

        body = resp.text
        rewritten_link = '/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod)

        assert '"20140127171238"' in body
        assert 'wombat.js' in body
        assert 'new _WBWombat' in body, body
        assert rewritten_link in body

    def _assert_redir_to_raw(self, resp):
        """Check ``resp`` redirects to the id_ memento, then follow it."""
        self._assert_pref_headers(resp, 'raw')

        assert resp.location.endswith('/pywb/20140127171238id_/http://www.iana.org/')

        self._assert_raw_memento(resp.follow())

    def _assert_redir_to_rewritten(self, resp, fmod):
        """Check ``resp`` redirects to the ``fmod`` memento, then follow it."""
        self._assert_pref_headers(resp, 'rewritten')

        assert resp.location.endswith('/pywb/20140127171238{0}/http://www.iana.org/'.format(fmod))

        self._assert_rewritten(resp.follow(), fmod)

    def test_prefer_redir_timegate_raw(self, fmod):
        # timegate url: the modifier, when present, is its own path segment
        prefix = (fmod + '/') if fmod else ''
        redirect = self.get('/pywb/{0}http://www.iana.org/', prefix,
                            headers={'Prefer': 'raw'}, status=307)

        self._assert_redir_to_raw(redirect)

    def test_prefer_redir_timegate_rewritten(self, fmod):
        prefix = (fmod + '/') if fmod else ''
        redirect = self.get('/pywb/{0}http://www.iana.org/', prefix,
                            headers={'Prefer': 'rewritten'}, status=307)

        self._assert_redir_to_rewritten(redirect, fmod)

    def test_prefer_redir_memento_to_raw(self, fmod):
        redirect = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod,
                            headers={'Prefer': 'raw'}, status=307)

        self._assert_redir_to_raw(redirect)

    def test_prefer_redir_memento_redir_to_rewritten_diff_mod(self, fmod):
        # requested with js_, which differs from the 'rewritten' modifier
        redirect = self.get('/pywb/20140127171238js_/http://www.iana.org/', fmod,
                            headers={'Prefer': 'rewritten'}, status=307)

        self._assert_redir_to_rewritten(redirect, fmod)

    def test_prefer_redir_memento_matches_rewritten(self, fmod):
        # url already carries the preferred modifier: served directly
        served = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod,
                          headers={'Prefer': 'rewritten'}, status=200)

        self._assert_rewritten(served, fmod)

    def test_prefer_redir_memento_matches_raw(self):
        served = self.testapp.get('/pywb/20140127171238id_/http://www.iana.org/',
                                  headers={'Prefer': 'raw'}, status=200)

        self._assert_raw_memento(served)

    def test_prefer_redir_invalid(self, fmod):
        # an unrecognized preference is rejected; status=400 is the check
        self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod,
                 headers={'Prefer': 'unknown'}, status=400)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestPreferWithNoRedirects(BaseConfigTest):
    """Prefer-header tests run against 'config_test.yaml' with enable_prefer on.

    Every successful request here expects a direct 200 whose body matches
    the preference and whose Content-Location names the matching memento.
    """

    @classmethod
    def setup_class(cls):
        super(TestPreferWithNoRedirects, cls).setup_class('config_test.yaml',
                                                          custom_config={'enable_prefer': True})

    def _assert_pref_headers(self, resp, pref):
        """Check Preference-Applied equals ``pref`` and Vary lists Prefer."""
        assert resp.headers['Preference-Applied'] == pref
        assert 'Prefer' in resp.headers['Vary']

    def _assert_raw(self, resp):
        """Check ``resp`` is the raw capture and Content-Location is the id_ url."""
        self._assert_pref_headers(resp, 'raw')
        assert '"/time-zones"' in resp.text, resp.text
        assert 'wombat.js' not in resp.text

        assert resp.headers['Content-Location'].endswith('/pywb/20140127171238id_/http://www.iana.org/')

    def _assert_rewritten(self, resp, fmod):
        """Check ``resp`` is rewritten and Content-Location is the ``fmod`` url."""
        self._assert_pref_headers(resp, 'rewritten')

        assert '"20140127171238"' in resp.text
        assert 'wombat.js' in resp.text
        assert 'new _WBWombat' in resp.text, resp.text

        assert resp.headers['Content-Location'].endswith('/pywb/20140127171238{0}/http://www.iana.org/'.format(fmod))

    def test_prefer_timegate_raw(self, fmod):
        headers = {'Prefer': 'raw'}
        # timegate url: the modifier, when present, is its own path segment
        fmod_slash = fmod + '/' if fmod else ''
        resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=200)

        self._assert_raw(resp)

    def test_prefer_timegate_rewritten(self, fmod):
        headers = {'Prefer': 'rewritten'}
        fmod_slash = fmod + '/' if fmod else ''
        resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=200)

        # links in a timegate response stay in timegate (no timestamp) form
        assert '/pywb/{0}http://www.iana.org/time-zones"'.format(fmod_slash) in resp.text
        self._assert_rewritten(resp, fmod)

    def test_prefer_memento_raw(self, fmod):
        headers = {'Prefer': 'raw'}
        resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200)

        self._assert_raw(resp)

    def test_prefer_memento_rewritten(self, fmod):
        headers = {'Prefer': 'rewritten'}
        resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200)

        self._assert_rewritten(resp, fmod)

    def test_prefer_memento_raw_id_mod(self):
        headers = {'Prefer': 'raw'}
        resp = self.testapp.get('/pywb/20140127171238id_/http://www.iana.org/', headers=headers, status=200)

        self._assert_raw(resp)

    def test_prefer_memento_rewritten_id_mod(self, fmod):
        headers = {'Prefer': 'rewritten'}
        resp = self.get('/pywb/20140127171238id_/http://www.iana.org/', fmod, headers=headers, status=200)

        self._assert_rewritten(resp, fmod)

    def test_prefer_memento_rewritten_diff_mod(self, fmod):
        # `fmod` is declared as a parameter like in every sibling test.
        # Without it, the name below resolved to the module-level `fmod`
        # imported from base_config_test instead of a per-test value.
        # NOTE(review): presumably `fmod` is a pytest fixture -- confirm.
        headers = {'Prefer': 'raw'}
        resp = self.get('/pywb/20140127171238js_/http://www.iana.org/', fmod, headers=headers, status=200)

        self._assert_raw(resp)

    def test_prefer_invalid(self, fmod):
        headers = {'Prefer': 'unknown'}
        # an unrecognized preference is rejected; status=400 is the check
        self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=400)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user