mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
memento prefer header: add support for Prefer header for specifying 'raw' or 'rewritten' mementos (ukwa/ukwa-pywb#12, based on mementoweb/rfc-extensions#6)
- 'enable_prefer: true' in the config can be used to enable the experimental Memento Prefer behavior
- the Prefer header supports both redirect and non-redirect style negotiation, extending existing Memento patterns
- the Prefer header can be applied on both memento and timegate endpoints
- for redirect style negotiation, Prefer results in a redirect to the final memento (if needed), on both the Timegate and URL-M (Memento Pattern 2.3)
- for non-redirect style negotiation (Memento Pattern 2.2), the Prefer header affects the content being served and changes the Content-Location to the canonical representation
- 'Vary: Prefer' and 'Preference-Applied' headers are always added to URL-M and Timegate responses
This commit is contained in:
parent
0d68f67049
commit
5364275ef5
@ -60,6 +60,11 @@ class RewriterApp(object):
|
|||||||
self.frame_mod = None
|
self.frame_mod = None
|
||||||
self.replay_mod = ''
|
self.replay_mod = ''
|
||||||
|
|
||||||
|
self.enable_prefer = self.config.get('enable_prefer', False)
|
||||||
|
self.prefs = {'raw': 'id_',
|
||||||
|
'rewritten': self.replay_mod
|
||||||
|
}
|
||||||
|
|
||||||
self.default_rw = DefaultRewriter(replay_mod=self.replay_mod,
|
self.default_rw = DefaultRewriter(replay_mod=self.replay_mod,
|
||||||
config=config)
|
config=config)
|
||||||
|
|
||||||
@ -139,6 +144,19 @@ class RewriterApp(object):
|
|||||||
|
|
||||||
return is_timegate
|
return is_timegate
|
||||||
|
|
||||||
|
def _get_prefer_mod(self, wb_url, environ):
|
||||||
|
if not self.enable_prefer:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
prefer = environ.get('HTTP_PREFER')
|
||||||
|
if not prefer:
|
||||||
|
return None, 'raw' if wb_url.is_identity else 'rewritten'
|
||||||
|
|
||||||
|
try:
|
||||||
|
return self.prefs[prefer], prefer
|
||||||
|
except:
|
||||||
|
raise UpstreamException(400, url=wb_url.url, details='Invalid Prefer: ' + prefer)
|
||||||
|
|
||||||
def _check_range(self, inputreq, wb_url):
|
def _check_range(self, inputreq, wb_url):
|
||||||
skip_record = False
|
skip_record = False
|
||||||
range_start = None
|
range_start = None
|
||||||
@ -268,6 +286,24 @@ class RewriterApp(object):
|
|||||||
if not url_parts.path:
|
if not url_parts.path:
|
||||||
return self.send_redirect('/', url_parts, urlrewriter)
|
return self.send_redirect('/', url_parts, urlrewriter)
|
||||||
|
|
||||||
|
# Check Prefer
|
||||||
|
pref_mod, pref_applied = self._get_prefer_mod(wb_url, environ)
|
||||||
|
|
||||||
|
# fast-redirect to preferred
|
||||||
|
if pref_mod is not None:
|
||||||
|
if self.redirect_to_exact and not is_timegate and pref_mod != wb_url.mod:
|
||||||
|
new_url = urlrewriter.get_new_url(url=wb_url.url,
|
||||||
|
timestamp=wb_url.timestamp,
|
||||||
|
mod=pref_mod)
|
||||||
|
headers = [('Preference-Applied', pref_applied),
|
||||||
|
('Vary', 'Prefer')]
|
||||||
|
|
||||||
|
return WbResponse.redir_response(new_url,
|
||||||
|
'307 Temporary Redirect',
|
||||||
|
headers=headers)
|
||||||
|
else:
|
||||||
|
wb_url.mod = pref_mod
|
||||||
|
|
||||||
self.unrewrite_referrer(environ, full_prefix)
|
self.unrewrite_referrer(environ, full_prefix)
|
||||||
|
|
||||||
urlkey = canonicalize(wb_url.url)
|
urlkey = canonicalize(wb_url.url)
|
||||||
@ -357,7 +393,9 @@ class RewriterApp(object):
|
|||||||
self._add_memento_links(target_uri, full_prefix,
|
self._add_memento_links(target_uri, full_prefix,
|
||||||
memento_dt, cdx['timestamp'],
|
memento_dt, cdx['timestamp'],
|
||||||
resp.status_headers,
|
resp.status_headers,
|
||||||
is_timegate, is_proxy)
|
is_timegate, is_proxy,
|
||||||
|
pref_applied=pref_applied,
|
||||||
|
mod=pref_mod)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original')
|
resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original')
|
||||||
@ -421,13 +459,15 @@ class RewriterApp(object):
|
|||||||
if not is_ajax and self.enable_memento:
|
if not is_ajax and self.enable_memento:
|
||||||
self._add_memento_links(cdx['url'], full_prefix,
|
self._add_memento_links(cdx['url'], full_prefix,
|
||||||
memento_dt, cdx['timestamp'], status_headers,
|
memento_dt, cdx['timestamp'], status_headers,
|
||||||
is_timegate, is_proxy, cdx.get('source-coll'))
|
is_timegate, is_proxy, cdx.get('source-coll'),
|
||||||
|
mod=pref_mod, pref_applied=pref_applied)
|
||||||
|
|
||||||
set_content_loc = True
|
set_content_loc = True
|
||||||
|
|
||||||
if set_content_loc and not self.redirect_to_exact:
|
if set_content_loc and not self.redirect_to_exact:
|
||||||
status_headers.headers.append(('Content-Location', urlrewriter.get_new_url(timestamp=cdx['timestamp'],
|
status_headers.headers.append(('Content-Location', urlrewriter.get_new_url(timestamp=cdx['timestamp'],
|
||||||
url=cdx['url'])))
|
url=cdx['url'])))
|
||||||
|
|
||||||
if not is_proxy:
|
if not is_proxy:
|
||||||
self.add_csp_header(wb_url, status_headers)
|
self.add_csp_header(wb_url, status_headers)
|
||||||
|
|
||||||
@ -454,7 +494,10 @@ class RewriterApp(object):
|
|||||||
return response
|
return response
|
||||||
|
|
||||||
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
||||||
status_headers, is_timegate, is_proxy, coll=None):
|
status_headers, is_timegate, is_proxy, coll=None,
|
||||||
|
pref_applied=None, mod=None):
|
||||||
|
|
||||||
|
mod = mod or self.replay_mod
|
||||||
|
|
||||||
# memento url + header
|
# memento url + header
|
||||||
if not memento_dt and memento_ts:
|
if not memento_dt and memento_ts:
|
||||||
@ -466,12 +509,12 @@ class RewriterApp(object):
|
|||||||
if is_proxy:
|
if is_proxy:
|
||||||
memento_url = url
|
memento_url = url
|
||||||
else:
|
else:
|
||||||
memento_url = full_prefix + memento_ts + self.replay_mod
|
memento_url = full_prefix + memento_ts + mod
|
||||||
memento_url += '/' + url
|
memento_url += '/' + url
|
||||||
else:
|
else:
|
||||||
memento_url = None
|
memento_url = None
|
||||||
|
|
||||||
timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix)
|
timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix, mod)
|
||||||
|
|
||||||
link = []
|
link = []
|
||||||
if not is_proxy:
|
if not is_proxy:
|
||||||
@ -486,14 +529,22 @@ class RewriterApp(object):
|
|||||||
|
|
||||||
status_headers.headers.append(('Link', link_str))
|
status_headers.headers.append(('Link', link_str))
|
||||||
|
|
||||||
|
vary = ''
|
||||||
if is_timegate:
|
if is_timegate:
|
||||||
status_headers.headers.append(('Vary', 'accept-datetime'))
|
vary = 'accept-datetime'
|
||||||
|
|
||||||
def _get_timegate_timemap(self, url, full_prefix):
|
if pref_applied:
|
||||||
|
vary = 'Prefer' if not vary else vary + ', Prefer'
|
||||||
|
status_headers.headers.append(('Preference-Applied', pref_applied))
|
||||||
|
|
||||||
|
if vary:
|
||||||
|
status_headers.headers.append(('Vary', vary))
|
||||||
|
|
||||||
|
def _get_timegate_timemap(self, url, full_prefix, mod):
|
||||||
# timegate url
|
# timegate url
|
||||||
timegate_url = full_prefix
|
timegate_url = full_prefix
|
||||||
if self.replay_mod:
|
if mod:
|
||||||
timegate_url += self.replay_mod + '/'
|
timegate_url += mod + '/'
|
||||||
|
|
||||||
timegate_url += url
|
timegate_url += url
|
||||||
|
|
||||||
@ -585,7 +636,7 @@ class RewriterApp(object):
|
|||||||
status = str(res.status_code) + ' ' + res.reason
|
status = str(res.status_code) + ' ' + res.reason
|
||||||
|
|
||||||
if res.status_code == 200 and output == 'link':
|
if res.status_code == 200 and output == 'link':
|
||||||
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix)
|
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix, self.replay_mod)
|
||||||
|
|
||||||
text = MementoUtils.wrap_timemap_header(wb_url.url,
|
text = MementoUtils.wrap_timemap_header(wb_url.url,
|
||||||
timegate,
|
timegate,
|
||||||
|
@ -14,5 +14,6 @@ enable_flash_video_rewrite: true
|
|||||||
redirect_to_exact: true
|
redirect_to_exact: true
|
||||||
|
|
||||||
enable_memento: true
|
enable_memento: true
|
||||||
|
enable_prefer: true
|
||||||
|
|
||||||
debug: true
|
debug: true
|
||||||
|
164
tests/test_prefer_header.py
Normal file
164
tests/test_prefer_header.py
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
from .base_config_test import BaseConfigTest, fmod
|
||||||
|
|
||||||
|
from pywb.warcserver.index.cdxobject import CDXObject
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class TestPreferWithRedirects(BaseConfigTest):
|
||||||
|
@classmethod
|
||||||
|
def setup_class(cls):
|
||||||
|
super(TestPreferWithRedirects, cls).setup_class('config_test_redirect_classic.yaml')
|
||||||
|
|
||||||
|
def _assert_pref_headers(self, resp, pref):
|
||||||
|
assert resp.headers['Preference-Applied'] == pref
|
||||||
|
assert 'Prefer' in resp.headers['Vary']
|
||||||
|
|
||||||
|
def _assert_raw_memento(self, resp):
|
||||||
|
self._assert_pref_headers(resp, 'raw')
|
||||||
|
assert '"/time-zones"' in resp.text, resp.text
|
||||||
|
assert 'wombat.js' not in resp.text
|
||||||
|
|
||||||
|
def _assert_rewritten(self, resp, fmod):
|
||||||
|
self._assert_pref_headers(resp, 'rewritten')
|
||||||
|
|
||||||
|
assert '"20140127171238"' in resp.text
|
||||||
|
assert 'wombat.js' in resp.text
|
||||||
|
assert 'new _WBWombat' in resp.text, resp.text
|
||||||
|
assert '/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text
|
||||||
|
|
||||||
|
def _assert_redir_to_raw(self, resp):
|
||||||
|
self._assert_pref_headers(resp, 'raw')
|
||||||
|
|
||||||
|
assert resp.location.endswith('/pywb/20140127171238id_/http://www.iana.org/')
|
||||||
|
resp = resp.follow()
|
||||||
|
|
||||||
|
self._assert_raw_memento(resp)
|
||||||
|
|
||||||
|
def _assert_redir_to_rewritten(self, resp, fmod):
|
||||||
|
self._assert_pref_headers(resp, 'rewritten')
|
||||||
|
|
||||||
|
assert resp.location.endswith('/pywb/20140127171238{0}/http://www.iana.org/'.format(fmod))
|
||||||
|
resp = resp.follow()
|
||||||
|
|
||||||
|
self._assert_rewritten(resp, fmod)
|
||||||
|
|
||||||
|
def test_prefer_redir_timegate_raw(self, fmod):
|
||||||
|
headers = {'Prefer': 'raw'}
|
||||||
|
fmod_slash = fmod + '/' if fmod else ''
|
||||||
|
resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=307)
|
||||||
|
|
||||||
|
self._assert_redir_to_raw(resp)
|
||||||
|
|
||||||
|
def test_prefer_redir_timegate_rewritten(self, fmod):
|
||||||
|
headers = {'Prefer': 'rewritten'}
|
||||||
|
fmod_slash = fmod + '/' if fmod else ''
|
||||||
|
resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=307)
|
||||||
|
|
||||||
|
self._assert_redir_to_rewritten(resp, fmod)
|
||||||
|
|
||||||
|
def test_prefer_redir_memento_to_raw(self, fmod):
|
||||||
|
headers = {'Prefer': 'raw'}
|
||||||
|
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=307)
|
||||||
|
|
||||||
|
self._assert_redir_to_raw(resp)
|
||||||
|
|
||||||
|
def test_prefer_redir_memento_redir_to_rewritten_diff_mod(self, fmod):
|
||||||
|
headers = {'Prefer': 'rewritten'}
|
||||||
|
resp = self.get('/pywb/20140127171238js_/http://www.iana.org/', fmod, headers=headers, status=307)
|
||||||
|
|
||||||
|
self._assert_redir_to_rewritten(resp, fmod)
|
||||||
|
|
||||||
|
def test_prefer_redir_memento_matches_rewritten(self, fmod):
|
||||||
|
headers = {'Prefer': 'rewritten'}
|
||||||
|
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200)
|
||||||
|
|
||||||
|
self._assert_rewritten(resp, fmod)
|
||||||
|
|
||||||
|
def test_prefer_redir_memento_matches_raw(self):
|
||||||
|
headers = {'Prefer': 'raw'}
|
||||||
|
resp = self.testapp.get('/pywb/20140127171238id_/http://www.iana.org/', headers=headers, status=200)
|
||||||
|
|
||||||
|
self._assert_raw_memento(resp)
|
||||||
|
|
||||||
|
def test_prefer_redir_invalid(self, fmod):
|
||||||
|
headers = {'Prefer': 'unknown'}
|
||||||
|
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=400)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class TestPreferWithNoRedirects(BaseConfigTest):
|
||||||
|
@classmethod
|
||||||
|
def setup_class(cls):
|
||||||
|
super(TestPreferWithNoRedirects, cls).setup_class('config_test.yaml',
|
||||||
|
custom_config={'enable_prefer': True})
|
||||||
|
|
||||||
|
def _assert_pref_headers(self, resp, pref):
|
||||||
|
assert resp.headers['Preference-Applied'] == pref
|
||||||
|
assert 'Prefer' in resp.headers['Vary']
|
||||||
|
|
||||||
|
def _assert_raw(self, resp):
|
||||||
|
self._assert_pref_headers(resp, 'raw')
|
||||||
|
assert '"/time-zones"' in resp.text, resp.text
|
||||||
|
assert 'wombat.js' not in resp.text
|
||||||
|
|
||||||
|
assert resp.headers['Content-Location'].endswith('/pywb/20140127171238id_/http://www.iana.org/')
|
||||||
|
|
||||||
|
def _assert_rewritten(self, resp, fmod):
|
||||||
|
self._assert_pref_headers(resp, 'rewritten')
|
||||||
|
|
||||||
|
assert '"20140127171238"' in resp.text
|
||||||
|
assert 'wombat.js' in resp.text
|
||||||
|
assert 'new _WBWombat' in resp.text, resp.text
|
||||||
|
|
||||||
|
assert resp.headers['Content-Location'].endswith('/pywb/20140127171238{0}/http://www.iana.org/'.format(fmod))
|
||||||
|
|
||||||
|
def test_prefer_timegate_raw(self, fmod):
|
||||||
|
headers = {'Prefer': 'raw'}
|
||||||
|
fmod_slash = fmod + '/' if fmod else ''
|
||||||
|
resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=200)
|
||||||
|
|
||||||
|
self._assert_raw(resp)
|
||||||
|
|
||||||
|
def test_prefer_timegate_rewritten(self, fmod):
|
||||||
|
headers = {'Prefer': 'rewritten'}
|
||||||
|
fmod_slash = fmod + '/' if fmod else ''
|
||||||
|
resp = self.get('/pywb/{0}http://www.iana.org/', fmod_slash, headers=headers, status=200)
|
||||||
|
|
||||||
|
assert '/pywb/{0}http://www.iana.org/time-zones"'.format(fmod_slash) in resp.text
|
||||||
|
self._assert_rewritten(resp, fmod)
|
||||||
|
|
||||||
|
def test_prefer_memento_raw(self, fmod):
|
||||||
|
headers = {'Prefer': 'raw'}
|
||||||
|
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200)
|
||||||
|
|
||||||
|
self._assert_raw(resp)
|
||||||
|
|
||||||
|
def test_prefer_memento_rewritten(self, fmod):
|
||||||
|
headers = {'Prefer': 'rewritten'}
|
||||||
|
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=200)
|
||||||
|
|
||||||
|
self._assert_rewritten(resp, fmod)
|
||||||
|
|
||||||
|
def test_prefer_memento_raw_id_mod(self):
|
||||||
|
headers = {'Prefer': 'raw'}
|
||||||
|
resp = self.testapp.get('/pywb/20140127171238id_/http://www.iana.org/', headers=headers, status=200)
|
||||||
|
|
||||||
|
self._assert_raw(resp)
|
||||||
|
|
||||||
|
def test_prefer_memento_rewritten_id_mod(self, fmod):
|
||||||
|
headers = {'Prefer': 'rewritten'}
|
||||||
|
resp = self.get('/pywb/20140127171238id_/http://www.iana.org/', fmod, headers=headers, status=200)
|
||||||
|
|
||||||
|
self._assert_rewritten(resp, fmod)
|
||||||
|
|
||||||
|
def test_prefer_memento_rewritten_diff_mod(self):
|
||||||
|
headers = {'Prefer': 'raw'}
|
||||||
|
resp = self.get('/pywb/20140127171238js_/http://www.iana.org/', fmod, headers=headers, status=200)
|
||||||
|
|
||||||
|
self._assert_raw(resp)
|
||||||
|
|
||||||
|
def test_prefer_invalid(self, fmod):
|
||||||
|
headers = {'Prefer': 'unknown'}
|
||||||
|
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod, headers=headers, status=400)
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user