mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
support for 'classic' pywb features and misc improvements: (#261)
* support for 'classic' pywb features and misc improvements:
  - add support for redirect-to-exact-timestamp mode via the 'redirect_to_exact: true' config setting
  - tests: ensure memento headers are added for redirect-to-exact
  - memento: ensure Link header is added for intermediate resources; check 'enable_memento' before adding
  - config: config is passed to the head_insert template as 'config'
  - insert the legacy 'vidrw.js' script if the 'enable_flash_video_rewrite' config setting is true
  - config: use_js_obj_proxy now defaults to true
  - memento/tests: add proxy test with a custom Accept-Datetime header
This commit is contained in:
parent
459cd706d3
commit
4b60dd5dda
@ -75,6 +75,8 @@ class RewriterApp(object):
|
||||
|
||||
self.jinja_env = jinja_env
|
||||
|
||||
self.redirect_to_exact = config.get('redirect_to_exact')
|
||||
|
||||
self.banner_view = BaseInsertView(self.jinja_env, self._html_templ('banner_html'))
|
||||
|
||||
self.head_insert_view = HeadInsertView(self.jinja_env,
|
||||
@ -89,7 +91,7 @@ class RewriterApp(object):
|
||||
self.not_found_view = BaseInsertView(self.jinja_env, self._html_templ('not_found_html'))
|
||||
self.query_view = BaseInsertView(self.jinja_env, self._html_templ('query_html'))
|
||||
|
||||
self.use_js_obj_proxy = config.get('use_js_obj_proxy', False)
|
||||
self.use_js_obj_proxy = config.get('use_js_obj_proxy', True)
|
||||
|
||||
self.cookie_tracker = None
|
||||
|
||||
@ -167,9 +169,14 @@ class RewriterApp(object):
|
||||
scheme, netloc, path, query, frag = url_parts
|
||||
path = '/'
|
||||
url = urlunsplit((scheme, netloc, path, query, frag))
|
||||
return WbResponse.redir_response(urlrewriter.rewrite(url),
|
||||
resp = WbResponse.redir_response(urlrewriter.rewrite(url),
|
||||
'307 Temporary Redirect')
|
||||
|
||||
if self.enable_memento:
|
||||
resp.status_headers['Link'] = MementoUtils.make_link(url, 'original')
|
||||
|
||||
return resp
|
||||
|
||||
self.unrewrite_referrer(environ, full_prefix)
|
||||
|
||||
urlkey = canonicalize(wb_url.url)
|
||||
@ -263,8 +270,27 @@ class RewriterApp(object):
|
||||
if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1':
|
||||
set_content_loc = True
|
||||
|
||||
# return WbResponse.redir_response(urlrewriter.rewrite(target_uri),
|
||||
# '307 Temporary Redirect')
|
||||
# if redir to exact, redir if url or ts are different
|
||||
if self.redirect_to_exact:
|
||||
if (set_content_loc or
|
||||
(wb_url.timestamp != cdx.get('timestamp') and not cdx.get('is_live'))):
|
||||
|
||||
new_url = urlrewriter.get_new_url(url=target_uri,
|
||||
timestamp=cdx['timestamp'],
|
||||
mod=wb_url.mod)
|
||||
|
||||
resp = WbResponse.redir_response(new_url, '307 Temporary Redirect')
|
||||
if self.enable_memento:
|
||||
if is_timegate and not is_proxy:
|
||||
self._add_memento_links(target_uri, full_prefix,
|
||||
memento_dt, cdx['timestamp'],
|
||||
resp.status_headers,
|
||||
is_timegate, is_proxy)
|
||||
|
||||
else:
|
||||
resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original')
|
||||
|
||||
return resp
|
||||
|
||||
self._add_custom_params(cdx, r.headers, kwargs)
|
||||
|
||||
@ -290,7 +316,8 @@ class RewriterApp(object):
|
||||
host_prefix,
|
||||
top_url,
|
||||
environ,
|
||||
framed_replay))
|
||||
framed_replay,
|
||||
config=self.config))
|
||||
|
||||
cookie_rewriter = None
|
||||
if self.cookie_tracker:
|
||||
@ -315,10 +342,9 @@ class RewriterApp(object):
|
||||
|
||||
set_content_loc = True
|
||||
|
||||
if set_content_loc:
|
||||
if set_content_loc and not self.redirect_to_exact:
|
||||
status_headers.headers.append(('Content-Location', urlrewriter.get_new_url(timestamp=cdx['timestamp'],
|
||||
url=cdx['url'])))
|
||||
|
||||
if not is_proxy:
|
||||
self.add_csp_header(wb_url, status_headers)
|
||||
|
||||
@ -339,8 +365,9 @@ class RewriterApp(object):
|
||||
|
||||
response = WbResponse.text_response(response, content_type=content_type)
|
||||
|
||||
self._add_memento_links(wb_url.url, full_prefix, None, memento_ts,
|
||||
response.status_headers, is_timegate, is_proxy)
|
||||
if self.enable_memento:
|
||||
self._add_memento_links(wb_url.url, full_prefix, None, memento_ts,
|
||||
response.status_headers, is_timegate, is_proxy)
|
||||
return response
|
||||
|
||||
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
||||
|
@ -153,15 +153,16 @@ class HeadInsertView(BaseInsertView):
|
||||
env,
|
||||
is_framed,
|
||||
coll='',
|
||||
include_ts=True):
|
||||
include_ts=True,
|
||||
**kwargs):
|
||||
|
||||
params = {'host_prefix': host_prefix,
|
||||
'wb_prefix': wb_prefix,
|
||||
'wb_url': wb_url,
|
||||
'coll': coll,
|
||||
'is_framed': 'true' if is_framed else 'false',
|
||||
'top_url': top_url,
|
||||
}
|
||||
params = kwargs
|
||||
params['host_prefix'] = host_prefix
|
||||
params['wb_prefix'] = wb_prefix
|
||||
params['wb_url'] = wb_url
|
||||
params['top_url'] = top_url
|
||||
params['coll'] = coll
|
||||
params['is_framed'] = 'true' if is_framed else 'false'
|
||||
|
||||
def make_head_insert(rule, cdx):
|
||||
params['wombat_ts'] = cdx['timestamp'] if include_ts else ''
|
||||
|
@ -34,6 +34,10 @@
|
||||
|
||||
</script>
|
||||
|
||||
{% if config.enable_flash_video_rewrite %}
|
||||
<script src='{{ host_prefix }}/{{ static_path }}/vidrw.js'> </script>
|
||||
{% endif %}
|
||||
|
||||
{{ banner_html }}
|
||||
|
||||
<!-- End WB Insert -->
|
||||
|
18
tests/config_test_redirect_classic.yaml
Normal file
18
tests/config_test_redirect_classic.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
# config similar to old pywb setup
|
||||
# -redirect requests
|
||||
# -include video rewrite
|
||||
|
||||
collections:
|
||||
pywb:
|
||||
index: ./sample_archive/cdx/
|
||||
archive_paths: ./sample_archive/warcs/
|
||||
|
||||
live: $live
|
||||
|
||||
enable_flash_video_rewrite: true
|
||||
|
||||
redirect_to_exact: true
|
||||
|
||||
enable_memento: true
|
||||
|
||||
debug: true
|
@ -63,6 +63,19 @@ class TestProxy(BaseTestProxy):
|
||||
assert res.headers['Link'] == '<http://example.com>; rel="memento"; datetime="Mon, 27 Jan 2014 17:12:51 GMT"; collection="pywb"'
|
||||
assert res.headers['Memento-Datetime'] == 'Mon, 27 Jan 2014 17:12:51 GMT'
|
||||
|
||||
def test_proxy_replay_change_dt(self, scheme):
|
||||
headers = {'Accept-Datetime': 'Mon, 26 Dec 2011 17:12:51 GMT'}
|
||||
res = requests.get('{0}://example.com/'.format(scheme),
|
||||
proxies=self.proxies,
|
||||
headers=headers,
|
||||
verify=self.root_ca_file)
|
||||
|
||||
assert 'WB Insert' in res.text
|
||||
assert 'Example Domain' in res.text
|
||||
|
||||
assert res.headers['Link'] == '<http://test@example.com/>; rel="memento"; datetime="Mon, 29 Jul 2013 19:51:51 GMT"; collection="pywb"'
|
||||
assert res.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT'
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestRecordingProxy(CollsDirMixin, BaseTestProxy):
|
||||
|
77
tests/test_redirect_classic.py
Normal file
77
tests/test_redirect_classic.py
Normal file
@ -0,0 +1,77 @@
|
||||
from .base_config_test import BaseConfigTest, fmod
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestRedirectClassic(BaseConfigTest):
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
super(TestRedirectClassic, cls).setup_class('config_test_redirect_classic.yaml')
|
||||
|
||||
def test_replay_content_inexact(self, fmod):
|
||||
resp = self.get('/pywb/20140127171235{0}/http://www.iana.org/', fmod)
|
||||
|
||||
assert resp.status_code == 307
|
||||
assert resp.headers['Location'].endswith('/20140127171238{0}/http://www.iana.org/'.format(fmod))
|
||||
assert resp.headers['Link'] == '<http://www.iana.org/>; rel="original"'
|
||||
resp = resp.follow()
|
||||
|
||||
self._assert_basic_html(resp)
|
||||
|
||||
assert '"20140127171238"' in resp.text, resp.text
|
||||
assert 'wombat.js' in resp.text
|
||||
assert 'new _WBWombat' in resp.text, resp.text
|
||||
assert '/pywb/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text
|
||||
|
||||
assert ('wbinfo.is_framed = ' + ('true' if fmod else 'false')) in resp.text
|
||||
|
||||
csp = "default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: ; form-action 'self'"
|
||||
assert resp.headers['Content-Security-Policy'] == csp
|
||||
|
||||
# verify vidrw.js is injected when enable_flash_video_rewrite is set
|
||||
assert 'vidrw.js' in resp.text
|
||||
|
||||
def test_latest_replay_redirect(self, fmod):
|
||||
fmod_slash = fmod + '/' if fmod else ''
|
||||
|
||||
resp = self.get('/pywb/{0}http://example.com/', fmod_slash)
|
||||
assert resp.status_code == 307
|
||||
assert resp.headers['Location'].endswith('/20140127171251{0}/http://example.com'.format(fmod))
|
||||
assert resp.headers['Link'] != ''
|
||||
|
||||
# trailing slash redir
|
||||
resp = resp.follow()
|
||||
assert resp.status_code == 307
|
||||
assert resp.headers['Location'].endswith('/20140127171251{0}/http://example.com/'.format(fmod))
|
||||
assert resp.headers['Link'] != ''
|
||||
|
||||
resp = resp.follow()
|
||||
self._assert_basic_html(resp)
|
||||
assert resp.headers['Memento-Datetime'] == 'Mon, 27 Jan 2014 17:12:51 GMT'
|
||||
|
||||
assert '"20140127171251"' in resp.text
|
||||
assert '/pywb/20140127171251{0}/http://www.iana.org/domains/example'.format(fmod) in resp.text, resp.text
|
||||
|
||||
def test_replay_memento_accept_dt(self, fmod):
|
||||
fmod_slash = fmod + '/' if fmod else ''
|
||||
headers = {'Accept-Datetime': 'Mon, 26 Dec 2011 17:12:51 GMT'}
|
||||
|
||||
resp = self.get('/pywb/{0}http://example.com/', fmod_slash, headers=headers)
|
||||
assert resp.status_code == 307
|
||||
assert resp.headers['Location'].endswith('/20130729195151{0}/http://test@example.com/'.format(fmod))
|
||||
assert resp.headers['Link'] != ''
|
||||
|
||||
resp = resp.follow()
|
||||
self._assert_basic_html(resp)
|
||||
assert resp.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT'
|
||||
|
||||
def test_replay_fuzzy_1_redirect(self, fmod):
|
||||
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?_=123', fmod)
|
||||
assert resp.status_int == 307
|
||||
assert resp.headers['Location'].endswith('/pywb/20140126200624{0}/http://www.iana.org/'.format(fmod))
|
||||
|
||||
def test_live_no_redir(self, fmod):
|
||||
fmod_slash = fmod + '/' if fmod else ''
|
||||
resp = self.get('/live/{0}http://example.com/?test=test', fmod_slash)
|
||||
assert resp.status_int == 200
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user