diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index 74966000..d7829081 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -75,6 +75,8 @@ class RewriterApp(object): self.jinja_env = jinja_env + self.redirect_to_exact = config.get('redirect_to_exact') + self.banner_view = BaseInsertView(self.jinja_env, self._html_templ('banner_html')) self.head_insert_view = HeadInsertView(self.jinja_env, @@ -89,7 +91,7 @@ class RewriterApp(object): self.not_found_view = BaseInsertView(self.jinja_env, self._html_templ('not_found_html')) self.query_view = BaseInsertView(self.jinja_env, self._html_templ('query_html')) - self.use_js_obj_proxy = config.get('use_js_obj_proxy', False) + self.use_js_obj_proxy = config.get('use_js_obj_proxy', True) self.cookie_tracker = None @@ -167,9 +169,14 @@ class RewriterApp(object): scheme, netloc, path, query, frag = url_parts path = '/' url = urlunsplit((scheme, netloc, path, query, frag)) - return WbResponse.redir_response(urlrewriter.rewrite(url), + resp = WbResponse.redir_response(urlrewriter.rewrite(url), '307 Temporary Redirect') + if self.enable_memento: + resp.status_headers['Link'] = MementoUtils.make_link(url, 'original') + + return resp + self.unrewrite_referrer(environ, full_prefix) urlkey = canonicalize(wb_url.url) @@ -263,8 +270,27 @@ class RewriterApp(object): if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1': set_content_loc = True - # return WbResponse.redir_response(urlrewriter.rewrite(target_uri), - # '307 Temporary Redirect') + # if redir to exact, redir if url or ts are different + if self.redirect_to_exact: + if (set_content_loc or + (wb_url.timestamp != cdx.get('timestamp') and not cdx.get('is_live'))): + + new_url = urlrewriter.get_new_url(url=target_uri, + timestamp=cdx['timestamp'], + mod=wb_url.mod) + + resp = WbResponse.redir_response(new_url, '307 Temporary Redirect') + if self.enable_memento: + if is_timegate and not is_proxy: + self._add_memento_links(target_uri, full_prefix, + memento_dt, cdx['timestamp'], + resp.status_headers, + is_timegate, is_proxy) + + else: + resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original') + + return resp self._add_custom_params(cdx, r.headers, kwargs) @@ -290,7 +316,8 @@ class RewriterApp(object): host_prefix, top_url, environ, - framed_replay)) + framed_replay, + config=self.config)) cookie_rewriter = None if self.cookie_tracker: @@ -315,10 +342,9 @@ class RewriterApp(object): set_content_loc = True - if set_content_loc: + if set_content_loc and not self.redirect_to_exact: status_headers.headers.append(('Content-Location', urlrewriter.get_new_url(timestamp=cdx['timestamp'], url=cdx['url']))) - if not is_proxy: self.add_csp_header(wb_url, status_headers) @@ -339,8 +365,9 @@ class RewriterApp(object): response = WbResponse.text_response(response, content_type=content_type) - self._add_memento_links(wb_url.url, full_prefix, None, memento_ts, - response.status_headers, is_timegate, is_proxy) + if self.enable_memento: + self._add_memento_links(wb_url.url, full_prefix, None, memento_ts, + response.status_headers, is_timegate, is_proxy) return response def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts, diff --git a/pywb/rewrite/templateview.py b/pywb/rewrite/templateview.py index 2e81b12b..74b1c9c0 100644 --- a/pywb/rewrite/templateview.py +++ b/pywb/rewrite/templateview.py @@ -153,15 +153,16 @@ class HeadInsertView(BaseInsertView): env, is_framed, coll='', - include_ts=True): + include_ts=True, + **kwargs): - params = {'host_prefix': host_prefix, - 'wb_prefix': wb_prefix, - 'wb_url': wb_url, - 'coll': coll, - 'is_framed': 'true' if is_framed else 'false', - 'top_url': top_url, - } + params = kwargs + params['host_prefix'] = host_prefix + params['wb_prefix'] = wb_prefix + params['wb_url'] = wb_url + params['top_url'] = top_url + params['coll'] = coll + params['is_framed'] = 'true' if is_framed else 'false' def make_head_insert(rule, cdx): params['wombat_ts'] = cdx['timestamp'] if include_ts else '' diff --git a/pywb/templates/head_insert.html b/pywb/templates/head_insert.html index d32ff310..c73efaec 100644 --- a/pywb/templates/head_insert.html +++ b/pywb/templates/head_insert.html @@ -34,6 +34,10 @@ +{% if config.enable_flash_video_rewrite %} + +{% endif %} + {{ banner_html }} diff --git a/tests/config_test_redirect_classic.yaml b/tests/config_test_redirect_classic.yaml new file mode 100644 index 00000000..21321416 --- /dev/null +++ b/tests/config_test_redirect_classic.yaml @@ -0,0 +1,18 @@ +# config similar to old pywb setup +# -redirect requests +# -include video rewrite + +collections: + pywb: + index: ./sample_archive/cdx/ + archive_paths: ./sample_archive/warcs/ + + live: $live + +enable_flash_video_rewrite: true + +redirect_to_exact: true + +enable_memento: true + +debug: true diff --git a/tests/test_proxy.py b/tests/test_proxy.py index 3e603d50..14254177 100644 --- a/tests/test_proxy.py +++ b/tests/test_proxy.py @@ -63,6 +63,19 @@ class TestProxy(BaseTestProxy): assert res.headers['Link'] == '; rel="memento"; datetime="Mon, 27 Jan 2014 17:12:51 GMT"; collection="pywb"' assert res.headers['Memento-Datetime'] == 'Mon, 27 Jan 2014 17:12:51 GMT' + def test_proxy_replay_change_dt(self, scheme): + headers = {'Accept-Datetime': 'Mon, 26 Dec 2011 17:12:51 GMT'} + res = requests.get('{0}://example.com/'.format(scheme), + proxies=self.proxies, + headers=headers, + verify=self.root_ca_file) + + assert 'WB Insert' in res.text + assert 'Example Domain' in res.text + + assert res.headers['Link'] == '; rel="memento"; datetime="Mon, 29 Jul 2013 19:51:51 GMT"; collection="pywb"' + assert res.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT' + # ============================================================================ class TestRecordingProxy(CollsDirMixin, BaseTestProxy): diff --git a/tests/test_redirect_classic.py b/tests/test_redirect_classic.py new file mode 100644 index 00000000..073709e7 --- /dev/null +++ b/tests/test_redirect_classic.py @@ -0,0 +1,77 @@ +from .base_config_test import BaseConfigTest, fmod + + +# ============================================================================ +class TestRedirectClassic(BaseConfigTest): + @classmethod + def setup_class(cls): + super(TestRedirectClassic, cls).setup_class('config_test_redirect_classic.yaml') + + def test_replay_content_inexact(self, fmod): + resp = self.get('/pywb/20140127171235{0}/http://www.iana.org/', fmod) + + assert resp.status_code == 307 + assert resp.headers['Location'].endswith('/20140127171238{0}/http://www.iana.org/'.format(fmod)) + assert resp.headers['Link'] == '; rel="original"' + resp = resp.follow() + + self._assert_basic_html(resp) + + assert '"20140127171238"' in resp.text, resp.text + assert 'wombat.js' in resp.text + assert 'new _WBWombat' in resp.text, resp.text + assert '/pywb/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text + + assert ('wbinfo.is_framed = ' + ('true' if fmod else 'false')) in resp.text + + csp = "default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: ; form-action 'self'" + assert resp.headers['Content-Security-Policy'] == csp + + # verify enable_rewrite_flash_video is injected + assert 'vidrw.js' in resp.text + + def test_latest_replay_redirect(self, fmod): + fmod_slash = fmod + '/' if fmod else '' + + resp = self.get('/pywb/{0}http://example.com/', fmod_slash) + assert resp.status_code == 307 + assert resp.headers['Location'].endswith('/20140127171251{0}/http://example.com'.format(fmod)) + assert resp.headers['Link'] != '' + + # trailing slash redir + resp = resp.follow() + assert resp.status_code == 307 + assert resp.headers['Location'].endswith('/20140127171251{0}/http://example.com/'.format(fmod)) + assert resp.headers['Link'] != '' + + resp = resp.follow() + self._assert_basic_html(resp) + assert resp.headers['Memento-Datetime'] == 'Mon, 27 Jan 2014 17:12:51 GMT' + + assert '"20140127171251"' in resp.text + assert '/pywb/20140127171251{0}/http://www.iana.org/domains/example'.format(fmod) in resp.text, resp.text + + def test_replay_memento_accept_dt(self, fmod): + fmod_slash = fmod + '/' if fmod else '' + headers = {'Accept-Datetime': 'Mon, 26 Dec 2011 17:12:51 GMT'} + + resp = self.get('/pywb/{0}http://example.com/', fmod_slash, headers=headers) + assert resp.status_code == 307 + assert resp.headers['Location'].endswith('/20130729195151{0}/http://test@example.com/'.format(fmod)) + assert resp.headers['Link'] != '' + + resp = resp.follow() + self._assert_basic_html(resp) + assert resp.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT' + + def test_replay_fuzzy_1_redirect(self, fmod): + resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?_=123', fmod) + assert resp.status_int == 307 + assert resp.headers['Location'].endswith('/pywb/20140126200624{0}/http://www.iana.org/'.format(fmod)) + + def test_live_no_redir(self, fmod): + fmod_slash = fmod + '/' if fmod else '' + resp = self.get('/live/{0}http://example.com/?test=test', fmod_slash) + assert resp.status_int == 200 + +