diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index 335a77bf..75248cfe 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -302,7 +302,9 @@ class RewriterApp(object): kwargs) if response: - return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy) + # don't return top-frame response for timegate with exact redirects + if not is_timegate or not redirect_to_exact: + return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy) if is_proxy: environ['pywb_proxy_magic'] = environ['wsgiprox.proxy_host'] @@ -395,11 +397,9 @@ class RewriterApp(object): if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1': set_content_loc = True - # if redir to exact, redir if url or ts are different - if redirect_to_exact: - if (set_content_loc or - (wb_url.timestamp != cdx.get('timestamp') and not cdx.get('is_live'))): - + # if redirect to exact timestamp, but only if not live + if redirect_to_exact and not cdx.get('is_live'): + if set_content_loc or is_timegate or wb_url.timestamp != cdx.get('timestamp'): new_url = urlrewriter.get_new_url(url=target_uri, timestamp=cdx['timestamp'], mod=wb_url.mod) @@ -412,7 +412,8 @@ class RewriterApp(object): resp.status_headers, is_timegate, is_proxy, pref_applied=pref_applied, - mod=pref_mod) + mod=pref_mod, + is_memento=False) else: resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original') @@ -512,21 +513,22 @@ class RewriterApp(object): def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts, status_headers, is_timegate, is_proxy, coll=None, - pref_applied=None, mod=None): + pref_applied=None, mod=None, is_memento=True): - mod = mod or self.replay_mod + replay_mod = mod or self.replay_mod # memento url + header if not memento_dt and memento_ts: memento_dt = timestamp_to_http_date(memento_ts) if memento_dt: - status_headers.headers.append(('Memento-Datetime', memento_dt)) + if is_memento: + 
status_headers.headers.append(('Memento-Datetime', memento_dt)) if is_proxy: memento_url = url else: - memento_url = full_prefix + memento_ts + mod + memento_url = full_prefix + memento_ts + replay_mod memento_url += '/' + url else: memento_url = None @@ -560,6 +562,7 @@ class RewriterApp(object): def _get_timegate_timemap(self, url, full_prefix, mod): # timegate url timegate_url = full_prefix + mod = '' if mod: timegate_url += mod + '/' @@ -653,7 +656,7 @@ class RewriterApp(object): status = str(res.status_code) + ' ' + res.reason if res.status_code == 200 and output == 'link': - timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix, self.replay_mod) + timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix, wb_url.mod) text = MementoUtils.wrap_timemap_header(wb_url.url, timegate, diff --git a/tests/test_memento.py b/tests/test_memento.py index a0f18c85..521947fe 100644 --- a/tests/test_memento.py +++ b/tests/test_memento.py @@ -33,7 +33,7 @@ class TestMemento(MementoMixin, BaseConfigTest): assert resp.headers['Content-Location'] in memento_link # timegate link - assert self.make_timegate_link(url, fmod) in links + assert self.make_timegate_link(url, '') in links # timemap link assert self.make_timemap_link(url) in links @@ -60,7 +60,7 @@ class TestMemento(MementoMixin, BaseConfigTest): assert self.make_memento_link(url, '20140127171238', dt, 'mp_', include_coll=False) in links #timegate link - assert self.make_timegate_link(url, 'mp_') in links + assert self.make_timegate_link(url, '') in links # Body assert '"20140127171238"' in resp.text @@ -132,7 +132,7 @@ class TestMemento(MementoMixin, BaseConfigTest): exp = """\ ; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT", -; rel="timegate", +; rel="timegate", ; rel="original", ; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb", ; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb" @@ -186,3 +186,75 @@ 
com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", " assert resp.status_int == 400 +# ============================================================================ +class TestMementoRedirectClassic(MementoMixin, BaseConfigTest): + @classmethod + def setup_class(cls): + super(TestMementoRedirectClassic, cls).setup_class('config_test_redirect_classic.yaml') + + def test_memento_top_frame_timegate(self, fmod): + resp = self.testapp.get('/pywb/http://www.iana.org/') + assert resp.status_code == 307 + assert resp.headers['Location'].endswith('/20140127171238/http://www.iana.org/') + assert resp.headers['Link'] != '' + + # Memento Headers + assert VARY in resp.headers + assert MEMENTO_DATETIME not in resp.headers + + # memento link + dt = 'Mon, 27 Jan 2014 17:12:38 GMT' + url = 'http://www.iana.org/' + + links = self.get_links(resp) + + assert self.make_memento_link(url, '20140127171238', dt, 'mp_', include_coll=False) in links + + #timegate link + assert self.make_timegate_link(url, '') in links + + + resp = resp.follow() + + # Body + assert '"20140127171238"' in resp.text + assert '"http://www.iana.org/"' in resp.text, resp.text + + def test_memento_top_frame_timegate_accept_dt(self, fmod): + headers = {'Accept-Datetime': 'Sun, 26 Jan 2014 20:06:24 GMT'} + resp = self.testapp.get('/pywb/http://www.iana.org/', headers=headers) + assert resp.status_code == 307 + assert resp.headers['Location'].endswith('/20140126200624/http://www.iana.org/') + assert resp.headers['Link'] != '' + + # Memento Headers + assert VARY in resp.headers + assert MEMENTO_DATETIME not in resp.headers + + # memento link + dt = 'Sun, 26 Jan 2014 20:06:24 GMT' + url = 'http://www.iana.org/' + + links = self.get_links(resp) + + assert self.make_memento_link(url, '20140126200624', dt, 'mp_', include_coll=False) in links + + #timegate link + assert self.make_timegate_link(url, '') in links + + + resp = resp.follow() + + # Body + assert '"20140126200624"' in resp.text + assert 
'"http://www.iana.org/"' in resp.text, resp.text + + def test_memento_not_time_gate(self, fmod): + headers = {'Accept-Datetime': 'Sun, 26 Jan 2014 20:06:24 GMT'} + resp = self.testapp.get('/pywb/2/http://www.iana.org/', headers=headers) + assert resp.status_code == 200 + + def test_timegate_error_not_found(self): + resp = self.testapp.get('/pywb/http://example.com/x-not-found', status=404) + assert resp.status_code == 404 +