diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index c590825a..56167906 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -345,6 +345,7 @@ class RewriterApp(object): content_rw, is_proxy) response = None + keep_frame_response = False # prefer overrides custom response? if pref_mod is not None: @@ -360,13 +361,22 @@ class RewriterApp(object): else: wb_url.mod = pref_mod else: - # don't return top-frame response for timegate with exact redirects - kwargs['is_timegate_redir'] = is_timegate and redirect_to_exact - response = self.handle_custom_response(environ, wb_url, - full_prefix, host_prefix, - kwargs) + if kwargs.get('output'): + response = self.handle_timemap(wb_url, kwargs, full_prefix) - if response: + elif wb_url.is_query(): + response = self.handle_query(environ, wb_url, kwargs, full_prefix) + + else: + # don't return top-frame response for timegate with exact redirects + if not (is_timegate and redirect_to_exact): + keep_frame_response = is_timegate and not redirect_to_exact and not is_proxy + response = self.handle_custom_response(environ, wb_url, + full_prefix, host_prefix, + kwargs) + + + if response and not keep_frame_response: return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy) if is_proxy: @@ -443,6 +453,11 @@ class RewriterApp(object): return self.send_redirect(new_path, url_parts, urlrewriter) + # return top-frame timegate response, with timestamp from cdx + if response and keep_frame_response: + no_except_close(r.raw) + return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy, cdx['timestamp']) + stream = BufferedReader(r.raw, block_size=BUFF_SIZE) record = self.loader.parse_record_stream(stream, ensure_http_headers=True) @@ -560,7 +575,7 @@ class RewriterApp(object): return response - def format_response(self, response, wb_url, full_prefix, is_timegate, is_proxy): + def format_response(self, response, wb_url, full_prefix, is_timegate, is_proxy, timegate_closest_ts=None): memento_ts = None if not isinstance(response, WbResponse): content_type = 'text/html' @@ -569,13 +584,13 @@ class RewriterApp(object): if not self.is_framed_replay(wb_url): content_type += '; charset=utf-8' else: - memento_ts = wb_url.timestamp + memento_ts = timegate_closest_ts or wb_url.timestamp response = WbResponse.text_response(response, content_type=content_type) if self.enable_memento and response.status_headers.statusline.startswith('200'): self._add_memento_links(wb_url.url, full_prefix, None, memento_ts, - response.status_headers, is_timegate, is_proxy) + response.status_headers, is_timegate, is_proxy, is_memento=not is_timegate) return response def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts, @@ -873,13 +888,7 @@ class RewriterApp(object): return {'metadata': kwargs.get('metadata', {})} def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs): - if kwargs.get('output'): - return self.handle_timemap(wb_url, kwargs, full_prefix) - - if wb_url.is_query(): - return self.handle_query(environ, wb_url, kwargs, full_prefix) - - if self.is_framed_replay(wb_url) and not kwargs.get('is_timegate_redir'): + if self.is_framed_replay(wb_url): extra_params = self.get_top_frame_params(wb_url, kwargs) return self.frame_insert_view.get_top_frame(wb_url, full_prefix, diff --git a/tests/test_memento.py b/tests/test_memento.py index 7c6d6164..adb1d921 100644 --- a/tests/test_memento.py +++ b/tests/test_memento.py @@ -63,6 +63,28 @@ class TestMemento(MementoMixin, BaseConfigTest): assert '"20140127171238"' in resp.text assert '"http://www.iana.org/"' in resp.text, resp.text + def test_memento_top_frame_timegate(self): + resp = self.testapp.get('/pywb/http://www.iana.org/_css/2013.1/screen.css') + + # vary header + assert VARY in resp.headers + + # no memento header, as not really a memento (top-frame) + assert MEMENTO_DATETIME not in resp.headers + + # Memento Headers + # memento link + dt = 'Mon, 27 Jan 2014 17:12:39 GMT' + url = 'http://www.iana.org/_css/2013.1/screen.css' + + links = self.get_links(resp) + + assert self.make_memento_link(url, '20140127171239', dt, 'mp_', include_coll=False) in links + + #timegate link + assert self.make_timegate_link(url, '') in links + + def test_memento_content_replay_exact(self, fmod): resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod) @@ -175,6 +197,15 @@ com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", " resp = self._timemap_get('/pywb/timemap/foo/http://example.com', status=400) assert resp.json == {'message': 'output=foo not supported'} + def test_timegate_error_not_found(self): + resp = self.testapp.get('/pywb/http://example.com/x-not-found', status=404) + assert resp.status_code == 404 + + # No Memento Headers + assert VARY not in resp.headers + assert MEMENTO_DATETIME not in resp.headers + assert 'Link' not in resp.headers + def test_error_bad_accept_datetime(self): """ 400 response for bad accept_datetime