mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
memento timegate: make timegate headers for /<coll>/<url> behave correctly per the Memento spec (#564)
Return 404 if not found, and return the latest memento header. This is done by performing the actual response lookup, then returning the top-frame response if the lookup succeeded. Addresses ukwa/ukwa-pywb#58.
This commit is contained in:
parent
5e9b13e267
commit
3c53c2731b
@ -345,6 +345,7 @@ class RewriterApp(object):
|
||||
content_rw, is_proxy)
|
||||
|
||||
response = None
|
||||
keep_frame_response = False
|
||||
|
||||
# prefer overrides custom response?
|
||||
if pref_mod is not None:
|
||||
@ -359,14 +360,23 @@ class RewriterApp(object):
|
||||
headers=headers)
|
||||
else:
|
||||
wb_url.mod = pref_mod
|
||||
else:
|
||||
if kwargs.get('output'):
|
||||
response = self.handle_timemap(wb_url, kwargs, full_prefix)
|
||||
|
||||
elif wb_url.is_query():
|
||||
response = self.handle_query(environ, wb_url, kwargs, full_prefix)
|
||||
|
||||
else:
|
||||
# don't return top-frame response for timegate with exact redirects
|
||||
kwargs['is_timegate_redir'] = is_timegate and redirect_to_exact
|
||||
if not (is_timegate and redirect_to_exact):
|
||||
keep_frame_response = is_timegate and not redirect_to_exact and not is_proxy
|
||||
response = self.handle_custom_response(environ, wb_url,
|
||||
full_prefix, host_prefix,
|
||||
kwargs)
|
||||
|
||||
if response:
|
||||
|
||||
if response and not keep_frame_response:
|
||||
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)
|
||||
|
||||
if is_proxy:
|
||||
@ -443,6 +453,11 @@ class RewriterApp(object):
|
||||
|
||||
return self.send_redirect(new_path, url_parts, urlrewriter)
|
||||
|
||||
# return top-frame timegate response, with timestamp from cdx
|
||||
if response and keep_frame_response:
|
||||
no_except_close(r.raw)
|
||||
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy, cdx['timestamp'])
|
||||
|
||||
stream = BufferedReader(r.raw, block_size=BUFF_SIZE)
|
||||
record = self.loader.parse_record_stream(stream,
|
||||
ensure_http_headers=True)
|
||||
@ -560,7 +575,7 @@ class RewriterApp(object):
|
||||
|
||||
return response
|
||||
|
||||
def format_response(self, response, wb_url, full_prefix, is_timegate, is_proxy):
|
||||
def format_response(self, response, wb_url, full_prefix, is_timegate, is_proxy, timegate_closest_ts=None):
|
||||
memento_ts = None
|
||||
if not isinstance(response, WbResponse):
|
||||
content_type = 'text/html'
|
||||
@ -569,13 +584,13 @@ class RewriterApp(object):
|
||||
if not self.is_framed_replay(wb_url):
|
||||
content_type += '; charset=utf-8'
|
||||
else:
|
||||
memento_ts = wb_url.timestamp
|
||||
memento_ts = timegate_closest_ts or wb_url.timestamp
|
||||
|
||||
response = WbResponse.text_response(response, content_type=content_type)
|
||||
|
||||
if self.enable_memento and response.status_headers.statusline.startswith('200'):
|
||||
self._add_memento_links(wb_url.url, full_prefix, None, memento_ts,
|
||||
response.status_headers, is_timegate, is_proxy)
|
||||
response.status_headers, is_timegate, is_proxy, is_memento=not is_timegate)
|
||||
return response
|
||||
|
||||
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
||||
@ -873,13 +888,7 @@ class RewriterApp(object):
|
||||
return {'metadata': kwargs.get('metadata', {})}
|
||||
|
||||
def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs):
|
||||
if kwargs.get('output'):
|
||||
return self.handle_timemap(wb_url, kwargs, full_prefix)
|
||||
|
||||
if wb_url.is_query():
|
||||
return self.handle_query(environ, wb_url, kwargs, full_prefix)
|
||||
|
||||
if self.is_framed_replay(wb_url) and not kwargs.get('is_timegate_redir'):
|
||||
if self.is_framed_replay(wb_url):
|
||||
extra_params = self.get_top_frame_params(wb_url, kwargs)
|
||||
return self.frame_insert_view.get_top_frame(wb_url,
|
||||
full_prefix,
|
||||
|
@ -63,6 +63,28 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
||||
assert '"20140127171238"' in resp.text
|
||||
assert '"http://www.iana.org/"' in resp.text, resp.text
|
||||
|
||||
def test_memento_top_frame_timegate(self):
|
||||
resp = self.testapp.get('/pywb/http://www.iana.org/_css/2013.1/screen.css')
|
||||
|
||||
# vary header
|
||||
assert VARY in resp.headers
|
||||
|
||||
# no memento header, as not really a memento (top-frame)
|
||||
assert MEMENTO_DATETIME not in resp.headers
|
||||
|
||||
# Memento Headers
|
||||
# memento link
|
||||
dt = 'Mon, 27 Jan 2014 17:12:39 GMT'
|
||||
url = 'http://www.iana.org/_css/2013.1/screen.css'
|
||||
|
||||
links = self.get_links(resp)
|
||||
|
||||
assert self.make_memento_link(url, '20140127171239', dt, 'mp_', include_coll=False) in links
|
||||
|
||||
#timegate link
|
||||
assert self.make_timegate_link(url, '') in links
|
||||
|
||||
|
||||
def test_memento_content_replay_exact(self, fmod):
|
||||
resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod)
|
||||
|
||||
@ -175,6 +197,15 @@ com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", "
|
||||
resp = self._timemap_get('/pywb/timemap/foo/http://example.com', status=400)
|
||||
assert resp.json == {'message': 'output=foo not supported'}
|
||||
|
||||
def test_timegate_error_not_found(self):
|
||||
resp = self.testapp.get('/pywb/http://example.com/x-not-found', status=404)
|
||||
assert resp.status_code == 404
|
||||
|
||||
# No Memento Headers
|
||||
assert VARY not in resp.headers
|
||||
assert MEMENTO_DATETIME not in resp.headers
|
||||
assert 'Link' not in resp.headers
|
||||
|
||||
def test_error_bad_accept_datetime(self):
|
||||
"""
|
||||
400 response for bad accept_datetime
|
||||
|
Loading…
x
Reference in New Issue
Block a user