mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
memento-fix: fix for ukwa/ukwa-pywb#37.
- support memento timegate on top-frame (when no timestamp is provided)
- treat top-frame no-timestamp url as canonical timegate
- tests: update tests, add memento redirect mode tests for timegate and for timegate with an Accept-Datetime header
This commit is contained in:
parent
0c08b9b5d5
commit
ce0ed610bd
@ -302,7 +302,9 @@ class RewriterApp(object):
|
||||
kwargs)
|
||||
|
||||
if response:
|
||||
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)
|
||||
# don't return top-frame response for timegate with exact redirects
|
||||
if not is_timegate or not redirect_to_exact:
|
||||
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)
|
||||
|
||||
if is_proxy:
|
||||
environ['pywb_proxy_magic'] = environ['wsgiprox.proxy_host']
|
||||
@ -395,11 +397,9 @@ class RewriterApp(object):
|
||||
if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1':
|
||||
set_content_loc = True
|
||||
|
||||
# if redir to exact, redir if url or ts are different
|
||||
if redirect_to_exact:
|
||||
if (set_content_loc or
|
||||
(wb_url.timestamp != cdx.get('timestamp') and not cdx.get('is_live'))):
|
||||
|
||||
# if redirect to exact timestamp, but only if not live
|
||||
if redirect_to_exact and not cdx.get('is_live'):
|
||||
if set_content_loc or is_timegate or wb_url.timestamp != cdx.get('timestamp'):
|
||||
new_url = urlrewriter.get_new_url(url=target_uri,
|
||||
timestamp=cdx['timestamp'],
|
||||
mod=wb_url.mod)
|
||||
@ -412,7 +412,8 @@ class RewriterApp(object):
|
||||
resp.status_headers,
|
||||
is_timegate, is_proxy,
|
||||
pref_applied=pref_applied,
|
||||
mod=pref_mod)
|
||||
mod=pref_mod,
|
||||
is_memento=False)
|
||||
|
||||
else:
|
||||
resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original')
|
||||
@ -512,21 +513,22 @@ class RewriterApp(object):
|
||||
|
||||
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
||||
status_headers, is_timegate, is_proxy, coll=None,
|
||||
pref_applied=None, mod=None):
|
||||
pref_applied=None, mod=None, is_memento=True):
|
||||
|
||||
mod = mod or self.replay_mod
|
||||
replay_mod = mod or self.replay_mod
|
||||
|
||||
# memento url + header
|
||||
if not memento_dt and memento_ts:
|
||||
memento_dt = timestamp_to_http_date(memento_ts)
|
||||
|
||||
if memento_dt:
|
||||
status_headers.headers.append(('Memento-Datetime', memento_dt))
|
||||
if is_memento:
|
||||
status_headers.headers.append(('Memento-Datetime', memento_dt))
|
||||
|
||||
if is_proxy:
|
||||
memento_url = url
|
||||
else:
|
||||
memento_url = full_prefix + memento_ts + mod
|
||||
memento_url = full_prefix + memento_ts + replay_mod
|
||||
memento_url += '/' + url
|
||||
else:
|
||||
memento_url = None
|
||||
@ -560,6 +562,7 @@ class RewriterApp(object):
|
||||
def _get_timegate_timemap(self, url, full_prefix, mod):
|
||||
# timegate url
|
||||
timegate_url = full_prefix
|
||||
mod = ''
|
||||
if mod:
|
||||
timegate_url += mod + '/'
|
||||
|
||||
@ -653,7 +656,7 @@ class RewriterApp(object):
|
||||
status = str(res.status_code) + ' ' + res.reason
|
||||
|
||||
if res.status_code == 200 and output == 'link':
|
||||
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix, self.replay_mod)
|
||||
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix, wb_url.mod)
|
||||
|
||||
text = MementoUtils.wrap_timemap_header(wb_url.url,
|
||||
timegate,
|
||||
|
@ -33,7 +33,7 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
||||
assert resp.headers['Content-Location'] in memento_link
|
||||
|
||||
# timegate link
|
||||
assert self.make_timegate_link(url, fmod) in links
|
||||
assert self.make_timegate_link(url, '') in links
|
||||
|
||||
# timemap link
|
||||
assert self.make_timemap_link(url) in links
|
||||
@ -60,7 +60,7 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
||||
assert self.make_memento_link(url, '20140127171238', dt, 'mp_', include_coll=False) in links
|
||||
|
||||
#timegate link
|
||||
assert self.make_timegate_link(url, 'mp_') in links
|
||||
assert self.make_timegate_link(url, '') in links
|
||||
|
||||
# Body
|
||||
assert '"20140127171238"' in resp.text
|
||||
@ -132,7 +132,7 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
||||
|
||||
exp = """\
|
||||
<http://localhost:80/pywb/timemap/link/http://example.com?example=1>; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT",
|
||||
<http://localhost:80/pywb/mp_/http://example.com?example=1>; rel="timegate",
|
||||
<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",
|
||||
<http://example.com?example=1>; rel="original",
|
||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
||||
@ -186,3 +186,75 @@ com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", "
|
||||
assert resp.status_int == 400
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestMementoRedirectClassic(MementoMixin, BaseConfigTest):
    """Memento timegate tests run against 'config_test_redirect_classic.yaml'.

    The cases below request a top-frame URL with no timestamp and check
    that the response is a 307 redirect to a timestamped URL carrying
    memento/timegate links but no MEMENTO_DATETIME header.
    """

    @classmethod
    def setup_class(cls):
        """Initialise the test app from the redirect-classic config file."""
        config_name = 'config_test_redirect_classic.yaml'
        super(TestMementoRedirectClassic, cls).setup_class(config_name)

    def test_memento_top_frame_timegate(self, fmod):
        """A top-frame URL without a timestamp redirects (307) to a capture."""
        original_url = 'http://www.iana.org/'
        memento_dt = 'Mon, 27 Jan 2014 17:12:38 GMT'

        redirect = self.testapp.get('/pywb/http://www.iana.org/')
        assert redirect.status_code == 307

        location = redirect.headers['Location']
        assert location.endswith('/20140127171238/http://www.iana.org/')
        assert redirect.headers['Link'] != ''

        # Memento headers: the redirect varies on the request, but it is
        # not itself a memento, so it carries no memento datetime.
        assert VARY in redirect.headers
        assert MEMENTO_DATETIME not in redirect.headers

        link_values = self.get_links(redirect)

        # Link to the memento that the redirect points at.
        expected_memento = self.make_memento_link(original_url, '20140127171238',
                                                  memento_dt, 'mp_',
                                                  include_coll=False)
        assert expected_memento in link_values

        # Timegate link is built with an empty modifier.
        expected_timegate = self.make_timegate_link(original_url, '')
        assert expected_timegate in link_values

        replay = redirect.follow()

        # Body of the redirect target contains the timestamp and url.
        assert '"20140127171238"' in replay.text
        assert '"http://www.iana.org/"' in replay.text, replay.text

    def test_memento_top_frame_timegate_accept_dt(self, fmod):
        """With Accept-Datetime set, the redirect targets the matching capture."""
        original_url = 'http://www.iana.org/'
        memento_dt = 'Sun, 26 Jan 2014 20:06:24 GMT'
        request_headers = {'Accept-Datetime': 'Sun, 26 Jan 2014 20:06:24 GMT'}

        redirect = self.testapp.get('/pywb/http://www.iana.org/',
                                    headers=request_headers)
        assert redirect.status_code == 307

        location = redirect.headers['Location']
        assert location.endswith('/20140126200624/http://www.iana.org/')
        assert redirect.headers['Link'] != ''

        # Memento headers: same expectations as the no-header timegate case.
        assert VARY in redirect.headers
        assert MEMENTO_DATETIME not in redirect.headers

        link_values = self.get_links(redirect)

        # Link to the memento that the redirect points at.
        expected_memento = self.make_memento_link(original_url, '20140126200624',
                                                  memento_dt, 'mp_',
                                                  include_coll=False)
        assert expected_memento in link_values

        # Timegate link is built with an empty modifier.
        expected_timegate = self.make_timegate_link(original_url, '')
        assert expected_timegate in link_values

        replay = redirect.follow()

        # Body of the redirect target contains the timestamp and url.
        assert '"20140126200624"' in replay.text
        assert '"http://www.iana.org/"' in replay.text, replay.text

    def test_memento_not_time_gate(self, fmod):
        """A request under /pywb/2/ returns 200 even with Accept-Datetime.

        NOTE(review): presumably the '2' path segment is parsed as a
        (partial) timestamp, so the URL is not treated as a timegate --
        confirm against the URL parsing rules.
        """
        request_headers = {'Accept-Datetime': 'Sun, 26 Jan 2014 20:06:24 GMT'}
        response = self.testapp.get('/pywb/2/http://www.iana.org/',
                                    headers=request_headers)
        assert response.status_code == 200

    def test_timegate_error_not_found(self):
        """A timestamp-less request for '/x-not-found' on example.com yields 404."""
        missing = self.testapp.get('/pywb/http://example.com/x-not-found',
                                   status=404)
        assert missing.status_code == 404
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user