mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
memento-fix: fix for ukwa/ukwa-pywb#37.
- support memento timegate on top-frame (when no timestamp is provided)
- treat top-frame no-timestamp url as canonical timegate
- tests: update tests, add memento redirect mode tests for timegate, timegate with accept-dt header
This commit is contained in:
parent
0c08b9b5d5
commit
ce0ed610bd
@ -302,7 +302,9 @@ class RewriterApp(object):
|
|||||||
kwargs)
|
kwargs)
|
||||||
|
|
||||||
if response:
|
if response:
|
||||||
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)
|
# don't return top-frame response for timegate with exact redirects
|
||||||
|
if not is_timegate or not redirect_to_exact:
|
||||||
|
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)
|
||||||
|
|
||||||
if is_proxy:
|
if is_proxy:
|
||||||
environ['pywb_proxy_magic'] = environ['wsgiprox.proxy_host']
|
environ['pywb_proxy_magic'] = environ['wsgiprox.proxy_host']
|
||||||
@ -395,11 +397,9 @@ class RewriterApp(object):
|
|||||||
if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1':
|
if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1':
|
||||||
set_content_loc = True
|
set_content_loc = True
|
||||||
|
|
||||||
# if redir to exact, redir if url or ts are different
|
# if redirect to exact timestamp, but only if not live
|
||||||
if redirect_to_exact:
|
if redirect_to_exact and not cdx.get('is_live'):
|
||||||
if (set_content_loc or
|
if set_content_loc or is_timegate or wb_url.timestamp != cdx.get('timestamp'):
|
||||||
(wb_url.timestamp != cdx.get('timestamp') and not cdx.get('is_live'))):
|
|
||||||
|
|
||||||
new_url = urlrewriter.get_new_url(url=target_uri,
|
new_url = urlrewriter.get_new_url(url=target_uri,
|
||||||
timestamp=cdx['timestamp'],
|
timestamp=cdx['timestamp'],
|
||||||
mod=wb_url.mod)
|
mod=wb_url.mod)
|
||||||
@ -412,7 +412,8 @@ class RewriterApp(object):
|
|||||||
resp.status_headers,
|
resp.status_headers,
|
||||||
is_timegate, is_proxy,
|
is_timegate, is_proxy,
|
||||||
pref_applied=pref_applied,
|
pref_applied=pref_applied,
|
||||||
mod=pref_mod)
|
mod=pref_mod,
|
||||||
|
is_memento=False)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original')
|
resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original')
|
||||||
@ -512,21 +513,22 @@ class RewriterApp(object):
|
|||||||
|
|
||||||
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
||||||
status_headers, is_timegate, is_proxy, coll=None,
|
status_headers, is_timegate, is_proxy, coll=None,
|
||||||
pref_applied=None, mod=None):
|
pref_applied=None, mod=None, is_memento=True):
|
||||||
|
|
||||||
mod = mod or self.replay_mod
|
replay_mod = mod or self.replay_mod
|
||||||
|
|
||||||
# memento url + header
|
# memento url + header
|
||||||
if not memento_dt and memento_ts:
|
if not memento_dt and memento_ts:
|
||||||
memento_dt = timestamp_to_http_date(memento_ts)
|
memento_dt = timestamp_to_http_date(memento_ts)
|
||||||
|
|
||||||
if memento_dt:
|
if memento_dt:
|
||||||
status_headers.headers.append(('Memento-Datetime', memento_dt))
|
if is_memento:
|
||||||
|
status_headers.headers.append(('Memento-Datetime', memento_dt))
|
||||||
|
|
||||||
if is_proxy:
|
if is_proxy:
|
||||||
memento_url = url
|
memento_url = url
|
||||||
else:
|
else:
|
||||||
memento_url = full_prefix + memento_ts + mod
|
memento_url = full_prefix + memento_ts + replay_mod
|
||||||
memento_url += '/' + url
|
memento_url += '/' + url
|
||||||
else:
|
else:
|
||||||
memento_url = None
|
memento_url = None
|
||||||
@ -560,6 +562,7 @@ class RewriterApp(object):
|
|||||||
def _get_timegate_timemap(self, url, full_prefix, mod):
|
def _get_timegate_timemap(self, url, full_prefix, mod):
|
||||||
# timegate url
|
# timegate url
|
||||||
timegate_url = full_prefix
|
timegate_url = full_prefix
|
||||||
|
mod = ''
|
||||||
if mod:
|
if mod:
|
||||||
timegate_url += mod + '/'
|
timegate_url += mod + '/'
|
||||||
|
|
||||||
@ -653,7 +656,7 @@ class RewriterApp(object):
|
|||||||
status = str(res.status_code) + ' ' + res.reason
|
status = str(res.status_code) + ' ' + res.reason
|
||||||
|
|
||||||
if res.status_code == 200 and output == 'link':
|
if res.status_code == 200 and output == 'link':
|
||||||
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix, self.replay_mod)
|
timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix, wb_url.mod)
|
||||||
|
|
||||||
text = MementoUtils.wrap_timemap_header(wb_url.url,
|
text = MementoUtils.wrap_timemap_header(wb_url.url,
|
||||||
timegate,
|
timegate,
|
||||||
|
@ -33,7 +33,7 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
|||||||
assert resp.headers['Content-Location'] in memento_link
|
assert resp.headers['Content-Location'] in memento_link
|
||||||
|
|
||||||
# timegate link
|
# timegate link
|
||||||
assert self.make_timegate_link(url, fmod) in links
|
assert self.make_timegate_link(url, '') in links
|
||||||
|
|
||||||
# timemap link
|
# timemap link
|
||||||
assert self.make_timemap_link(url) in links
|
assert self.make_timemap_link(url) in links
|
||||||
@ -60,7 +60,7 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
|||||||
assert self.make_memento_link(url, '20140127171238', dt, 'mp_', include_coll=False) in links
|
assert self.make_memento_link(url, '20140127171238', dt, 'mp_', include_coll=False) in links
|
||||||
|
|
||||||
#timegate link
|
#timegate link
|
||||||
assert self.make_timegate_link(url, 'mp_') in links
|
assert self.make_timegate_link(url, '') in links
|
||||||
|
|
||||||
# Body
|
# Body
|
||||||
assert '"20140127171238"' in resp.text
|
assert '"20140127171238"' in resp.text
|
||||||
@ -132,7 +132,7 @@ class TestMemento(MementoMixin, BaseConfigTest):
|
|||||||
|
|
||||||
exp = """\
|
exp = """\
|
||||||
<http://localhost:80/pywb/timemap/link/http://example.com?example=1>; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT",
|
<http://localhost:80/pywb/timemap/link/http://example.com?example=1>; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT",
|
||||||
<http://localhost:80/pywb/mp_/http://example.com?example=1>; rel="timegate",
|
<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",
|
||||||
<http://example.com?example=1>; rel="original",
|
<http://example.com?example=1>; rel="original",
|
||||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb",
|
||||||
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
<http://example.com?example=1>; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb"
|
||||||
@ -186,3 +186,75 @@ com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", "
|
|||||||
assert resp.status_int == 400
|
assert resp.status_int == 400
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class TestMementoRedirectClassic(MementoMixin, BaseConfigTest):
|
||||||
|
@classmethod
|
||||||
|
def setup_class(cls):
|
||||||
|
super(TestMementoRedirectClassic, cls).setup_class('config_test_redirect_classic.yaml')
|
||||||
|
|
||||||
|
def test_memento_top_frame_timegate(self, fmod):
|
||||||
|
resp = self.testapp.get('/pywb/http://www.iana.org/')
|
||||||
|
assert resp.status_code == 307
|
||||||
|
assert resp.headers['Location'].endswith('/20140127171238/http://www.iana.org/')
|
||||||
|
assert resp.headers['Link'] != ''
|
||||||
|
|
||||||
|
# Memento Headers
|
||||||
|
assert VARY in resp.headers
|
||||||
|
assert MEMENTO_DATETIME not in resp.headers
|
||||||
|
|
||||||
|
# memento link
|
||||||
|
dt = 'Mon, 27 Jan 2014 17:12:38 GMT'
|
||||||
|
url = 'http://www.iana.org/'
|
||||||
|
|
||||||
|
links = self.get_links(resp)
|
||||||
|
|
||||||
|
assert self.make_memento_link(url, '20140127171238', dt, 'mp_', include_coll=False) in links
|
||||||
|
|
||||||
|
#timegate link
|
||||||
|
assert self.make_timegate_link(url, '') in links
|
||||||
|
|
||||||
|
|
||||||
|
resp = resp.follow()
|
||||||
|
|
||||||
|
# Body
|
||||||
|
assert '"20140127171238"' in resp.text
|
||||||
|
assert '"http://www.iana.org/"' in resp.text, resp.text
|
||||||
|
|
||||||
|
def test_memento_top_frame_timegate_accept_dt(self, fmod):
|
||||||
|
headers = {'Accept-Datetime': 'Sun, 26 Jan 2014 20:06:24 GMT'}
|
||||||
|
resp = self.testapp.get('/pywb/http://www.iana.org/', headers=headers)
|
||||||
|
assert resp.status_code == 307
|
||||||
|
assert resp.headers['Location'].endswith('/20140126200624/http://www.iana.org/')
|
||||||
|
assert resp.headers['Link'] != ''
|
||||||
|
|
||||||
|
# Memento Headers
|
||||||
|
assert VARY in resp.headers
|
||||||
|
assert MEMENTO_DATETIME not in resp.headers
|
||||||
|
|
||||||
|
# memento link
|
||||||
|
dt = 'Sun, 26 Jan 2014 20:06:24 GMT'
|
||||||
|
url = 'http://www.iana.org/'
|
||||||
|
|
||||||
|
links = self.get_links(resp)
|
||||||
|
|
||||||
|
assert self.make_memento_link(url, '20140126200624', dt, 'mp_', include_coll=False) in links
|
||||||
|
|
||||||
|
#timegate link
|
||||||
|
assert self.make_timegate_link(url, '') in links
|
||||||
|
|
||||||
|
|
||||||
|
resp = resp.follow()
|
||||||
|
|
||||||
|
# Body
|
||||||
|
assert '"20140126200624"' in resp.text
|
||||||
|
assert '"http://www.iana.org/"' in resp.text, resp.text
|
||||||
|
|
||||||
|
def test_memento_not_time_gate(self, fmod):
|
||||||
|
headers = {'Accept-Datetime': 'Sun, 26 Jan 2014 20:06:24 GMT'}
|
||||||
|
resp = self.testapp.get('/pywb/2/http://www.iana.org/', headers=headers)
|
||||||
|
assert resp.status_code == 200
|
||||||
|
|
||||||
|
def test_timegate_error_not_found(self):
|
||||||
|
resp = self.testapp.get('/pywb/http://example.com/x-not-found', status=404)
|
||||||
|
assert resp.status_code == 404
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user