mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
memento: when redir_to_exact
is false, don't redirect the latest replay/timegate to the current timestamp; return the latest capture directly.
When memento is enabled, the timegate now follows Memento pattern 2.2 (http://tools.ietf.org/html/rfc7089#section-4.2.2) and returns Content-Location instead of Location. Update the memento no-redirect tests to match the new behavior. Closes #122
This commit is contained in:
parent
3b94f32a7f
commit
66f5ad62b3
@ -42,6 +42,7 @@ class MementoRequest(MementoReqMixin, WbRequest):
|
|||||||
class MementoRespMixin(object):
|
class MementoRespMixin(object):
|
||||||
def _init_derived(self, params):
|
def _init_derived(self, params):
|
||||||
wbrequest = params.get('wbrequest')
|
wbrequest = params.get('wbrequest')
|
||||||
|
is_redirect = params.get('memento_is_redir', False)
|
||||||
cdx = params.get('cdx')
|
cdx = params.get('cdx')
|
||||||
|
|
||||||
if not wbrequest or not wbrequest.wb_url:
|
if not wbrequest or not wbrequest.wb_url:
|
||||||
@ -50,7 +51,7 @@ class MementoRespMixin(object):
|
|||||||
mod = wbrequest.options.get('replay_mod', '')
|
mod = wbrequest.options.get('replay_mod', '')
|
||||||
|
|
||||||
#is_top_frame = wbrequest.wb_url.is_top_frame
|
#is_top_frame = wbrequest.wb_url.is_top_frame
|
||||||
is_top_frame = wbrequest.options.get('is_top_frame')
|
is_top_frame = wbrequest.options.get('is_top_frame', False)
|
||||||
|
|
||||||
is_timegate = (wbrequest.options.get('is_timegate', False) and
|
is_timegate = (wbrequest.options.get('is_timegate', False) and
|
||||||
not is_top_frame)
|
not is_top_frame)
|
||||||
@ -60,6 +61,7 @@ class MementoRespMixin(object):
|
|||||||
|
|
||||||
# Determine if memento:
|
# Determine if memento:
|
||||||
is_memento = False
|
is_memento = False
|
||||||
|
is_original = False
|
||||||
|
|
||||||
# if no cdx included, not a memento, unless top-frame special
|
# if no cdx included, not a memento, unless top-frame special
|
||||||
if not cdx:
|
if not cdx:
|
||||||
@ -71,10 +73,13 @@ class MementoRespMixin(object):
|
|||||||
# otherwise, if in proxy mode, then always a memento
|
# otherwise, if in proxy mode, then always a memento
|
||||||
elif wbrequest.options['is_proxy']:
|
elif wbrequest.options['is_proxy']:
|
||||||
is_memento = True
|
is_memento = True
|
||||||
|
is_original = True
|
||||||
|
|
||||||
# otherwise only if timestamp replay (and not a timegate)
|
# otherwise only if timestamp replay (and not a timegate)
|
||||||
elif not is_timegate:
|
#elif not is_timegate:
|
||||||
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
|
# is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
|
||||||
|
elif not is_redirect:
|
||||||
|
is_memento = (wbrequest.wb_url.is_replay())
|
||||||
|
|
||||||
link = []
|
link = []
|
||||||
req_url = wbrequest.wb_url.url
|
req_url = wbrequest.wb_url.url
|
||||||
@ -101,11 +106,18 @@ class MementoRespMixin(object):
|
|||||||
timestamp=ts,
|
timestamp=ts,
|
||||||
url=url)
|
url=url)
|
||||||
|
|
||||||
link.append(self.make_memento_link(canon_link,
|
# Must set content location
|
||||||
'memento',
|
if is_memento and is_timegate:
|
||||||
http_date))
|
self.status_headers.headers.append(('Content-Location',
|
||||||
|
canon_link))
|
||||||
|
|
||||||
if is_memento and is_timegate:
|
# don't set memento link for very long urls...
|
||||||
|
if len(canon_link) < 512:
|
||||||
|
link.append(self.make_memento_link(canon_link,
|
||||||
|
'memento',
|
||||||
|
http_date))
|
||||||
|
|
||||||
|
if is_original and is_timegate:
|
||||||
link.append(self.make_link(req_url, 'original timegate'))
|
link.append(self.make_link(req_url, 'original timegate'))
|
||||||
else:
|
else:
|
||||||
link.append(self.make_link(req_url, 'original'))
|
link.append(self.make_link(req_url, 'original'))
|
||||||
|
@ -233,6 +233,9 @@ class ReplayView(object):
|
|||||||
return chain(iter([content]), iterator)
|
return chain(iter([content]), iterator)
|
||||||
|
|
||||||
def _redirect_if_needed(self, wbrequest, cdx):
|
def _redirect_if_needed(self, wbrequest, cdx):
|
||||||
|
if not self.redir_to_exact:
|
||||||
|
return None
|
||||||
|
|
||||||
if wbrequest.options['is_proxy']:
|
if wbrequest.options['is_proxy']:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -243,10 +246,7 @@ class ReplayView(object):
|
|||||||
if not is_timegate:
|
if not is_timegate:
|
||||||
is_timegate = wbrequest.wb_url.is_latest_replay()
|
is_timegate = wbrequest.wb_url.is_latest_replay()
|
||||||
|
|
||||||
redir_needed = is_timegate
|
redir_needed = is_timegate or (cdx['timestamp'] != wbrequest.wb_url.timestamp)
|
||||||
|
|
||||||
if not redir_needed and self.redir_to_exact:
|
|
||||||
redir_needed = (cdx['timestamp'] != wbrequest.wb_url.timestamp)
|
|
||||||
|
|
||||||
if not redir_needed:
|
if not redir_needed:
|
||||||
return None
|
return None
|
||||||
@ -254,10 +254,10 @@ class ReplayView(object):
|
|||||||
if self.enable_range_cache and wbrequest.extract_range():
|
if self.enable_range_cache and wbrequest.extract_range():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if is_timegate and not self.redir_to_exact:
|
#if is_timegate:
|
||||||
timestamp = timestamp_now()
|
# timestamp = timestamp_now()
|
||||||
else:
|
#else:
|
||||||
timestamp = cdx['timestamp']
|
timestamp = cdx['timestamp']
|
||||||
|
|
||||||
new_url = (wbrequest.urlrewriter.
|
new_url = (wbrequest.urlrewriter.
|
||||||
get_new_url(timestamp=timestamp,
|
get_new_url(timestamp=timestamp,
|
||||||
@ -279,7 +279,8 @@ class ReplayView(object):
|
|||||||
|
|
||||||
return self.response_class(status_headers,
|
return self.response_class(status_headers,
|
||||||
wbrequest=wbrequest,
|
wbrequest=wbrequest,
|
||||||
cdx=cdx)
|
cdx=cdx,
|
||||||
|
memento_is_redir=True)
|
||||||
|
|
||||||
def _reject_self_redirect(self, wbrequest, cdx, status_headers):
|
def _reject_self_redirect(self, wbrequest, cdx, status_headers):
|
||||||
"""
|
"""
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
collections:
|
collections:
|
||||||
pywb: ./sample_archive/cdx/
|
pywb: ./sample_archive/cdx/
|
||||||
|
|
||||||
pywb-non-exact:
|
pywb-no-redir:
|
||||||
index_paths: ./sample_archive/cdx/
|
index_paths: ./sample_archive/cdx/
|
||||||
redir_to_exact: false
|
redir_to_exact: false
|
||||||
|
|
||||||
|
@ -43,30 +43,29 @@ class TestMemento(MementoMixin):
|
|||||||
assert '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
|
assert '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
|
||||||
|
|
||||||
|
|
||||||
# timegate with latest memento, but redirect to current timestamp url instead of
|
# timegate with latest memento, but no redirect
|
||||||
# memento timestamp
|
def test_timegate_memento_no_redir_latest(self):
|
||||||
def test_timegate_latest_request_timestamp(self):
|
|
||||||
"""
|
"""
|
||||||
TimeGate with no Accept-Datetime header
|
TimeGate with no Accept-Datetime header
|
||||||
"""
|
"""
|
||||||
|
|
||||||
dt = 'Mon, 27 Jan 2014 17:12:39 GMT'
|
dt = 'Mon, 27 Jan 2014 17:12:39 GMT'
|
||||||
resp = self.testapp.get('/pywb-non-exact/http://www.iana.org/_css/2013.1/screen.css')
|
resp = self.testapp.get('/pywb-no-redir/http://www.iana.org/_css/2013.1/screen.css')
|
||||||
|
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 200
|
||||||
|
|
||||||
assert resp.headers[VARY] == 'accept-datetime'
|
assert resp.headers[VARY] == 'accept-datetime'
|
||||||
|
|
||||||
links = self.get_links(resp)
|
links = self.get_links(resp)
|
||||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
||||||
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css', coll='pywb-non-exact') in links
|
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css', coll='pywb-no-redir') in links
|
||||||
assert self.make_memento_link('http://www.iana.org/_css/2013.1/screen.css', '20140127171239', dt, coll='pywb-non-exact') in links
|
assert self.make_memento_link('http://www.iana.org/_css/2013.1/screen.css', '20140127171239', dt, coll='pywb-no-redir') in links
|
||||||
|
|
||||||
assert MEMENTO_DATETIME not in resp.headers
|
assert MEMENTO_DATETIME in resp.headers
|
||||||
|
|
||||||
assert '/pywb-non-exact/' in resp.headers['Location']
|
assert '/pywb-no-redir/' in resp.headers['Content-Location']
|
||||||
|
|
||||||
wburl = resp.headers['Location'].split('/pywb-non-exact/')[-1]
|
wburl = resp.headers['Content-Location'].split('/pywb-no-redir/')[-1]
|
||||||
ts = wburl.split('/')[0]
|
ts = wburl.split('/')[0]
|
||||||
assert len(ts) == 14
|
assert len(ts) == 14
|
||||||
assert timestamp_now() >= ts
|
assert timestamp_now() >= ts
|
||||||
@ -115,6 +114,28 @@ class TestMemento(MementoMixin):
|
|||||||
assert '/pywb/20140126200804/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
|
assert '/pywb/20140126200804/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
|
||||||
|
|
||||||
|
|
||||||
|
def test_timegate_memento_no_redir_accept_datetime_inexact(self):
|
||||||
|
"""
|
||||||
|
TimeGate with Accept-Datetime header, not matching a memento exactly, no redirect
|
||||||
|
"""
|
||||||
|
dt = 'Sun, 26 Jan 2014 20:08:04 GMT'
|
||||||
|
request_dt = 'Sun, 26 Jan 2014 20:08:00 GMT'
|
||||||
|
headers = {ACCEPT_DATETIME: request_dt}
|
||||||
|
resp = self.testapp.get('/pywb-no-redir/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
||||||
|
|
||||||
|
assert resp.status_int == 200
|
||||||
|
|
||||||
|
assert resp.headers[VARY] == 'accept-datetime'
|
||||||
|
|
||||||
|
links = self.get_links(resp)
|
||||||
|
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
||||||
|
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css', coll='pywb-no-redir') in links
|
||||||
|
assert self.make_memento_link('http://www.iana.org/_css/2013.1/screen.css', '20140126200804', dt, coll='pywb-no-redir') == links[0], links[0]
|
||||||
|
|
||||||
|
assert MEMENTO_DATETIME in resp.headers
|
||||||
|
|
||||||
|
assert '/pywb-no-redir/20140126200804/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Content-Location']
|
||||||
|
|
||||||
def test_non_timegate_intermediate_redir(self):
|
def test_non_timegate_intermediate_redir(self):
|
||||||
"""
|
"""
|
||||||
Not a timegate, but an 'intermediate resource', redirect to closest timestamp
|
Not a timegate, but an 'intermediate resource', redirect to closest timestamp
|
||||||
|
Loading…
x
Reference in New Issue
Block a user