mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
memento: fix headers to be more consistent for framed replay. When using
frames, the outer frame 'mirrors' the mementos of the inner frame so that they are discoverable by client-side memento tools; tracked via #70
This commit is contained in:
parent
757345d317
commit
55426e7619
@ -74,14 +74,23 @@ class MementoRespMixin(object):
|
||||
|
||||
link = []
|
||||
|
||||
if is_memento and cdx:
|
||||
http_date = timestamp_to_http_date(cdx['timestamp'])
|
||||
self.status_headers.headers.append(('Memento-Datetime', http_date))
|
||||
if is_memento:
|
||||
if cdx:
|
||||
http_date = timestamp_to_http_date(cdx['timestamp'])
|
||||
# for top frame
|
||||
elif wbrequest.wb_url.timestamp:
|
||||
http_date = timestamp_to_http_date(wbrequest.wb_url.timestamp)
|
||||
else:
|
||||
http_date = None
|
||||
|
||||
elif is_memento and is_top_frame and wbrequest.wb_url.timestamp:
|
||||
# top frame special case
|
||||
canon_link = wbrequest.urlrewriter.get_new_url(mod='')
|
||||
link.append(self.make_link(canon_link, 'memento'))
|
||||
if http_date:
|
||||
self.status_headers.headers.append(('Memento-Datetime',
|
||||
http_date))
|
||||
|
||||
canon_link = wbrequest.urlrewriter.get_new_url(mod='')
|
||||
link.append(self.make_memento_link(canon_link,
|
||||
'memento',
|
||||
http_date))
|
||||
|
||||
req_url = wbrequest.wb_url.url
|
||||
|
||||
@ -105,6 +114,9 @@ class MementoRespMixin(object):
|
||||
def make_link(self, url, type):
|
||||
return '<{0}>; rel="{1}"'.format(url, type)
|
||||
|
||||
def make_memento_link(self, url, type_, dt):
|
||||
return '<{0}>; rel="{1}"; datetime="{2}"'.format(url, type_, dt)
|
||||
|
||||
def make_timemap_link(self, wbrequest):
|
||||
format_ = '<{0}>; rel="timemap"; type="{1}"'
|
||||
|
||||
@ -121,7 +133,7 @@ class MementoResponse(MementoRespMixin, WbResponse):
|
||||
|
||||
|
||||
#=================================================================
|
||||
def make_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
|
||||
def make_timemap_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
|
||||
memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
|
||||
|
||||
string = WbUrl.to_wburl_str(url=cdx['original'],
|
||||
@ -161,17 +173,17 @@ def make_timemap(wbrequest, cdx_lines):
|
||||
yield timegate.format(prefix + url)
|
||||
|
||||
# first memento link
|
||||
yield make_memento_link(first_cdx, prefix,
|
||||
yield make_timemap_memento_link(first_cdx, prefix,
|
||||
datetime=from_date)
|
||||
|
||||
prev_cdx = None
|
||||
|
||||
for cdx in cdx_lines:
|
||||
if prev_cdx:
|
||||
yield make_memento_link(prev_cdx, prefix)
|
||||
yield make_timemap_memento_link(prev_cdx, prefix)
|
||||
|
||||
prev_cdx = cdx
|
||||
|
||||
# last memento link, if any
|
||||
if prev_cdx:
|
||||
yield make_memento_link(prev_cdx, prefix, end='')
|
||||
yield make_timemap_memento_link(prev_cdx, prefix, end='')
|
||||
|
@ -151,7 +151,7 @@ class HttpsUrlRewriter(UrlRewriter):
|
||||
return self.remove_https(url)
|
||||
|
||||
def get_new_url(self, **kwargs):
|
||||
return kwargs.get('url')
|
||||
return kwargs.get('url', self.wburl.url)
|
||||
|
||||
def rebase_rewriter(self, new_url):
|
||||
return self
|
||||
|
@ -1,4 +1,5 @@
|
||||
import webtest
|
||||
import re
|
||||
from pywb.webapp.pywb_init import create_wb_router
|
||||
from pywb.framework.wsgi_wrappers import init_app
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
@ -20,12 +21,16 @@ class TestWb:
|
||||
self.testapp = webtest.TestApp(self.app)
|
||||
|
||||
def get_links(self, resp):
|
||||
return map(lambda x: x.strip(), resp.headers[LINK].split(','))
|
||||
return map(lambda x: x.strip(), re.split(', (?![0-9])', resp.headers[LINK]))
|
||||
|
||||
def make_timemap_link(self, url):
|
||||
format_ = '<http://localhost:80/pywb/timemap/*/{0}>; rel="timemap"; type="{1}"'
|
||||
return format_.format(url, LINK_FORMAT)
|
||||
|
||||
def make_memento_link(self, url, ts, dt):
|
||||
format_ = '<http://localhost:80/pywb/{1}/{0}>; rel="memento"; datetime="{2}"'
|
||||
return format_.format(url, ts, dt)
|
||||
|
||||
# Below functionality is for archival (non-proxy) mode
|
||||
# It is designed to conform to Memento protocol Pattern 2.1
|
||||
# http://www.mementoweb.org/guide/rfc/#Pattern2.1
|
||||
@ -93,15 +98,37 @@ class TestWb:
|
||||
assert '/pywb/20140127171239/' in resp.headers['Location']
|
||||
|
||||
|
||||
def test_top_frame_no_date(self):
|
||||
def test_top_frame(self):
|
||||
"""
|
||||
A top-frame request with no date, must treat as intermediate
|
||||
A top-frame request with no date, not returning memento-datetime
|
||||
Include timemap, timegate, original headers
|
||||
"""
|
||||
|
||||
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
|
||||
resp = self.testapp.get('/pywb/tf_/http://www.iana.org/_css/2013.1/screen.css')
|
||||
|
||||
# not a timegate, ignore ACCEPT_DATETIME
|
||||
assert resp.status_int == 200
|
||||
|
||||
# no vary header
|
||||
assert VARY not in resp.headers
|
||||
|
||||
# not memento-datetime
|
||||
assert MEMENTO_DATETIME not in resp.headers
|
||||
|
||||
links = self.get_links(resp)
|
||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
||||
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
|
||||
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
||||
|
||||
def test_top_frame_no_date_accept_datetime(self):
|
||||
"""
|
||||
A top-frame request with no date, reflects back accept-datetime date
|
||||
Include timemap, timegate, original headers, and memento-datetime
|
||||
"""
|
||||
|
||||
dt = 'Sun, 26 Jan 2014 20:08:04 GMT'
|
||||
headers = {ACCEPT_DATETIME: dt}
|
||||
|
||||
# not a timegate, but use ACCEPT_DATETIME to infer memento for top frame
|
||||
resp = self.testapp.get('/pywb/tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
||||
|
||||
assert resp.status_int == 200
|
||||
@ -109,40 +136,42 @@ class TestWb:
|
||||
# no vary header
|
||||
assert VARY not in resp.headers
|
||||
|
||||
# no memento-datetime
|
||||
assert MEMENTO_DATETIME not in resp.headers
|
||||
# memento-datetime matches
|
||||
assert resp.headers[MEMENTO_DATETIME] == dt
|
||||
|
||||
links = self.get_links(resp)
|
||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
||||
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
|
||||
assert self.make_memento_link('http://www.iana.org/_css/2013.1/screen.css', '20140126200804', dt) in links
|
||||
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
||||
|
||||
def test_top_frame_with_date(self):
|
||||
"""
|
||||
A top-frame request with date, treat as intermediate
|
||||
Include timemap, timegate, original headers and a link to the possible memento
|
||||
A top-frame request with date, treat as memento
|
||||
Include timemap, timegate, original headers, memento and memento-datetime
|
||||
"""
|
||||
|
||||
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
|
||||
dt = 'Sun, 26 Jan 2014 20:08:04 GMT'
|
||||
headers = {ACCEPT_DATETIME: dt}
|
||||
|
||||
# not a timegate, ignore ACCEPT_DATETIME
|
||||
resp = self.testapp.get('/pywb/20141012tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
||||
# not a timegate, ignore ACCEPT_DATETIME, but use provided timestamp as memento-datetime
|
||||
resp = self.testapp.get('/pywb/20141012000000tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
||||
|
||||
assert resp.status_int == 200
|
||||
|
||||
# no vary header
|
||||
assert VARY not in resp.headers
|
||||
|
||||
# no memento-datetime
|
||||
assert MEMENTO_DATETIME not in resp.headers
|
||||
dt = 'Sun, 12 Oct 2014 00:00:00 GMT'
|
||||
# memento-datetime matches
|
||||
assert resp.headers[MEMENTO_DATETIME] == dt
|
||||
|
||||
links = self.get_links(resp)
|
||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
||||
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
|
||||
assert self.make_memento_link('http://www.iana.org/_css/2013.1/screen.css', '20141012000000', dt) in links, links
|
||||
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
||||
|
||||
assert '<http://localhost:80/pywb/20141012/http://www.iana.org/_css/2013.1/screen.css>; rel="memento"' in links
|
||||
|
||||
def test_memento_url(self):
|
||||
"""
|
||||
Memento response, 200 capture
|
||||
@ -156,6 +185,7 @@ class TestWb:
|
||||
links = self.get_links(resp)
|
||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
||||
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
|
||||
assert self.make_memento_link('http://www.iana.org/_css/2013.1/screen.css', '20140126200804', 'Sun, 26 Jan 2014 20:08:04 GMT') in links
|
||||
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
||||
|
||||
assert resp.headers[MEMENTO_DATETIME] == 'Sun, 26 Jan 2014 20:08:04 GMT'
|
||||
@ -174,6 +204,7 @@ class TestWb:
|
||||
links = self.get_links(resp)
|
||||
assert '<http://www.iana.org/domains/example>; rel="original"' in links
|
||||
assert '<http://localhost:80/pywb/http://www.iana.org/domains/example>; rel="timegate"' in links
|
||||
assert self.make_memento_link('http://www.iana.org/domains/example', '20140128051539', 'Tue, 28 Jan 2014 05:15:39 GMT') in links
|
||||
assert self.make_timemap_link('http://www.iana.org/domains/example') in links
|
||||
|
||||
assert resp.headers[MEMENTO_DATETIME] == 'Tue, 28 Jan 2014 05:15:39 GMT'
|
||||
@ -241,6 +272,7 @@ rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"'
|
||||
# for memento
|
||||
links = self.get_links(resp)
|
||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original timegate"' in links
|
||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="memento"; datetime="Mon, 27 Jan 2014 17:12:39 GMT"' in links
|
||||
#assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
||||
|
||||
assert resp.headers[MEMENTO_DATETIME] == 'Mon, 27 Jan 2014 17:12:39 GMT'
|
||||
@ -266,6 +298,7 @@ rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"'
|
||||
# for memento
|
||||
links = self.get_links(resp)
|
||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original timegate"' in links
|
||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="memento"; datetime="Sun, 26 Jan 2014 20:08:04 GMT"' in links
|
||||
#assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
||||
|
||||
assert resp.headers[MEMENTO_DATETIME] == 'Sun, 26 Jan 2014 20:08:04 GMT'
|
||||
|
Loading…
x
Reference in New Issue
Block a user