1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

memento: fix headers to be more consistent for framed replay. when using

frames, outer frames 'mirrors' mementos of the inner frame to be
discoverable by client side memento tools, tracked via #70
This commit is contained in:
Ilya Kreymer 2015-01-29 22:27:15 -08:00
parent 757345d317
commit 55426e7619
3 changed files with 73 additions and 28 deletions

View File

@ -74,14 +74,23 @@ class MementoRespMixin(object):
link = []
if is_memento and cdx:
http_date = timestamp_to_http_date(cdx['timestamp'])
self.status_headers.headers.append(('Memento-Datetime', http_date))
if is_memento:
if cdx:
http_date = timestamp_to_http_date(cdx['timestamp'])
# for top frame
elif wbrequest.wb_url.timestamp:
http_date = timestamp_to_http_date(wbrequest.wb_url.timestamp)
else:
http_date = None
elif is_memento and is_top_frame and wbrequest.wb_url.timestamp:
# top frame special case
canon_link = wbrequest.urlrewriter.get_new_url(mod='')
link.append(self.make_link(canon_link, 'memento'))
if http_date:
self.status_headers.headers.append(('Memento-Datetime',
http_date))
canon_link = wbrequest.urlrewriter.get_new_url(mod='')
link.append(self.make_memento_link(canon_link,
'memento',
http_date))
req_url = wbrequest.wb_url.url
@ -105,6 +114,9 @@ class MementoRespMixin(object):
def make_link(self, url, type):
return '<{0}>; rel="{1}"'.format(url, type)
def make_memento_link(self, url, type_, dt):
return '<{0}>; rel="{1}"; datetime="{2}"'.format(url, type_, dt)
def make_timemap_link(self, wbrequest):
format_ = '<{0}>; rel="timemap"; type="{1}"'
@ -121,7 +133,7 @@ class MementoResponse(MementoRespMixin, WbResponse):
#=================================================================
def make_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
def make_timemap_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
string = WbUrl.to_wburl_str(url=cdx['original'],
@ -161,17 +173,17 @@ def make_timemap(wbrequest, cdx_lines):
yield timegate.format(prefix + url)
# first memento link
yield make_memento_link(first_cdx, prefix,
yield make_timemap_memento_link(first_cdx, prefix,
datetime=from_date)
prev_cdx = None
for cdx in cdx_lines:
if prev_cdx:
yield make_memento_link(prev_cdx, prefix)
yield make_timemap_memento_link(prev_cdx, prefix)
prev_cdx = cdx
# last memento link, if any
if prev_cdx:
yield make_memento_link(prev_cdx, prefix, end='')
yield make_timemap_memento_link(prev_cdx, prefix, end='')

View File

@ -151,7 +151,7 @@ class HttpsUrlRewriter(UrlRewriter):
return self.remove_https(url)
def get_new_url(self, **kwargs):
return kwargs.get('url')
return kwargs.get('url', self.wburl.url)
def rebase_rewriter(self, new_url):
return self

View File

@ -1,4 +1,5 @@
import webtest
import re
from pywb.webapp.pywb_init import create_wb_router
from pywb.framework.wsgi_wrappers import init_app
from pywb.cdx.cdxobject import CDXObject
@ -20,12 +21,16 @@ class TestWb:
self.testapp = webtest.TestApp(self.app)
def get_links(self, resp):
return map(lambda x: x.strip(), resp.headers[LINK].split(','))
return map(lambda x: x.strip(), re.split(', (?![0-9])', resp.headers[LINK]))
def make_timemap_link(self, url):
format_ = '<http://localhost:80/pywb/timemap/*/{0}>; rel="timemap"; type="{1}"'
return format_.format(url, LINK_FORMAT)
def make_memento_link(self, url, ts, dt):
format_ = '<http://localhost:80/pywb/{1}/{0}>; rel="memento"; datetime="{2}"'
return format_.format(url, ts, dt)
# Below functionality is for archival (non-proxy) mode
# It is designed to conform to Memento protocol Pattern 2.1
# http://www.mementoweb.org/guide/rfc/#Pattern2.1
@ -93,15 +98,37 @@ class TestWb:
assert '/pywb/20140127171239/' in resp.headers['Location']
def test_top_frame_no_date(self):
def test_top_frame(self):
"""
A top-frame request with no date, must treat as intermediate
A top-frame request with no date, not returning memento-datetime
Include timemap, timegate, original headers
"""
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
resp = self.testapp.get('/pywb/tf_/http://www.iana.org/_css/2013.1/screen.css')
# not a timegate, ignore ACCEPT_DATETIME
assert resp.status_int == 200
# no vary header
assert VARY not in resp.headers
# not memento-datetime
assert MEMENTO_DATETIME not in resp.headers
links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
def test_top_frame_no_date_accept_datetime(self):
"""
A top-frame request with no date, reflects back accept-datetime date
Include timemap, timegate, original headers, and memento-datetime
"""
dt = 'Sun, 26 Jan 2014 20:08:04 GMT'
headers = {ACCEPT_DATETIME: dt}
# not a timegate, but use ACCEPT_DATETIME to infer memento for top frame
resp = self.testapp.get('/pywb/tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
assert resp.status_int == 200
@ -109,40 +136,42 @@ class TestWb:
# no vary header
assert VARY not in resp.headers
# no memento-datetime
assert MEMENTO_DATETIME not in resp.headers
# memento-datetime matches
assert resp.headers[MEMENTO_DATETIME] == dt
links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
assert self.make_memento_link('http://www.iana.org/_css/2013.1/screen.css', '20140126200804', dt) in links
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
def test_top_frame_with_date(self):
"""
A top-frame request with date, treat as intermediate
Include timemap, timegate, original headers and a link to the possible memento
A top-frame request with date, treat as memento
Include timemap, timegate, original headers, memento and memento-datetime
"""
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
dt = 'Sun, 26 Jan 2014 20:08:04 GMT'
headers = {ACCEPT_DATETIME: dt}
# not a timegate, ignore ACCEPT_DATETIME
resp = self.testapp.get('/pywb/20141012tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
# not a timegate, ignore ACCEPT_DATETIME, but use provided timestamp as memento-datetime
resp = self.testapp.get('/pywb/20141012000000tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
assert resp.status_int == 200
# no vary header
assert VARY not in resp.headers
# no memento-datetime
assert MEMENTO_DATETIME not in resp.headers
dt = 'Sun, 12 Oct 2014 00:00:00 GMT'
# memento-datetime matches
assert resp.headers[MEMENTO_DATETIME] == dt
links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
assert self.make_memento_link('http://www.iana.org/_css/2013.1/screen.css', '20141012000000', dt) in links, links
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
assert '<http://localhost:80/pywb/20141012/http://www.iana.org/_css/2013.1/screen.css>; rel="memento"' in links
def test_memento_url(self):
"""
Memento response, 200 capture
@ -156,6 +185,7 @@ class TestWb:
links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
assert self.make_memento_link('http://www.iana.org/_css/2013.1/screen.css', '20140126200804', 'Sun, 26 Jan 2014 20:08:04 GMT') in links
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
assert resp.headers[MEMENTO_DATETIME] == 'Sun, 26 Jan 2014 20:08:04 GMT'
@ -174,6 +204,7 @@ class TestWb:
links = self.get_links(resp)
assert '<http://www.iana.org/domains/example>; rel="original"' in links
assert '<http://localhost:80/pywb/http://www.iana.org/domains/example>; rel="timegate"' in links
assert self.make_memento_link('http://www.iana.org/domains/example', '20140128051539', 'Tue, 28 Jan 2014 05:15:39 GMT') in links
assert self.make_timemap_link('http://www.iana.org/domains/example') in links
assert resp.headers[MEMENTO_DATETIME] == 'Tue, 28 Jan 2014 05:15:39 GMT'
@ -241,6 +272,7 @@ rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"'
# for memento
links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original timegate"' in links
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="memento"; datetime="Mon, 27 Jan 2014 17:12:39 GMT"' in links
#assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
assert resp.headers[MEMENTO_DATETIME] == 'Mon, 27 Jan 2014 17:12:39 GMT'
@ -266,6 +298,7 @@ rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"'
# for memento
links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original timegate"' in links
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="memento"; datetime="Sun, 26 Jan 2014 20:08:04 GMT"' in links
#assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
assert resp.headers[MEMENTO_DATETIME] == 'Sun, 26 Jan 2014 20:08:04 GMT'