1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

'inverse' framed replay: ensure memento headers point to actual memento in inverse framed replay

add additional test for inverse framed replay, #92
fix framed replay url replace slash
This commit is contained in:
Ilya Kreymer 2015-04-01 16:21:44 -07:00
parent bd21480db9
commit 8bd6787595
7 changed files with 123 additions and 37 deletions

View File

@ -47,6 +47,8 @@ class MementoRespMixin(object):
if not wbrequest or not wbrequest.wb_url:
return
mod = wbrequest.options.get('replay_mod', '')
#is_top_frame = wbrequest.wb_url.is_top_frame
is_top_frame = wbrequest.options.get('is_top_frame')
@ -95,7 +97,7 @@ class MementoRespMixin(object):
self.status_headers.headers.append(('Memento-Datetime',
http_date))
canon_link = wbrequest.urlrewriter.get_new_url(mod='',
canon_link = wbrequest.urlrewriter.get_new_url(mod=mod,
timestamp=ts,
url=url)
@ -113,7 +115,7 @@ class MementoRespMixin(object):
link.append(self.make_timemap_link(wbrequest))
if is_memento and not is_timegate:
timegate = wbrequest.urlrewriter.get_new_url(mod='', timestamp='')
timegate = wbrequest.urlrewriter.get_new_url(mod=mod, timestamp='')
link.append(self.make_link(timegate, 'timegate'))
link = ', '.join(link)
@ -142,11 +144,13 @@ class MementoResponse(MementoRespMixin, WbResponse):
#=================================================================
def make_timemap_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
def make_timemap_memento_link(cdx, prefix, datetime=None,
rel='memento', end=',\n', mod=''):
memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
string = WbUrl.to_wburl_str(url=cdx['url'],
mod='',
mod=mod,
timestamp=cdx['timestamp'],
type=WbUrl.REPLAY)
@ -162,6 +166,7 @@ def make_timemap_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\
def make_timemap(wbrequest, cdx_lines):
prefix = wbrequest.wb_prefix
url = wbrequest.wb_url.url
mod = wbrequest.options.get('replay_mod', '')
# get first memento as it'll be used for 'from' field
first_cdx = cdx_lines.next()
@ -179,20 +184,24 @@ def make_timemap(wbrequest, cdx_lines):
# timegate link
timegate = '<{0}>; rel="timegate",\n'
yield timegate.format(prefix + url)
timegate_url= WbUrl.to_wburl_str(url=url,
mod=mod,
type=WbUrl.LATEST_REPLAY)
yield timegate.format(prefix + timegate_url)
# first memento link
yield make_timemap_memento_link(first_cdx, prefix,
datetime=from_date)
datetime=from_date, mod=mod)
prev_cdx = None
for cdx in cdx_lines:
if prev_cdx:
yield make_timemap_memento_link(prev_cdx, prefix)
yield make_timemap_memento_link(prev_cdx, prefix, mod=mod)
prev_cdx = cdx
# last memento link, if any
if prev_cdx:
yield make_timemap_memento_link(prev_cdx, prefix, end='')
yield make_timemap_memento_link(prev_cdx, prefix, end='', mod=mod)

View File

@ -26,7 +26,7 @@ var curr_state = {};
function make_url(url, ts, mod)
{
if (mod) {
if (ts || mod) {
mod += "/";
}

View File

@ -65,6 +65,9 @@ class SearchPageWbUrlHandler(WbUrlHandler):
if wbrequest.wb_url_str == '/':
return self.render_search_page(wbrequest)
wbrequest.options['replay_mod'] = self.replay_mod
wbrequest.options['frame_mod'] = self.frame_mod
# render top level frame if in frame mode
# (not supported in proxy mode)
if (self.is_frame_mode and wbrequest.wb_url and
@ -97,9 +100,7 @@ class SearchPageWbUrlHandler(WbUrlHandler):
wbrequest=wbrequest,
timestamp=timestamp,
url=wbrequest.wb_url.get_url(),
banner_html=self.banner_html,
frame_mod=self.frame_mod,
replay_mod=self.replay_mod)
banner_html=self.banner_html)
return params
@ -198,15 +199,12 @@ class StaticHandler(BaseHandler):
full_path = self.static_path + url
try:
data = self.block_loader.load(full_path)
data = self.block_loader.load_file_or_resource(full_path)
try:
data.seek(0, 2)
size = data.tell()
data.seek(0)
headers = [('Content-Length', str(size))]
except IOError:
headers = None
data.seek(0, 2)
size = data.tell()
data.seek(0)
headers = [('Content-Length', str(size))]
if 'wsgi.file_wrapper' in wbrequest.env:
reader = wbrequest.env['wsgi.file_wrapper'](data)

21
tests/memento_fixture.py Normal file
View File

@ -0,0 +1,21 @@
import re
# Header names and the link-format media type shared by the Memento tests.
MEMENTO_DATETIME = 'Memento-Datetime'
ACCEPT_DATETIME = 'Accept-Datetime'
LINK = 'Link'
VARY = 'Vary'
LINK_FORMAT = 'application/link-format'


class MementoMixin(object):
    """Shared helpers for tests that inspect Memento ``Link`` headers."""

    def get_links(self, resp):
        """Split the response's ``Link`` header into a list of link values.

        The header is split on ``', '`` except where the next character is a
        digit, so the comma inside an HTTP date (``Mon, 27 Jan ...``) within a
        ``datetime="..."`` attribute does not break a single link apart.

        :param resp: response object exposing a ``headers`` mapping that
                     contains the ``Link`` header
        :returns: list of stripped link-value strings
        :raises KeyError: if the response has no ``Link`` header
        """
        # Return a real list rather than a lazy map() object: callers run
        # several `in` checks against the same result, which a one-shot
        # iterator would not support.
        return [link.strip()
                for link in re.split(r', (?![0-9])', resp.headers[LINK])]

    def make_timemap_link(self, url, coll='pywb'):
        """Return the expected ``rel="timemap"`` link value for *url*.

        :param url: original (archived) url
        :param coll: collection name used as the first path segment
        """
        format_ = '<http://localhost:80/{2}/timemap/*/{0}>; rel="timemap"; type="{1}"'
        return format_.format(url, LINK_FORMAT, coll)

    def make_memento_link(self, url, ts, dt, coll='pywb'):
        """Return the expected ``rel="memento"`` link value for *url*.

        :param url: original (archived) url
        :param ts: timestamp path segment (may carry a modifier suffix
                   such as ``mp_``)
        :param dt: value for the ``datetime`` attribute
        :param coll: collection name used as the first path segment
        """
        format_ = '<http://localhost:80/{3}/{1}/{0}>; rel="memento"; datetime="{2}"'
        return format_.format(url, ts, dt, coll)

View File

@ -0,0 +1,14 @@
collections:
# <name>: <cdx_path>
# collection will be accessed via /<name>
# <cdx_path> is a string or list of:
# - string or list of one or more local .cdx files
# - string or list of one or more local dirs with .cdx files
# - a string value indicating remote http cdx server
pywb: ./sample_archive/cdx/
archive_paths: ./sample_archive/warcs/
enable_memento: true
framed_replay: inverse

View File

@ -0,0 +1,59 @@
import webtest
from pywb.webapp.pywb_init import create_wb_router
from pywb.framework.wsgi_wrappers import init_app
from memento_fixture import *
class TestMementoFrameInverse(MementoMixin):
    """Memento header checks for the 'inverse' framed replay configuration."""

    TEST_CONFIG = 'tests/test_config_frames.yaml'

    def setup(self):
        """Create the WSGI app from TEST_CONFIG and wrap it in a test client."""
        self.app = init_app(create_wb_router,
                            load_yaml=True,
                            config_file=self.TEST_CONFIG)

        self.testapp = webtest.TestApp(self.app)

    def test_top_frame_replay(self):
        """Request the url without a modifier and check headers and body."""
        resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')

        # Memento headers: a datetime must be present, but no Vary header
        assert VARY not in resp.headers
        assert MEMENTO_DATETIME in resp.headers

        # the memento link is expected to carry the mp_ modifier
        links = self.get_links(resp)
        expected_memento = self.make_memento_link(
            'http://www.iana.org/',
            '20140127171238mp_',
            'Mon, 27 Jan 2014 17:12:38 GMT')
        assert expected_memento in links

        # the timegate link is expected to carry the mp_ modifier as well
        expected_timegate = '<http://localhost:80/pywb/mp_/http://www.iana.org/>; rel="timegate"'
        assert expected_timegate in links

        # the body should contain an iframe and the mp_ replay url
        body = resp.body
        assert '<iframe ' in body
        assert '/pywb/20140127171238mp_/http://www.iana.org/' in body, body

    def test_inner_replay(self):
        """Request the url with the mp_ modifier and check headers and body."""
        resp = self.testapp.get('/pywb/20140127171238mp_/http://www.iana.org/')

        # Memento headers: a datetime must be present, but no Vary header
        assert VARY not in resp.headers
        assert MEMENTO_DATETIME in resp.headers

        # same memento link as asserted for the modifier-less request
        links = self.get_links(resp)
        expected_memento = self.make_memento_link(
            'http://www.iana.org/',
            '20140127171238mp_',
            'Mon, 27 Jan 2014 17:12:38 GMT')
        assert expected_memento in links

        # same timegate link as asserted for the modifier-less request
        expected_timegate = '<http://localhost:80/pywb/mp_/http://www.iana.org/>; rel="timegate"'
        assert expected_timegate in links

        # the body should contain the timestamp, the script references
        # and a rewritten link
        body = resp.body
        assert '"20140127171238"' in body
        assert 'wb.js' in body
        assert 'new _WBWombat' in body, body
        assert '/pywb/20140127171238mp_/http://www.iana.org/time-zones"' in body

View File

@ -5,13 +5,9 @@ from pywb.framework.wsgi_wrappers import init_app
from pywb.cdx.cdxobject import CDXObject
from pywb.utils.timeutils import timestamp_now
MEMENTO_DATETIME = 'Memento-Datetime'
ACCEPT_DATETIME = 'Accept-Datetime'
LINK = 'Link'
VARY = 'Vary'
LINK_FORMAT = 'application/link-format'
from memento_fixture import *
class TestWb:
class TestMemento(MementoMixin):
TEST_CONFIG = 'tests/test_config_memento.yaml'
def setup(self):
@ -21,17 +17,6 @@ class TestWb:
self.testapp = webtest.TestApp(self.app)
def get_links(self, resp):
return map(lambda x: x.strip(), re.split(', (?![0-9])', resp.headers[LINK]))
def make_timemap_link(self, url, coll='pywb'):
format_ = '<http://localhost:80/{2}/timemap/*/{0}>; rel="timemap"; type="{1}"'
return format_.format(url, LINK_FORMAT, coll)
def make_memento_link(self, url, ts, dt, coll='pywb'):
format_ = '<http://localhost:80/{3}/{1}/{0}>; rel="memento"; datetime="{2}"'
return format_.format(url, ts, dt, coll)
# Below functionality is for archival (non-proxy) mode
# It is designed to conform to Memento protocol Pattern 2.1
# http://www.mementoweb.org/guide/rfc/#Pattern2.1