mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
'inverse' framed replay: ensure the Memento headers point to the actual memento
add an additional test for inverse framed replay, #92, and fix slash handling when replacing the URL in framed replay
This commit is contained in:
parent
bd21480db9
commit
8bd6787595
@ -47,6 +47,8 @@ class MementoRespMixin(object):
|
||||
if not wbrequest or not wbrequest.wb_url:
|
||||
return
|
||||
|
||||
mod = wbrequest.options.get('replay_mod', '')
|
||||
|
||||
#is_top_frame = wbrequest.wb_url.is_top_frame
|
||||
is_top_frame = wbrequest.options.get('is_top_frame')
|
||||
|
||||
@ -95,7 +97,7 @@ class MementoRespMixin(object):
|
||||
self.status_headers.headers.append(('Memento-Datetime',
|
||||
http_date))
|
||||
|
||||
canon_link = wbrequest.urlrewriter.get_new_url(mod='',
|
||||
canon_link = wbrequest.urlrewriter.get_new_url(mod=mod,
|
||||
timestamp=ts,
|
||||
url=url)
|
||||
|
||||
@ -113,7 +115,7 @@ class MementoRespMixin(object):
|
||||
link.append(self.make_timemap_link(wbrequest))
|
||||
|
||||
if is_memento and not is_timegate:
|
||||
timegate = wbrequest.urlrewriter.get_new_url(mod='', timestamp='')
|
||||
timegate = wbrequest.urlrewriter.get_new_url(mod=mod, timestamp='')
|
||||
link.append(self.make_link(timegate, 'timegate'))
|
||||
|
||||
link = ', '.join(link)
|
||||
@ -142,11 +144,13 @@ class MementoResponse(MementoRespMixin, WbResponse):
|
||||
|
||||
|
||||
#=================================================================
|
||||
def make_timemap_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
|
||||
def make_timemap_memento_link(cdx, prefix, datetime=None,
|
||||
rel='memento', end=',\n', mod=''):
|
||||
|
||||
memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
|
||||
|
||||
string = WbUrl.to_wburl_str(url=cdx['url'],
|
||||
mod='',
|
||||
mod=mod,
|
||||
timestamp=cdx['timestamp'],
|
||||
type=WbUrl.REPLAY)
|
||||
|
||||
@ -162,6 +166,7 @@ def make_timemap_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\
|
||||
def make_timemap(wbrequest, cdx_lines):
|
||||
prefix = wbrequest.wb_prefix
|
||||
url = wbrequest.wb_url.url
|
||||
mod = wbrequest.options.get('replay_mod', '')
|
||||
|
||||
# get first memento as it'll be used for 'from' field
|
||||
first_cdx = cdx_lines.next()
|
||||
@ -179,20 +184,24 @@ def make_timemap(wbrequest, cdx_lines):
|
||||
|
||||
# timegate link
|
||||
timegate = '<{0}>; rel="timegate",\n'
|
||||
yield timegate.format(prefix + url)
|
||||
timegate_url= WbUrl.to_wburl_str(url=url,
|
||||
mod=mod,
|
||||
type=WbUrl.LATEST_REPLAY)
|
||||
|
||||
yield timegate.format(prefix + timegate_url)
|
||||
|
||||
# first memento link
|
||||
yield make_timemap_memento_link(first_cdx, prefix,
|
||||
datetime=from_date)
|
||||
datetime=from_date, mod=mod)
|
||||
|
||||
prev_cdx = None
|
||||
|
||||
for cdx in cdx_lines:
|
||||
if prev_cdx:
|
||||
yield make_timemap_memento_link(prev_cdx, prefix)
|
||||
yield make_timemap_memento_link(prev_cdx, prefix, mod=mod)
|
||||
|
||||
prev_cdx = cdx
|
||||
|
||||
# last memento link, if any
|
||||
if prev_cdx:
|
||||
yield make_timemap_memento_link(prev_cdx, prefix, end='')
|
||||
yield make_timemap_memento_link(prev_cdx, prefix, end='', mod=mod)
|
||||
|
@ -26,7 +26,7 @@ var curr_state = {};
|
||||
|
||||
function make_url(url, ts, mod)
|
||||
{
|
||||
if (mod) {
|
||||
if (ts || mod) {
|
||||
mod += "/";
|
||||
}
|
||||
|
||||
|
@ -65,6 +65,9 @@ class SearchPageWbUrlHandler(WbUrlHandler):
|
||||
if wbrequest.wb_url_str == '/':
|
||||
return self.render_search_page(wbrequest)
|
||||
|
||||
wbrequest.options['replay_mod'] = self.replay_mod
|
||||
wbrequest.options['frame_mod'] = self.frame_mod
|
||||
|
||||
# render top level frame if in frame mode
|
||||
# (not supported in proxy mode)
|
||||
if (self.is_frame_mode and wbrequest.wb_url and
|
||||
@ -97,9 +100,7 @@ class SearchPageWbUrlHandler(WbUrlHandler):
|
||||
wbrequest=wbrequest,
|
||||
timestamp=timestamp,
|
||||
url=wbrequest.wb_url.get_url(),
|
||||
banner_html=self.banner_html,
|
||||
frame_mod=self.frame_mod,
|
||||
replay_mod=self.replay_mod)
|
||||
banner_html=self.banner_html)
|
||||
|
||||
return params
|
||||
|
||||
@ -198,15 +199,12 @@ class StaticHandler(BaseHandler):
|
||||
full_path = self.static_path + url
|
||||
|
||||
try:
|
||||
data = self.block_loader.load(full_path)
|
||||
data = self.block_loader.load_file_or_resource(full_path)
|
||||
|
||||
try:
|
||||
data.seek(0, 2)
|
||||
size = data.tell()
|
||||
data.seek(0)
|
||||
headers = [('Content-Length', str(size))]
|
||||
except IOError:
|
||||
headers = None
|
||||
data.seek(0, 2)
|
||||
size = data.tell()
|
||||
data.seek(0)
|
||||
headers = [('Content-Length', str(size))]
|
||||
|
||||
if 'wsgi.file_wrapper' in wbrequest.env:
|
||||
reader = wbrequest.env['wsgi.file_wrapper'](data)
|
||||
|
21
tests/memento_fixture.py
Normal file
21
tests/memento_fixture.py
Normal file
@ -0,0 +1,21 @@
|
||||
import re
|
||||
|
||||
MEMENTO_DATETIME = 'Memento-Datetime'
|
||||
ACCEPT_DATETIME = 'Accept-Datetime'
|
||||
LINK = 'Link'
|
||||
VARY = 'Vary'
|
||||
LINK_FORMAT = 'application/link-format'
|
||||
|
||||
class MementoMixin(object):
    """Helpers shared by test classes that inspect Memento response headers."""

    def get_links(self, resp):
        """Return the individual values of the ``Link`` header of *resp*.

        The header is split on ``', '`` except where the separator is
        followed by a digit, so the comma inside a date such as
        ``datetime="Mon, 27 Jan 2014 ..."`` does not break a link apart.

        A real list is returned rather than ``map(...)``: on Python 3
        ``map`` is a one-shot iterator, and callers run several ``in``
        checks against the same result.
        """
        return [link.strip()
                for link in re.split(r', (?![0-9])', resp.headers[LINK])]

    def make_timemap_link(self, url, coll='pywb'):
        """Build the expected ``rel="timemap"`` link value for *url*.

        *coll* is the collection name used as the first path segment.
        """
        format_ = '<http://localhost:80/{2}/timemap/*/{0}>; rel="timemap"; type="{1}"'
        return format_.format(url, LINK_FORMAT, coll)

    def make_memento_link(self, url, ts, dt, coll='pywb'):
        """Build the expected ``rel="memento"`` link value for *url*.

        *ts* is the path segment placed before the url (timestamp, plus
        any modifier the caller appends), *dt* is the value of the
        ``datetime`` attribute and *coll* is the collection name.
        """
        format_ = '<http://localhost:80/{3}/{1}/{0}>; rel="memento"; datetime="{2}"'
        return format_.format(url, ts, dt, coll)
|
||||
|
||||
|
14
tests/test_config_frames.yaml
Normal file
14
tests/test_config_frames.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
collections:
|
||||
# <name>: <cdx_path>
|
||||
# collection will be accessed via /<name>
|
||||
# <cdx_path> is a string or list of:
|
||||
# - string or list of one or more local .cdx files
|
||||
# - string or list of one or more local dirs with .cdx files
|
||||
# - a string value indicating remote http cdx server
|
||||
pywb: ./sample_archive/cdx/
|
||||
|
||||
archive_paths: ./sample_archive/warcs/
|
||||
|
||||
enable_memento: true
|
||||
|
||||
framed_replay: inverse
|
59
tests/test_framed_inverse.py
Normal file
59
tests/test_framed_inverse.py
Normal file
@ -0,0 +1,59 @@
|
||||
import webtest
|
||||
from pywb.webapp.pywb_init import create_wb_router
|
||||
from pywb.framework.wsgi_wrappers import init_app
|
||||
|
||||
from memento_fixture import *
|
||||
|
||||
class TestMementoFrameInverse(MementoMixin):
    """Memento header and body checks with ``framed_replay: inverse``."""

    TEST_CONFIG = 'tests/test_config_frames.yaml'

    def setup(self):
        """Create the WSGI app from TEST_CONFIG and wrap it in a test client."""
        app = init_app(create_wb_router,
                       load_yaml=True,
                       config_file=self.TEST_CONFIG)
        self.app = app
        self.testapp = webtest.TestApp(app)

    def _check_memento_headers(self, resp):
        """Assert the Memento headers expected by both tests in this class."""
        # Memento Headers
        # no vary header
        assert VARY not in resp.headers
        assert MEMENTO_DATETIME in resp.headers

        links = self.get_links(resp)

        # memento link
        expected_memento = self.make_memento_link(
            'http://www.iana.org/',
            '20140127171238mp_',
            'Mon, 27 Jan 2014 17:12:38 GMT')
        assert expected_memento in links

        # timegate link
        assert '<http://localhost:80/pywb/mp_/http://www.iana.org/>; rel="timegate"' in links

    def test_top_frame_replay(self):
        """Request without a modifier: headers plus the top-frame page body."""
        resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')

        self._check_memento_headers(resp)

        # Body
        assert '<iframe ' in resp.body
        assert '/pywb/20140127171238mp_/http://www.iana.org/' in resp.body, resp.body

    def test_inner_replay(self):
        """Request with the ``mp_`` modifier: headers plus the replayed page body."""
        resp = self.testapp.get('/pywb/20140127171238mp_/http://www.iana.org/')

        self._check_memento_headers(resp)

        # Body
        assert '"20140127171238"' in resp.body
        assert 'wb.js' in resp.body
        assert 'new _WBWombat' in resp.body, resp.body
        assert '/pywb/20140127171238mp_/http://www.iana.org/time-zones"' in resp.body
|
@ -5,13 +5,9 @@ from pywb.framework.wsgi_wrappers import init_app
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
from pywb.utils.timeutils import timestamp_now
|
||||
|
||||
MEMENTO_DATETIME = 'Memento-Datetime'
|
||||
ACCEPT_DATETIME = 'Accept-Datetime'
|
||||
LINK = 'Link'
|
||||
VARY = 'Vary'
|
||||
LINK_FORMAT = 'application/link-format'
|
||||
from memento_fixture import *
|
||||
|
||||
class TestWb:
|
||||
class TestMemento(MementoMixin):
|
||||
TEST_CONFIG = 'tests/test_config_memento.yaml'
|
||||
|
||||
def setup(self):
|
||||
@ -21,17 +17,6 @@ class TestWb:
|
||||
|
||||
self.testapp = webtest.TestApp(self.app)
|
||||
|
||||
def get_links(self, resp):
|
||||
return map(lambda x: x.strip(), re.split(', (?![0-9])', resp.headers[LINK]))
|
||||
|
||||
def make_timemap_link(self, url, coll='pywb'):
|
||||
format_ = '<http://localhost:80/{2}/timemap/*/{0}>; rel="timemap"; type="{1}"'
|
||||
return format_.format(url, LINK_FORMAT, coll)
|
||||
|
||||
def make_memento_link(self, url, ts, dt, coll='pywb'):
|
||||
format_ = '<http://localhost:80/{3}/{1}/{0}>; rel="memento"; datetime="{2}"'
|
||||
return format_.format(url, ts, dt, coll)
|
||||
|
||||
# Below functionality is for archival (non-proxy) mode
|
||||
# It is designed to conform to Memento protocol Pattern 2.1
|
||||
# http://www.mementoweb.org/guide/rfc/#Pattern2.1
|
||||
|
Loading…
x
Reference in New Issue
Block a user