1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

framed replay: invert framed replay paradigm, replay always uses

canonical, no-modifier archival url (instead of mp_).
When using frames, the page redirects to a 'tf_' page, which then uses
history.replaceState() to change the url back to its canonical form.
memento: support for framed replay, include memento headers in top frame
bump version to 0.6.2
This commit is contained in:
Ilya Kreymer 2014-10-18 11:21:07 -07:00
parent b99dcb41f0
commit 4a1cc46fa3
20 changed files with 233 additions and 145 deletions

View File

@ -1,6 +1,10 @@
pywb 0.6.1 changelist
pywb 0.6.2 changelist
~~~~~~~~~~~~~~~~~~~~~
* Invert framed replay paradigm: the canonical page is always without a modifier (instead of with `mp_`). If using frames, the page redirects to `tf_` and uses replaceState() to change the url back to canonical form.
* Enable Memento support for framed replay, include Memento headers in top frame
* Easier to customize just the banner html, via the `banner_html` setting in the config. The default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner.
Other implementations may create the banner via custom JS or directly insert HTML, as needed. Setting `banner_html: False` will disable the banner.

View File

@ -1,4 +1,4 @@
PyWb 0.6.1
PyWb 0.6.2
==========
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop

View File

@ -46,15 +46,22 @@ class MementoRespMixin(object):
if not wbrequest or not wbrequest.wb_url:
return
is_timegate = wbrequest.options.get('is_timegate', False)
is_top_frame = wbrequest.wb_url.is_top_frame
is_timegate = wbrequest.options.get('is_timegate', False) and not is_top_frame
if is_timegate:
self.status_headers.headers.append(('Vary', 'accept-datetime'))
# Determine if memento:
# if no cdx included, definitely not a memento
is_memento = False
# if no cdx included, not a memento, unless top-frame special
if not cdx:
is_memento = False
# special case: include the memento headers, except Memento-Datetime,
# since this is really an intermediate resource
if is_top_frame:
is_memento = True
# otherwise, if in proxy mode, then always a memento
elif wbrequest.options['is_proxy']:
@ -64,13 +71,19 @@ class MementoRespMixin(object):
else:
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
if is_memento:
link = []
if is_memento and cdx:
http_date = timestamp_to_http_date(cdx['timestamp'])
self.status_headers.headers.append(('Memento-Datetime', http_date))
req_url = wbrequest.wb_url.url
elif is_memento and is_top_frame and wbrequest.wb_url.timestamp:
# top frame special case
canon_link = wbrequest.urlrewriter.prefix
canon_link += wbrequest.wb_url.to_str(mod='')
link.append(self.make_link(canon_link, 'memento'))
link = []
req_url = wbrequest.wb_url.url
if is_memento and is_timegate:
link.append(self.make_link(req_url, 'original timegate'))
@ -82,7 +95,8 @@ class MementoRespMixin(object):
link.append(self.make_timemap_link(wbrequest))
if is_memento and not is_timegate:
timegate = wbrequest.urlrewriter.get_timestamp_url('')
timegate = wbrequest.urlrewriter.prefix
timegate += wbrequest.wb_url.to_str(mod='', timestamp='')
link.append(self.make_link(timegate, 'timegate'))
link = ', '.join(link)
@ -115,7 +129,7 @@ def make_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
string = WbUrl.to_wburl_str(url=cdx['original'],
mod='mp_',
mod='',
timestamp=cdx['timestamp'],
type=WbUrl.REPLAY)
@ -148,7 +162,7 @@ def make_timemap(wbrequest, cdx_lines):
# timegate link
timegate = '<{0}>; rel="timegate",\n'
yield timegate.format(prefix + 'mp_/' + url)
yield timegate.format(prefix + url)
# first memento link
yield make_memento_link(first_cdx, prefix,

View File

@ -51,6 +51,8 @@ class WbRequest(object):
self.coll = coll
self.final_mod = ''
if not host_prefix:
host_prefix = self.make_host_prefix(env)

View File

@ -25,7 +25,7 @@ class RegexRewriter(object):
@staticmethod
def archival_rewrite(rewriter):
return lambda string: rewriter.rewrite(string, 'mp_')
return lambda string: rewriter.rewrite(string)
#@staticmethod
#def replacer(other):

View File

@ -28,7 +28,7 @@ class RewriteContent:
ds_rules_file=ds_rules_file)
if is_framed_replay:
self.defmod = 'mp_'
self.defmod = ''
else:
self.defmod = ''

View File

@ -62,7 +62,7 @@ ur"""
# Script tag
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</script>')
<script>window.WB_wombat_location = "/web/20131226101010mp_/http://example.com/a/b/c.html"</script>
<script>window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html"</script>
# Script tag + crossorigin
>>> parse('<script src="/js/scripts.js" crossorigin="anonymous"></script>')
@ -70,21 +70,21 @@ ur"""
# Unterminated script tag, handle and auto-terminate
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</sc>')
<script>window.WB_wombat_location = "/web/20131226101010mp_/http://example.com/a/b/c.html"</sc></script>
<script>window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html"</sc></script>
>>> parse('<script>/*<![CDATA[*/window.location = "http://example.com/a/b/c.html;/*]]>*/"</script>')
<script>/*<![CDATA[*/window.WB_wombat_location = "/web/20131226101010mp_/http://example.com/a/b/c.html;/*]]>*/"</script>
<script>/*<![CDATA[*/window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html;/*]]>*/"</script>
>>> parse('<div style="background: url(\'abc.html\')" onblah onclick="location = \'redirect.html\'"></div>')
<div style="background: url('/web/20131226101010mp_/http://example.com/some/path/abc.html')" onblah="" onclick="WB_wombat_location = 'redirect.html'"></div>
<div style="background: url('/web/20131226101010/http://example.com/some/path/abc.html')" onblah="" onclick="WB_wombat_location = 'redirect.html'"></div>
# Style
>>> parse('<style>@import "styles.css" .a { font-face: url(\'myfont.ttf\') }</style>')
<style>@import "/web/20131226101010mp_/http://example.com/some/path/styles.css" .a { font-face: url('/web/20131226101010mp_/http://example.com/some/path/myfont.ttf') }</style>
<style>@import "/web/20131226101010/http://example.com/some/path/styles.css" .a { font-face: url('/web/20131226101010/http://example.com/some/path/myfont.ttf') }</style>
# Unterminated style tag, handle and auto-terminate
>>> parse('<style>@import url(styles.css)')
<style>@import url(/web/20131226101010mp_/http://example.com/some/path/styles.css)</style>
<style>@import url(/web/20131226101010/http://example.com/some/path/styles.css)</style>
# Head Insertion
>>> parse('<html><head><script src="other.js"></script></head><body>Test</body></html>', head_insert = '<script src="cool.js"></script>')

View File

@ -12,16 +12,16 @@ r"""
#=================================================================
>>> _test_js('location = "http://example.com/abc.html"')
'WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html"'
'WB_wombat_location = "/web/20131010/http://example.com/abc.html"'
>>> _test_js(r'location = "http:\/\/example.com/abc.html"')
'WB_wombat_location = "/web/20131010mp_/http:\\/\\/example.com/abc.html"'
'WB_wombat_location = "/web/20131010/http:\\/\\/example.com/abc.html"'
>>> _test_js(r'location = "http:\\/\\/example.com/abc.html"')
'WB_wombat_location = "/web/20131010mp_/http:\\\\/\\\\/example.com/abc.html"'
'WB_wombat_location = "/web/20131010/http:\\\\/\\\\/example.com/abc.html"'
>>> _test_js(r"location = 'http://example.com/abc.html/'")
"WB_wombat_location = '/web/20131010mp_/http://example.com/abc.html/'"
"WB_wombat_location = '/web/20131010/http://example.com/abc.html/'"
>>> _test_js(r'location = http://example.com/abc.html/')
'WB_wombat_location = http://example.com/abc.html/'
@ -37,21 +37,21 @@ r"""
'"/location" == some_location_val; locations = WB_wombat_location;'
>>> _test_js('cool_Location = "http://example.com/abc.html"')
'cool_Location = "/web/20131010mp_/http://example.com/abc.html"'
'cool_Location = "/web/20131010/http://example.com/abc.html"'
>>> _test_js('window.location = "http://example.com/abc.html" document.domain = "anotherdomain.com"')
'window.WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html" document.WB_wombat_domain = "anotherdomain.com"'
'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html" document.WB_wombat_domain = "anotherdomain.com"'
>>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"')
'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"'
# custom rules added
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
'window.WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html"; /*some_func(); */'
'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */'
# scheme-agnostic
>>> _test_js('cool_Location = "//example.com/abc.html" //comment')
'cool_Location = "/web/20131010mp_/http://example.com/abc.html" //comment'
'cool_Location = "/web/20131010/http://example.com/abc.html" //comment'
# document.cookie test
>>> _test_js('document.cookie = "a=b; Path=/"')
@ -59,7 +59,7 @@ r"""
# js-escaped
>>> _test_js('&quot;http:\\/\\/www.example.com\\/some\\/path\\/?query=1&quot;')
'&quot;/web/20131010mp_/http:\\/\\/www.example.com\\/some\\/path\\/?query=1&quot;'
'&quot;/web/20131010/http:\\/\\/www.example.com\\/some\\/path\\/?query=1&quot;'
#=================================================================
@ -67,68 +67,68 @@ r"""
#=================================================================
>>> _test_xml('<tag xmlns="http://www.example.com/ns" attr="http://example.com"></tag>')
'<tag xmlns="http://www.example.com/ns" attr="/web/20131010mp_/http://example.com"></tag>'
'<tag xmlns="http://www.example.com/ns" attr="/web/20131010/http://example.com"></tag>'
>>> _test_xml('<tag xmlns:xsi="http://www.example.com/ns" attr=" http://example.com"></tag>')
'<tag xmlns:xsi="http://www.example.com/ns" attr=" /web/20131010mp_/http://example.com"></tag>'
'<tag xmlns:xsi="http://www.example.com/ns" attr=" /web/20131010/http://example.com"></tag>'
>>> _test_xml('<tag> http://example.com<other>abchttp://example.com</other></tag>')
'<tag> /web/20131010mp_/http://example.com<other>abchttp://example.com</other></tag>'
'<tag> /web/20131010/http://example.com<other>abchttp://example.com</other></tag>'
>>> _test_xml('<main> http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> http://example.com </main>')
'<main> /web/20131010mp_/http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> /web/20131010mp_/http://example.com </main>'
'<main> /web/20131010/http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> /web/20131010/http://example.com </main>'
#=================================================================
# CSS Rewriting
#=================================================================
>>> _test_css("background: url('/some/path.html')")
"background: url('/web/20131010mp_/http://example.com/some/path.html')"
"background: url('/web/20131010/http://example.com/some/path.html')"
>>> _test_css("background: url('../path.html')")
"background: url('/web/20131010mp_/http://example.com/path.html')"
"background: url('/web/20131010/http://example.com/path.html')"
>>> _test_css("background: url(\"http://domain.com/path.html\")")
'background: url("/web/20131010mp_/http://domain.com/path.html")'
'background: url("/web/20131010/http://domain.com/path.html")'
>>> _test_css("background: url(file.jpeg)")
'background: url(/web/20131010mp_/http://example.com/file.jpeg)'
'background: url(/web/20131010/http://example.com/file.jpeg)'
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
"background:#abc url('/web/20131010mp_/http://example.com/static/images/layout/logo.png')"
"background:#abc url('/web/20131010/http://example.com/static/images/layout/logo.png')"
>>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')")
"background:#000 url('/web/20131010mp_/http://example.com/images/layout/logo.png')"
"background:#000 url('/web/20131010/http://example.com/images/layout/logo.png')"
>>> _test_css("background: url('')")
"background: url('')"
>>> _test_css("background: url (\"weirdpath\')")
'background: url ("/web/20131010mp_/http://example.com/weirdpath\')'
'background: url ("/web/20131010/http://example.com/weirdpath\')'
>>> _test_css("@import url ('path.css')")
"@import url ('/web/20131010mp_/http://example.com/path.css')"
"@import url ('/web/20131010/http://example.com/path.css')"
>>> _test_css("@import url('path.css')")
"@import url('/web/20131010mp_/http://example.com/path.css')"
"@import url('/web/20131010/http://example.com/path.css')"
>>> _test_css("@import ( 'path.css')")
"@import ( '/web/20131010mp_/http://example.com/path.css')"
"@import ( '/web/20131010/http://example.com/path.css')"
>>> _test_css("@import \"path.css\"")
'@import "/web/20131010mp_/http://example.com/path.css"'
'@import "/web/20131010/http://example.com/path.css"'
>>> _test_css("@import ('../path.css\"")
'@import (\'/web/20131010mp_/http://example.com/path.css"'
'@import (\'/web/20131010/http://example.com/path.css"'
>>> _test_css("@import ('../url.css\"")
'@import (\'/web/20131010mp_/http://example.com/url.css"'
'@import (\'/web/20131010/http://example.com/url.css"'
>>> _test_css("@import (\"url.css\")")
'@import ("/web/20131010mp_/http://example.com/url.css")'
'@import ("/web/20131010/http://example.com/url.css")'
>>> _test_css("@import url(/url.css)\n@import url(/anotherurl.css)\n @import url(/and_a_third.css)")
'@import url(/web/20131010mp_/http://example.com/url.css)\n@import url(/web/20131010mp_/http://example.com/anotherurl.css)\n @import url(/web/20131010mp_/http://example.com/and_a_third.css)'
'@import url(/web/20131010/http://example.com/url.css)\n@import url(/web/20131010/http://example.com/anotherurl.css)\n @import url(/web/20131010/http://example.com/and_a_third.css)'
"""

View File

@ -193,10 +193,14 @@ class WbUrl(BaseWbUrl):
return (not self.mod or
self.mod == 'mp_')
@property
def is_top_frame(self):
return (self.mod == 'tf_')
@property
def is_embed(self):
return (self.mod and
self.mod not in ('id_', 'mp_', 'bn_'))
self.mod not in ('id_', 'mp_', 'tf_', 'bn_'))
@property
def is_banner_only(self):

View File

@ -118,9 +118,9 @@ function notify_top() {
this.load = function() {
if ((window.self == window.top) && wbinfo) {
if (wbinfo.canon_url && (window.location.href != wbinfo.canon_url) && wbinfo.mod != "bn_") {
if (wbinfo.top_url && (window.location.href != wbinfo.top_url) && wbinfo.mod != "bn_") {
// Auto-redirect to top frame
window.location.replace(wbinfo.canon_url);
window.location.replace(wbinfo.top_url);
} else {
// Init Banner (no frame or top frame)
add_event("readystatechange", init_banner, document);

View File

@ -1,23 +1,25 @@
var LIVE_COOKIE_REGEX = /pywb.timestamp=([\d]{1,14})/;
var TS_REGEX = /\/([\d]{1,14})\//;
var curr_state = {};
function make_outer_url(url, ts)
{
if (ts) {
return wbinfo.prefix + ts + "/" + url;
return wbinfo.prefix + ts + "tf_/" + url;
} else {
return wbinfo.prefix + url;
return wbinfo.prefix + "tf_/" + url;
}
}
function make_inner_url(url, ts)
{
if (ts) {
return wbinfo.prefix + ts + "mp_/" + url;
return wbinfo.prefix + ts + "/" + url;
} else {
return wbinfo.prefix + "mp_/" + url;
return wbinfo.prefix + "/" + url;
}
}
@ -39,7 +41,7 @@ function push_state(url, timestamp, capture_str, is_live) {
state.capture_str = capture_str;
state.is_live = is_live;
window.history.replaceState(state, "", state.outer_url);
window.history.replaceState(state, "", state.inner_url);
set_state(state);
}
@ -52,16 +54,12 @@ function pop_state(state) {
function extract_ts(url)
{
var inx = url.indexOf("mp_");
if (inx < 0) {
var result = value.match(TS_REGEX);
if (!result) {
return "";
}
url = url.substring(0, inx);
inx = url.lastIndexOf("/");
if (inx <= 0) {
return "";
}
return url.substring(inx + 1);
return result[1];
}
function extract_replay_url(url) {

View File

@ -712,7 +712,7 @@ WB_wombat_init = (function() {
wb_replay_prefix = replay_prefix;
if (wb_replay_prefix) {
wb_replay_date_prefix = replay_prefix + capture_date + "mp_/";
wb_replay_date_prefix = replay_prefix + capture_date + "/";
if (capture_date.length > 0) {
wb_capture_date_part = "/" + capture_date + "/";

View File

@ -8,7 +8,9 @@
wbinfo.is_frame = true;
</script>
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/default_banner.js'> </script>
{% include banner_html ignore missing %}
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb_frame.js'> </script>
<link rel='stylesheet' href='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.css'/>
<!-- End WB Insert -->

View File

@ -16,7 +16,7 @@
wbinfo.timestamp = "{{ cdx.timestamp }}";
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
wbinfo.mod = "{{ wbrequest.wb_url.mod }}";
wbinfo.canon_url = "{{ canon_url }}";
wbinfo.top_url = "{{ top_url }}";
wbinfo.is_live = {{ "true" if cdx.is_live else "false" }};
wbinfo.coll = "{{ wbrequest.coll }}";
wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}";

View File

@ -6,6 +6,7 @@ from datetime import datetime
from pywb.utils.wbexception import NotFoundException
from pywb.utils.loaders import BlockLoader
from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
from pywb.framework.wbrequestresponse import WbResponse
@ -15,6 +16,7 @@ from pywb.warc.resolvingloader import ResolvingLoader
from views import J2TemplateView
from replay_views import ReplayView
from pywb.framework.memento import MementoResponse
from pywb.utils.timeutils import datetime_to_timestamp
@ -30,13 +32,21 @@ class SearchPageWbUrlHandler(WbUrlHandler):
'Search Page'))
self.is_frame_mode = config.get('framed_replay', False)
self.response_class = WbResponse
if self.is_frame_mode:
html = config.get('frame_insert_html', 'ui/frame_insert.html')
self.frame_insert_view = (J2TemplateView.
create_template(html, 'Frame Insert'))
self.banner_html = config.get('banner_html', 'banner.html')
if config.get('enable_memento', False):
self.response_class = MementoResponse
else:
self.frame_insert_view = None
self.banner_html = None
def render_search_page(self, wbrequest, **kwargs):
if self.search_view:
@ -55,28 +65,36 @@ class SearchPageWbUrlHandler(WbUrlHandler):
# (not supported in proxy mode)
if (self.is_frame_mode and wbrequest.wb_url and
not wbrequest.wb_url.is_query() and
not wbrequest.wb_url.mod and
not wbrequest.options['is_proxy']):
params = self.get_top_frame_params(wbrequest)
return self.frame_insert_view.render_response(**params)
if wbrequest.wb_url.is_top_frame:
return self.get_top_frame_response(wbrequest)
else:
wbrequest.final_mod = 'tf_'
return self.handle_request(wbrequest)
def get_top_frame_params(self, wbrequest):
def get_top_frame_response(self, wbrequest):
if wbrequest.wb_url.timestamp:
timestamp = wbrequest.wb_url.timestamp
else:
timestamp = datetime_to_timestamp(datetime.utcnow())
embed_url = wbrequest.wb_url.to_str(mod='mp_')
embed_url = wbrequest.wb_url.to_str(mod='')
return dict(embed_url=embed_url,
wbrequest=wbrequest,
timestamp=timestamp,
url=wbrequest.wb_url.url,
content_type='text/html')
params = dict(embed_url=embed_url,
wbrequest=wbrequest,
timestamp=timestamp,
url=wbrequest.wb_url.url,
banner_html=self.banner_html)
headers = [('Content-Type', 'text/html; charset=utf-8')]
status_headers = StatusAndHeaders('200 OK', headers)
template_result = self.frame_insert_view.render_to_string(**params)
body = template_result.encode('utf-8')
return self.response_class(status_headers, [body], wbrequest=wbrequest)
#=================================================================

View File

@ -88,6 +88,9 @@ class J2TemplateView(object):
def _make_loaders(self, template_dir):
loaders = []
loaders.append(FileSystemLoader(template_dir))
# add relative and absolute path loaders for banner support
loaders.append(FileSystemLoader('.'))
loaders.append(FileSystemLoader('/'))
loaders.append(PackageLoader(self.env_globals['package'], template_dir))
return loaders
@ -128,28 +131,21 @@ class HeadInsertView(J2TemplateView):
def create_insert_func(self, wbrequest,
include_ts=True):
canon_url = wbrequest.wb_prefix + wbrequest.wb_url.to_str(mod='')
top_url = wbrequest.wb_prefix
top_url += wbrequest.wb_url.to_str(mod=wbrequest.final_mod)
include_wombat = not wbrequest.wb_url.is_banner_only
def make_head_insert(rule, cdx):
return (self.render_to_string(wbrequest=wbrequest,
cdx=cdx,
canon_url=canon_url,
top_url=top_url,
include_ts=include_ts,
include_wombat=include_wombat,
banner_html=self.banner_html,
rule=rule))
return make_head_insert
def _make_loaders(self, template_dir):
loaders = []
loaders.append(FileSystemLoader(template_dir))
# add relative and absolute path loaders
loaders.append(FileSystemLoader('.'))
loaders.append(FileSystemLoader('/'))
loaders.append(PackageLoader(self.env_globals['package'], template_dir))
return loaders
@staticmethod
def init_from_config(config):
view = config.get('head_insert_view')

View File

@ -34,7 +34,7 @@ class PyTest(TestCommand):
setup(
name='pywb',
version='0.6.1',
version='0.6.2',
url='https://github.com/ikreymer/pywb',
author='Ilya Kreymer',
author_email='ikreymer@gmail.com',

View File

@ -87,19 +87,19 @@ class TestWb:
assert actual_len == 3, actual_len
def test_replay_top_frame(self):
resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
resp = self.testapp.get('/pywb/20140127171238tf_/http://www.iana.org/')
assert '<iframe ' in resp.body
assert '/pywb/20140127171238mp_/http://www.iana.org/' in resp.body
assert '/pywb/20140127171238/http://www.iana.org/' in resp.body
def test_replay_content(self):
resp = self.testapp.get('/pywb/20140127171238mp_/http://www.iana.org/')
resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
self._assert_basic_html(resp)
assert '"20140127171238"' in resp.body
assert 'wb.js' in resp.body
assert 'WB_wombat_init' in resp.body
assert '/pywb/20140127171238mp_/http://www.iana.org/time-zones"' in resp.body
assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body
def test_replay_non_frame_content(self):
resp = self.testapp.get('/pywb-nonframe/20140127171238/http://www.iana.org/')
@ -110,28 +110,28 @@ class TestWb:
assert '/pywb-nonframe/20140127171238/http://www.iana.org/time-zones"' in resp.body
def test_replay_non_surt(self):
resp = self.testapp.get('/pywb-nosurt/20140103030321mp_/http://example.com?example=1')
resp = self.testapp.get('/pywb-nosurt/20140103030321/http://example.com?example=1')
self._assert_basic_html(resp)
assert '"20140103030321"' in resp.body
assert 'wb.js' in resp.body
assert '/pywb-nosurt/20140103030321mp_/http://www.iana.org/domains/example' in resp.body
assert '/pywb-nosurt/20140103030321/http://www.iana.org/domains/example' in resp.body
def test_zero_len_revisit(self):
resp = self.testapp.get('/pywb/20140603030341mp_/http://example.com?example=2')
resp = self.testapp.get('/pywb/20140603030341/http://example.com?example=2')
self._assert_basic_html(resp)
assert '"20140603030341"' in resp.body
assert 'wb.js' in resp.body
assert '/pywb/20140603030341mp_/http://www.iana.org/domains/example' in resp.body
assert '/pywb/20140603030341/http://www.iana.org/domains/example' in resp.body
def test_replay_url_agnostic_revisit(self):
resp = self.testapp.get('/pywb/20130729195151mp_/http://www.example.com/')
resp = self.testapp.get('/pywb/20130729195151/http://www.example.com/')
self._assert_basic_html(resp)
assert '"20130729195151"' in resp.body
assert 'wb.js' in resp.body
assert '/pywb/20130729195151mp_/http://www.iana.org/domains/example"' in resp.body
assert '/pywb/20130729195151/http://www.iana.org/domains/example"' in resp.body
def test_replay_cdx_mod(self):
resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css')
@ -200,56 +200,56 @@ class TestWb:
assert resp.content_type == 'application/x-javascript'
def test_redirect_1(self):
resp = self.testapp.get('/pywb/20140127171237mp_/http://www.iana.org/')
resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/')
assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140127171238mp_/http://iana.org')
assert resp.headers['Location'].endswith('/pywb/20140127171238/http://iana.org')
def test_redirect_replay_2(self):
resp = self.testapp.get('/pywb/mp_/http://example.com/')
resp = self.testapp.get('/pywb/http://example.com/')
assert resp.status_int == 302
assert resp.headers['Location'].endswith('/20140127171251mp_/http://example.com')
assert resp.headers['Location'].endswith('/20140127171251/http://example.com')
resp = resp.follow()
#check resp
self._assert_basic_html(resp)
assert '"20140127171251"' in resp.body
assert '/pywb/20140127171251mp_/http://www.iana.org/domains/example' in resp.body
assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body
def test_redirect_relative_3(self):
# webtest uses Host: localhost:80 by default
# first two requests should result in same redirect
target = 'http://localhost:80/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css'
target = 'http://localhost:80/pywb/2014/http://iana.org/_css/2013.1/screen.css'
# without timestamp
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')])
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014/http://iana.org/')])
assert resp.status_int == 302
assert resp.headers['Location'] == target, resp.headers['Location']
# with timestamp
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')])
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014/http://iana.org/')])
assert resp.status_int == 302
assert resp.headers['Location'] == target, resp.headers['Location']
resp = resp.follow()
assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css')
assert resp.headers['Location'].endswith('/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css')
resp = resp.follow()
assert resp.status_int == 200
assert resp.content_type == 'text/css'
def test_rel_self_redirect(self):
uri = '/pywb/20140126200927mp_/http://www.iana.org/domains/root/db'
uri = '/pywb/20140126200927/http://www.iana.org/domains/root/db'
resp = self.testapp.get(uri, status=302)
assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140126200928mp_/http://www.iana.org/domains/root/db')
assert resp.headers['Location'].endswith('/pywb/20140126200928/http://www.iana.org/domains/root/db')
#def test_referrer_self_redirect(self):
# uri = '/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css'
# uri = '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css'
# host = 'somehost:8082'
# referrer = 'http://' + host + uri
@ -262,33 +262,33 @@ class TestWb:
# assert resp.status_int == 302
def test_not_existant_warc_other_capture(self):
resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com?example=2')
resp = self.testapp.get('/pywb/20140703030321/http://example.com?example=2')
assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2')
assert resp.headers['Location'].endswith('/pywb/20140603030341/http://example.com?example=2')
def test_missing_revisit_other_capture(self):
resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com?example=2')
resp = self.testapp.get('/pywb/20140603030351/http://example.com?example=2')
assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2')
assert resp.headers['Location'].endswith('/pywb/20140603030341/http://example.com?example=2')
def test_not_existant_warc_no_other(self):
resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com?example=3', status = 503)
resp = self.testapp.get('/pywb/20140703030321/http://example.com?example=3', status = 503)
assert resp.status_int == 503
def test_missing_revisit_no_other(self):
resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com?example=3', status = 503)
resp = self.testapp.get('/pywb/20140603030351/http://example.com?example=3', status = 503)
assert resp.status_int == 503
def test_live_frame(self):
resp = self.testapp.get('/live/mp_/http://example.com/?test=test')
resp = self.testapp.get('/live/http://example.com/?test=test')
assert resp.status_int == 200
def test_live_fallback(self):
resp = self.testapp.get('/pywb-fallback/mp_/http://example.com/?test=test')
resp = self.testapp.get('/pywb-fallback//http://example.com/?test=test')
assert resp.status_int == 200
def test_post_1(self):
resp = self.testapp.post('/pywb/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
resp = self.testapp.post('/pywb/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
# no redirects for POST, as some browsers (FF) show modal confirmation dialog!
#assert resp.status_int == 307
@ -303,24 +303,24 @@ class TestWb:
assert '"test": "abc"' in resp.body
def test_post_2(self):
resp = self.testapp.post('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': '^'})
resp = self.testapp.post('/pywb/20140610001255/http://httpbin.org/post?foo=bar', {'data': '^'})
assert resp.status_int == 200
assert '"data": "^"' in resp.body
def test_post_invalid(self):
# not json
resp = self.testapp.post_json('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': '^'}, status=404)
resp = self.testapp.post_json('/pywb/20140610001255/http://httpbin.org/post?foo=bar', {'data': '^'}, status=404)
assert resp.status_int == 404
def test_post_redirect(self):
# post handled without redirect (since 307 not allowed)
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:80/pywb/2014mp_/http://httpbin.org/post')])
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:80/pywb/2014/http://httpbin.org/post')])
assert resp.status_int == 200
assert '"foo": "bar"' in resp.body
assert '"test": "abc"' in resp.body
def test_excluded_content(self):
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_img/bookmark_icon.ico', status = 403)
resp = self.testapp.get('/pywb/http://www.iana.org/_img/bookmark_icon.ico', status = 403)
assert resp.status_int == 403
assert 'Excluded' in resp.body
@ -365,7 +365,7 @@ class TestWb:
def test_error(self):
resp = self.testapp.get('/pywb/mp_/?abc', status = 400)
resp = self.testapp.get('/pywb/?abc', status = 400)
assert resp.status_int == 400
assert 'Invalid Url: http://?abc' in resp.body

View File

@ -10,32 +10,32 @@ class TestLiveRewriter:
def test_live_rewrite_1(self):
headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/rewrite/other.example.com')]
resp = self.testapp.get('/rewrite/mp_/http://example.com/', headers=headers)
resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)
assert resp.status_int == 200
def test_live_rewrite_redirect_2(self):
resp = self.testapp.get('/rewrite/mp_/http://facebook.com/')
resp = self.testapp.get('/rewrite/http://facebook.com/')
assert resp.status_int == 301
def test_live_rewrite_post(self):
resp = self.testapp.post('/rewrite/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
resp = self.testapp.post('/rewrite/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
assert resp.status_int == 200
assert '"foo": "bar"' in resp.body
assert '"test": "abc"' in resp.body
assert resp.status_int == 200
def test_live_rewrite_frame(self):
# Checks the framed page for http://example.com/ on the '/rewrite/' route:
# a 200 response containing an <iframe> whose src is the replay url.
# NOTE(review): flattened diff hunk with two before/after pairs. The
# request pair presumably moves the frame page from the bare url to the
# 'tf_' url, and the src-assert pair moves the inner frame from the 'mp_'
# url to the modifier-less url. Read as live code, the two src asserts
# would contradict each other -- confirm against the real test file.
resp = self.testapp.get('/rewrite/http://example.com/')
resp = self.testapp.get('/rewrite/tf_/http://example.com/')
assert resp.status_int == 200
assert '<iframe ' in resp.body
assert 'src="/rewrite/mp_/http://example.com/"' in resp.body
assert 'src="/rewrite/http://example.com/"' in resp.body
def test_live_invalid(self):
# Requests 'http://abcdef' through the '/rewrite/' route and expects a 400.
# NOTE(review): flattened diff hunk -- the 'mp_' request is presumably the
# removed line, the modifier-less request the added one.
resp = self.testapp.get('/rewrite/mp_/http://abcdef', status=400)
resp = self.testapp.get('/rewrite/http://abcdef', status=400)
assert resp.status_int == 400
def test_live_invalid_2(self):
# Requests the non-url string '@#$@#$' through the '/rewrite/' route and
# expects a 400.
# NOTE(review): flattened diff hunk -- the 'mp_' request is presumably the
# removed line, the modifier-less request the added one.
resp = self.testapp.get('/rewrite/mp_/@#$@#$', status=400)
resp = self.testapp.get('/rewrite/@#$@#$', status=400)
assert resp.status_int == 400

View File

@ -34,7 +34,7 @@ class TestWb:
"""
TimeGate with no Accept-Datetime header
"""
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_css/2013.1/screen.css')
resp = self.testapp.get('/pywb/http://www.iana.org/_css/2013.1/screen.css')
assert resp.status_int == 302
@ -46,7 +46,7 @@ class TestWb:
assert MEMENTO_DATETIME not in resp.headers
assert '/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
assert '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
def test_timegate_accept_datetime(self):
@ -54,7 +54,7 @@ class TestWb:
TimeGate with Accept-Datetime header
"""
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
resp = self.testapp.get('/pywb//http://www.iana.org/_css/2013.1/screen.css', headers=headers)
assert resp.status_int == 302
@ -67,7 +67,7 @@ class TestWb:
assert MEMENTO_DATETIME not in resp.headers
assert '/pywb/20140126200804mp_/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
assert '/pywb/20140126200804/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
def test_non_timegate_intermediate_redir(self):
@ -76,7 +76,7 @@ class TestWb:
"""
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
# not a timegate, partial timestamp /2014/ present
resp = self.testapp.get('/pywb/2014mp_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
resp = self.testapp.get('/pywb/2014/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
assert resp.status_int == 302
@ -90,14 +90,64 @@ class TestWb:
# redirect to latest, not negotiation via Accept-Datetime
assert '/pywb/20140127171239mp_/' in resp.headers['Location']
assert '/pywb/20140127171239/' in resp.headers['Location']
def test_top_frame_no_date(self):
    """
    Top-frame ('tf_') request that carries no timestamp.

    The response is checked as an intermediate resource: status 200,
    neither a Vary nor a Memento-Datetime header, and Link entries for
    the original, the timegate and the timemap.
    """
    # Accept-Datetime is sent deliberately: a top frame is not a
    # timegate, so the header is expected to be ignored
    request_headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
    response = self.testapp.get('/pywb/tf_/http://www.iana.org/_css/2013.1/screen.css', headers=request_headers)

    assert response.status_int == 200

    # neither negotiation (Vary) nor memento-datetime headers may be set
    for absent_header in (VARY, MEMENTO_DATETIME):
        assert absent_header not in response.headers

    link_values = self.get_links(response)
    assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in link_values
    assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in link_values
    assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in link_values
def test_top_frame_with_date(self):
    """
    Top-frame ('tf_') request that carries a timestamp.

    The response is checked as an intermediate resource: status 200,
    neither a Vary nor a Memento-Datetime header, Link entries for the
    original, the timegate and the timemap, plus a rel="memento" link
    to the same timestamp without the 'tf_' modifier.
    """
    # Accept-Datetime is sent deliberately: a top frame is not a
    # timegate, so the header is expected to be ignored
    request_headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
    response = self.testapp.get('/pywb/20141012tf_/http://www.iana.org/_css/2013.1/screen.css', headers=request_headers)

    assert response.status_int == 200

    # neither negotiation (Vary) nor memento-datetime headers may be set
    for absent_header in (VARY, MEMENTO_DATETIME):
        assert absent_header not in response.headers

    link_values = self.get_links(response)
    assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in link_values
    assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in link_values
    assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in link_values
    assert '<http://localhost:80/pywb/20141012/http://www.iana.org/_css/2013.1/screen.css>; rel="memento"' in link_values
def test_memento_url(self):
"""
Memento response, 200 capture
"""
resp = self.testapp.get('/pywb/20140126200804mp_/http://www.iana.org/_css/2013.1/screen.css')
resp = self.testapp.get('/pywb/20140126200804/http://www.iana.org/_css/2013.1/screen.css')
assert resp.status_int == 200
@ -105,7 +155,7 @@ class TestWb:
links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
assert '<http://localhost:80/pywb/mp_/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
assert resp.headers[MEMENTO_DATETIME] == 'Sun, 26 Jan 2014 20:08:04 GMT'
@ -115,7 +165,7 @@ class TestWb:
"""
Memento (capture) of a 302 response
"""
resp = self.testapp.get('/pywb/20140128051539mp_/http://www.iana.org/domains/example')
resp = self.testapp.get('/pywb/20140128051539/http://www.iana.org/domains/example')
assert resp.status_int == 302
@ -123,7 +173,7 @@ class TestWb:
links = self.get_links(resp)
assert '<http://www.iana.org/domains/example>; rel="original"' in links
assert '<http://localhost:80/pywb/mp_/http://www.iana.org/domains/example>; rel="timegate"' in links
assert '<http://localhost:80/pywb/http://www.iana.org/domains/example>; rel="timegate"' in links
assert self.make_timemap_link('http://www.iana.org/domains/example') in links
assert resp.headers[MEMENTO_DATETIME] == 'Tue, 28 Jan 2014 05:15:39 GMT'
@ -147,12 +197,12 @@ rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT"
assert lines[1] == '<http://example.com?example=1>; rel="original",'
assert lines[2] == '<http://localhost:80/pywb/mp_/http://example.com?example=1>; rel="timegate",'
assert lines[2] == '<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",'
assert lines[3] == '<http://localhost:80/pywb/20140103030321mp_/http://example.com?example=1>; \
assert lines[3] == '<http://localhost:80/pywb/20140103030321/http://example.com?example=1>; \
rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT",'
assert lines[4] == '<http://localhost:80/pywb/20140103030341mp_/http://example.com?example=1>; \
assert lines[4] == '<http://localhost:80/pywb/20140103030341/http://example.com?example=1>; \
rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"'
def test_timemap_2(self):