1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

framed replay: invert framed replay paradigm, replay always uses

canonical, no-modifier archival url (instead of mp_).
When using frames, the page redirects to a 'tf_' page, which then uses
replaceState() to change url back to canonical form.
memento: support for framed replay, include memento headers in top frame
bump version to 0.6.2
This commit is contained in:
Ilya Kreymer 2014-10-18 11:21:07 -07:00
parent b99dcb41f0
commit 4a1cc46fa3
20 changed files with 233 additions and 145 deletions

View File

@ -1,6 +1,10 @@
pywb 0.6.1 changelist pywb 0.6.2 changelist
~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~
* Invert framed replay paradigm: the canonical page is always without a modifier (instead of with `mp_`). If using frames, the page redirects to a `tf_` page, which then uses replaceState() to change the url back to canonical form.
* Enable Memento support for framed replay, include Memento headers in top frame
* Easier to customize just the banner html, via `banner_html` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner. * Easier to customize just the banner html, via `banner_html` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner.
Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting `banner_html: False` will disable the banner. Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting `banner_html: False` will disable the banner.

View File

@ -1,4 +1,4 @@
PyWb 0.6.1 PyWb 0.6.2
========== ==========
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop

View File

@ -46,15 +46,22 @@ class MementoRespMixin(object):
if not wbrequest or not wbrequest.wb_url: if not wbrequest or not wbrequest.wb_url:
return return
is_timegate = wbrequest.options.get('is_timegate', False) is_top_frame = wbrequest.wb_url.is_top_frame
is_timegate = wbrequest.options.get('is_timegate', False) and not is_top_frame
if is_timegate: if is_timegate:
self.status_headers.headers.append(('Vary', 'accept-datetime')) self.status_headers.headers.append(('Vary', 'accept-datetime'))
# Determine if memento: # Determine if memento:
# if no cdx included, definitely not a memento is_memento = False
# if no cdx included, not a memento, unless top-frame special
if not cdx: if not cdx:
is_memento = False # special case: include the headers but except Memento-Datetime
# since this is really an intermediate resource
if is_top_frame:
is_memento = True
# otherwise, if in proxy mode, then always a memento # otherwise, if in proxy mode, then always a memento
elif wbrequest.options['is_proxy']: elif wbrequest.options['is_proxy']:
@ -64,13 +71,19 @@ class MementoRespMixin(object):
else: else:
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY) is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
if is_memento: link = []
if is_memento and cdx:
http_date = timestamp_to_http_date(cdx['timestamp']) http_date = timestamp_to_http_date(cdx['timestamp'])
self.status_headers.headers.append(('Memento-Datetime', http_date)) self.status_headers.headers.append(('Memento-Datetime', http_date))
req_url = wbrequest.wb_url.url elif is_memento and is_top_frame and wbrequest.wb_url.timestamp:
# top frame special case
canon_link = wbrequest.urlrewriter.prefix
canon_link += wbrequest.wb_url.to_str(mod='')
link.append(self.make_link(canon_link, 'memento'))
link = [] req_url = wbrequest.wb_url.url
if is_memento and is_timegate: if is_memento and is_timegate:
link.append(self.make_link(req_url, 'original timegate')) link.append(self.make_link(req_url, 'original timegate'))
@ -82,7 +95,8 @@ class MementoRespMixin(object):
link.append(self.make_timemap_link(wbrequest)) link.append(self.make_timemap_link(wbrequest))
if is_memento and not is_timegate: if is_memento and not is_timegate:
timegate = wbrequest.urlrewriter.get_timestamp_url('') timegate = wbrequest.urlrewriter.prefix
timegate += wbrequest.wb_url.to_str(mod='', timestamp='')
link.append(self.make_link(timegate, 'timegate')) link.append(self.make_link(timegate, 'timegate'))
link = ', '.join(link) link = ', '.join(link)
@ -115,7 +129,7 @@ def make_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
memento = '<{0}>; rel="{1}"; datetime="{2}"' + end memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
string = WbUrl.to_wburl_str(url=cdx['original'], string = WbUrl.to_wburl_str(url=cdx['original'],
mod='mp_', mod='',
timestamp=cdx['timestamp'], timestamp=cdx['timestamp'],
type=WbUrl.REPLAY) type=WbUrl.REPLAY)
@ -148,7 +162,7 @@ def make_timemap(wbrequest, cdx_lines):
# timegate link # timegate link
timegate = '<{0}>; rel="timegate",\n' timegate = '<{0}>; rel="timegate",\n'
yield timegate.format(prefix + 'mp_/' + url) yield timegate.format(prefix + url)
# first memento link # first memento link
yield make_memento_link(first_cdx, prefix, yield make_memento_link(first_cdx, prefix,

View File

@ -51,6 +51,8 @@ class WbRequest(object):
self.coll = coll self.coll = coll
self.final_mod = ''
if not host_prefix: if not host_prefix:
host_prefix = self.make_host_prefix(env) host_prefix = self.make_host_prefix(env)

View File

@ -25,7 +25,7 @@ class RegexRewriter(object):
@staticmethod @staticmethod
def archival_rewrite(rewriter): def archival_rewrite(rewriter):
return lambda string: rewriter.rewrite(string, 'mp_') return lambda string: rewriter.rewrite(string)
#@staticmethod #@staticmethod
#def replacer(other): #def replacer(other):

View File

@ -28,7 +28,7 @@ class RewriteContent:
ds_rules_file=ds_rules_file) ds_rules_file=ds_rules_file)
if is_framed_replay: if is_framed_replay:
self.defmod = 'mp_' self.defmod = ''
else: else:
self.defmod = '' self.defmod = ''

View File

@ -62,7 +62,7 @@ ur"""
# Script tag # Script tag
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</script>') >>> parse('<script>window.location = "http://example.com/a/b/c.html"</script>')
<script>window.WB_wombat_location = "/web/20131226101010mp_/http://example.com/a/b/c.html"</script> <script>window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html"</script>
# Script tag + crossorigin # Script tag + crossorigin
>>> parse('<script src="/js/scripts.js" crossorigin="anonymous"></script>') >>> parse('<script src="/js/scripts.js" crossorigin="anonymous"></script>')
@ -70,21 +70,21 @@ ur"""
# Unterminated script tag, handle and auto-terminate # Unterminated script tag, handle and auto-terminate
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</sc>') >>> parse('<script>window.location = "http://example.com/a/b/c.html"</sc>')
<script>window.WB_wombat_location = "/web/20131226101010mp_/http://example.com/a/b/c.html"</sc></script> <script>window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html"</sc></script>
>>> parse('<script>/*<![CDATA[*/window.location = "http://example.com/a/b/c.html;/*]]>*/"</script>') >>> parse('<script>/*<![CDATA[*/window.location = "http://example.com/a/b/c.html;/*]]>*/"</script>')
<script>/*<![CDATA[*/window.WB_wombat_location = "/web/20131226101010mp_/http://example.com/a/b/c.html;/*]]>*/"</script> <script>/*<![CDATA[*/window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html;/*]]>*/"</script>
>>> parse('<div style="background: url(\'abc.html\')" onblah onclick="location = \'redirect.html\'"></div>') >>> parse('<div style="background: url(\'abc.html\')" onblah onclick="location = \'redirect.html\'"></div>')
<div style="background: url('/web/20131226101010mp_/http://example.com/some/path/abc.html')" onblah="" onclick="WB_wombat_location = 'redirect.html'"></div> <div style="background: url('/web/20131226101010/http://example.com/some/path/abc.html')" onblah="" onclick="WB_wombat_location = 'redirect.html'"></div>
# Style # Style
>>> parse('<style>@import "styles.css" .a { font-face: url(\'myfont.ttf\') }</style>') >>> parse('<style>@import "styles.css" .a { font-face: url(\'myfont.ttf\') }</style>')
<style>@import "/web/20131226101010mp_/http://example.com/some/path/styles.css" .a { font-face: url('/web/20131226101010mp_/http://example.com/some/path/myfont.ttf') }</style> <style>@import "/web/20131226101010/http://example.com/some/path/styles.css" .a { font-face: url('/web/20131226101010/http://example.com/some/path/myfont.ttf') }</style>
# Unterminated style tag, handle and auto-terminate # Unterminated style tag, handle and auto-terminate
>>> parse('<style>@import url(styles.css)') >>> parse('<style>@import url(styles.css)')
<style>@import url(/web/20131226101010mp_/http://example.com/some/path/styles.css)</style> <style>@import url(/web/20131226101010/http://example.com/some/path/styles.css)</style>
# Head Insertion # Head Insertion
>>> parse('<html><head><script src="other.js"></script></head><body>Test</body></html>', head_insert = '<script src="cool.js"></script>') >>> parse('<html><head><script src="other.js"></script></head><body>Test</body></html>', head_insert = '<script src="cool.js"></script>')

View File

@ -12,16 +12,16 @@ r"""
#================================================================= #=================================================================
>>> _test_js('location = "http://example.com/abc.html"') >>> _test_js('location = "http://example.com/abc.html"')
'WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html"' 'WB_wombat_location = "/web/20131010/http://example.com/abc.html"'
>>> _test_js(r'location = "http:\/\/example.com/abc.html"') >>> _test_js(r'location = "http:\/\/example.com/abc.html"')
'WB_wombat_location = "/web/20131010mp_/http:\\/\\/example.com/abc.html"' 'WB_wombat_location = "/web/20131010/http:\\/\\/example.com/abc.html"'
>>> _test_js(r'location = "http:\\/\\/example.com/abc.html"') >>> _test_js(r'location = "http:\\/\\/example.com/abc.html"')
'WB_wombat_location = "/web/20131010mp_/http:\\\\/\\\\/example.com/abc.html"' 'WB_wombat_location = "/web/20131010/http:\\\\/\\\\/example.com/abc.html"'
>>> _test_js(r"location = 'http://example.com/abc.html/'") >>> _test_js(r"location = 'http://example.com/abc.html/'")
"WB_wombat_location = '/web/20131010mp_/http://example.com/abc.html/'" "WB_wombat_location = '/web/20131010/http://example.com/abc.html/'"
>>> _test_js(r'location = http://example.com/abc.html/') >>> _test_js(r'location = http://example.com/abc.html/')
'WB_wombat_location = http://example.com/abc.html/' 'WB_wombat_location = http://example.com/abc.html/'
@ -37,21 +37,21 @@ r"""
'"/location" == some_location_val; locations = WB_wombat_location;' '"/location" == some_location_val; locations = WB_wombat_location;'
>>> _test_js('cool_Location = "http://example.com/abc.html"') >>> _test_js('cool_Location = "http://example.com/abc.html"')
'cool_Location = "/web/20131010mp_/http://example.com/abc.html"' 'cool_Location = "/web/20131010/http://example.com/abc.html"'
>>> _test_js('window.location = "http://example.com/abc.html" document.domain = "anotherdomain.com"') >>> _test_js('window.location = "http://example.com/abc.html" document.domain = "anotherdomain.com"')
'window.WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html" document.WB_wombat_domain = "anotherdomain.com"' 'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html" document.WB_wombat_domain = "anotherdomain.com"'
>>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"') >>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"')
'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"' 'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"'
# custom rules added # custom rules added
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)]) >>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
'window.WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html"; /*some_func(); */' 'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */'
# scheme-agnostic # scheme-agnostic
>>> _test_js('cool_Location = "//example.com/abc.html" //comment') >>> _test_js('cool_Location = "//example.com/abc.html" //comment')
'cool_Location = "/web/20131010mp_/http://example.com/abc.html" //comment' 'cool_Location = "/web/20131010/http://example.com/abc.html" //comment'
# document.cookie test # document.cookie test
>>> _test_js('document.cookie = "a=b; Path=/"') >>> _test_js('document.cookie = "a=b; Path=/"')
@ -59,7 +59,7 @@ r"""
# js-escaped # js-escaped
>>> _test_js('&quot;http:\\/\\/www.example.com\\/some\\/path\\/?query=1&quot;') >>> _test_js('&quot;http:\\/\\/www.example.com\\/some\\/path\\/?query=1&quot;')
'&quot;/web/20131010mp_/http:\\/\\/www.example.com\\/some\\/path\\/?query=1&quot;' '&quot;/web/20131010/http:\\/\\/www.example.com\\/some\\/path\\/?query=1&quot;'
#================================================================= #=================================================================
@ -67,68 +67,68 @@ r"""
#================================================================= #=================================================================
>>> _test_xml('<tag xmlns="http://www.example.com/ns" attr="http://example.com"></tag>') >>> _test_xml('<tag xmlns="http://www.example.com/ns" attr="http://example.com"></tag>')
'<tag xmlns="http://www.example.com/ns" attr="/web/20131010mp_/http://example.com"></tag>' '<tag xmlns="http://www.example.com/ns" attr="/web/20131010/http://example.com"></tag>'
>>> _test_xml('<tag xmlns:xsi="http://www.example.com/ns" attr=" http://example.com"></tag>') >>> _test_xml('<tag xmlns:xsi="http://www.example.com/ns" attr=" http://example.com"></tag>')
'<tag xmlns:xsi="http://www.example.com/ns" attr=" /web/20131010mp_/http://example.com"></tag>' '<tag xmlns:xsi="http://www.example.com/ns" attr=" /web/20131010/http://example.com"></tag>'
>>> _test_xml('<tag> http://example.com<other>abchttp://example.com</other></tag>') >>> _test_xml('<tag> http://example.com<other>abchttp://example.com</other></tag>')
'<tag> /web/20131010mp_/http://example.com<other>abchttp://example.com</other></tag>' '<tag> /web/20131010/http://example.com<other>abchttp://example.com</other></tag>'
>>> _test_xml('<main> http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> http://example.com </main>') >>> _test_xml('<main> http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> http://example.com </main>')
'<main> /web/20131010mp_/http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> /web/20131010mp_/http://example.com </main>' '<main> /web/20131010/http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> /web/20131010/http://example.com </main>'
#================================================================= #=================================================================
# CSS Rewriting # CSS Rewriting
#================================================================= #=================================================================
>>> _test_css("background: url('/some/path.html')") >>> _test_css("background: url('/some/path.html')")
"background: url('/web/20131010mp_/http://example.com/some/path.html')" "background: url('/web/20131010/http://example.com/some/path.html')"
>>> _test_css("background: url('../path.html')") >>> _test_css("background: url('../path.html')")
"background: url('/web/20131010mp_/http://example.com/path.html')" "background: url('/web/20131010/http://example.com/path.html')"
>>> _test_css("background: url(\"http://domain.com/path.html\")") >>> _test_css("background: url(\"http://domain.com/path.html\")")
'background: url("/web/20131010mp_/http://domain.com/path.html")' 'background: url("/web/20131010/http://domain.com/path.html")'
>>> _test_css("background: url(file.jpeg)") >>> _test_css("background: url(file.jpeg)")
'background: url(/web/20131010mp_/http://example.com/file.jpeg)' 'background: url(/web/20131010/http://example.com/file.jpeg)'
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')") >>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
"background:#abc url('/web/20131010mp_/http://example.com/static/images/layout/logo.png')" "background:#abc url('/web/20131010/http://example.com/static/images/layout/logo.png')"
>>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')") >>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')")
"background:#000 url('/web/20131010mp_/http://example.com/images/layout/logo.png')" "background:#000 url('/web/20131010/http://example.com/images/layout/logo.png')"
>>> _test_css("background: url('')") >>> _test_css("background: url('')")
"background: url('')" "background: url('')"
>>> _test_css("background: url (\"weirdpath\')") >>> _test_css("background: url (\"weirdpath\')")
'background: url ("/web/20131010mp_/http://example.com/weirdpath\')' 'background: url ("/web/20131010/http://example.com/weirdpath\')'
>>> _test_css("@import url ('path.css')") >>> _test_css("@import url ('path.css')")
"@import url ('/web/20131010mp_/http://example.com/path.css')" "@import url ('/web/20131010/http://example.com/path.css')"
>>> _test_css("@import url('path.css')") >>> _test_css("@import url('path.css')")
"@import url('/web/20131010mp_/http://example.com/path.css')" "@import url('/web/20131010/http://example.com/path.css')"
>>> _test_css("@import ( 'path.css')") >>> _test_css("@import ( 'path.css')")
"@import ( '/web/20131010mp_/http://example.com/path.css')" "@import ( '/web/20131010/http://example.com/path.css')"
>>> _test_css("@import \"path.css\"") >>> _test_css("@import \"path.css\"")
'@import "/web/20131010mp_/http://example.com/path.css"' '@import "/web/20131010/http://example.com/path.css"'
>>> _test_css("@import ('../path.css\"") >>> _test_css("@import ('../path.css\"")
'@import (\'/web/20131010mp_/http://example.com/path.css"' '@import (\'/web/20131010/http://example.com/path.css"'
>>> _test_css("@import ('../url.css\"") >>> _test_css("@import ('../url.css\"")
'@import (\'/web/20131010mp_/http://example.com/url.css"' '@import (\'/web/20131010/http://example.com/url.css"'
>>> _test_css("@import (\"url.css\")") >>> _test_css("@import (\"url.css\")")
'@import ("/web/20131010mp_/http://example.com/url.css")' '@import ("/web/20131010/http://example.com/url.css")'
>>> _test_css("@import url(/url.css)\n@import url(/anotherurl.css)\n @import url(/and_a_third.css)") >>> _test_css("@import url(/url.css)\n@import url(/anotherurl.css)\n @import url(/and_a_third.css)")
'@import url(/web/20131010mp_/http://example.com/url.css)\n@import url(/web/20131010mp_/http://example.com/anotherurl.css)\n @import url(/web/20131010mp_/http://example.com/and_a_third.css)' '@import url(/web/20131010/http://example.com/url.css)\n@import url(/web/20131010/http://example.com/anotherurl.css)\n @import url(/web/20131010/http://example.com/and_a_third.css)'
""" """

View File

@ -193,10 +193,14 @@ class WbUrl(BaseWbUrl):
return (not self.mod or return (not self.mod or
self.mod == 'mp_') self.mod == 'mp_')
@property
def is_top_frame(self):
return (self.mod == 'tf_')
@property @property
def is_embed(self): def is_embed(self):
return (self.mod and return (self.mod and
self.mod not in ('id_', 'mp_', 'bn_')) self.mod not in ('id_', 'mp_', 'tf_', 'bn_'))
@property @property
def is_banner_only(self): def is_banner_only(self):

View File

@ -118,9 +118,9 @@ function notify_top() {
this.load = function() { this.load = function() {
if ((window.self == window.top) && wbinfo) { if ((window.self == window.top) && wbinfo) {
if (wbinfo.canon_url && (window.location.href != wbinfo.canon_url) && wbinfo.mod != "bn_") { if (wbinfo.top_url && (window.location.href != wbinfo.top_url) && wbinfo.mod != "bn_") {
// Auto-redirect to top frame // Auto-redirect to top frame
window.location.replace(wbinfo.canon_url); window.location.replace(wbinfo.top_url);
} else { } else {
// Init Banner (no frame or top frame) // Init Banner (no frame or top frame)
add_event("readystatechange", init_banner, document); add_event("readystatechange", init_banner, document);

View File

@ -1,23 +1,25 @@
var LIVE_COOKIE_REGEX = /pywb.timestamp=([\d]{1,14})/; var LIVE_COOKIE_REGEX = /pywb.timestamp=([\d]{1,14})/;
var TS_REGEX = /\/([\d]{1,14})\//;
var curr_state = {}; var curr_state = {};
function make_outer_url(url, ts) function make_outer_url(url, ts)
{ {
if (ts) { if (ts) {
return wbinfo.prefix + ts + "/" + url; return wbinfo.prefix + ts + "tf_/" + url;
} else { } else {
return wbinfo.prefix + url; return wbinfo.prefix + "tf_/" + url;
} }
} }
function make_inner_url(url, ts) function make_inner_url(url, ts)
{ {
if (ts) { if (ts) {
return wbinfo.prefix + ts + "mp_/" + url; return wbinfo.prefix + ts + "/" + url;
} else { } else {
return wbinfo.prefix + "mp_/" + url; return wbinfo.prefix + "/" + url;
} }
} }
@ -39,7 +41,7 @@ function push_state(url, timestamp, capture_str, is_live) {
state.capture_str = capture_str; state.capture_str = capture_str;
state.is_live = is_live; state.is_live = is_live;
window.history.replaceState(state, "", state.outer_url); window.history.replaceState(state, "", state.inner_url);
set_state(state); set_state(state);
} }
@ -52,16 +54,12 @@ function pop_state(state) {
function extract_ts(url) function extract_ts(url)
{ {
var inx = url.indexOf("mp_"); var result = value.match(TS_REGEX);
if (inx < 0) { if (!result) {
return ""; return "";
} }
url = url.substring(0, inx);
inx = url.lastIndexOf("/"); return result[1];
if (inx <= 0) {
return "";
}
return url.substring(inx + 1);
} }
function extract_replay_url(url) { function extract_replay_url(url) {

View File

@ -712,7 +712,7 @@ WB_wombat_init = (function() {
wb_replay_prefix = replay_prefix; wb_replay_prefix = replay_prefix;
if (wb_replay_prefix) { if (wb_replay_prefix) {
wb_replay_date_prefix = replay_prefix + capture_date + "mp_/"; wb_replay_date_prefix = replay_prefix + capture_date + "/";
if (capture_date.length > 0) { if (capture_date.length > 0) {
wb_capture_date_part = "/" + capture_date + "/"; wb_capture_date_part = "/" + capture_date + "/";

View File

@ -8,7 +8,9 @@
wbinfo.is_frame = true; wbinfo.is_frame = true;
</script> </script>
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script> <script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/default_banner.js'> </script>
{% include banner_html ignore missing %}
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb_frame.js'> </script> <script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb_frame.js'> </script>
<link rel='stylesheet' href='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.css'/> <link rel='stylesheet' href='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.css'/>
<!-- End WB Insert --> <!-- End WB Insert -->

View File

@ -16,7 +16,7 @@
wbinfo.timestamp = "{{ cdx.timestamp }}"; wbinfo.timestamp = "{{ cdx.timestamp }}";
wbinfo.prefix = "{{ wbrequest.wb_prefix }}"; wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
wbinfo.mod = "{{ wbrequest.wb_url.mod }}"; wbinfo.mod = "{{ wbrequest.wb_url.mod }}";
wbinfo.canon_url = "{{ canon_url }}"; wbinfo.top_url = "{{ top_url }}";
wbinfo.is_live = {{ "true" if cdx.is_live else "false" }}; wbinfo.is_live = {{ "true" if cdx.is_live else "false" }};
wbinfo.coll = "{{ wbrequest.coll }}"; wbinfo.coll = "{{ wbrequest.coll }}";
wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}"; wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}";

View File

@ -6,6 +6,7 @@ from datetime import datetime
from pywb.utils.wbexception import NotFoundException from pywb.utils.wbexception import NotFoundException
from pywb.utils.loaders import BlockLoader from pywb.utils.loaders import BlockLoader
from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
from pywb.framework.wbrequestresponse import WbResponse from pywb.framework.wbrequestresponse import WbResponse
@ -15,6 +16,7 @@ from pywb.warc.resolvingloader import ResolvingLoader
from views import J2TemplateView from views import J2TemplateView
from replay_views import ReplayView from replay_views import ReplayView
from pywb.framework.memento import MementoResponse
from pywb.utils.timeutils import datetime_to_timestamp from pywb.utils.timeutils import datetime_to_timestamp
@ -30,13 +32,21 @@ class SearchPageWbUrlHandler(WbUrlHandler):
'Search Page')) 'Search Page'))
self.is_frame_mode = config.get('framed_replay', False) self.is_frame_mode = config.get('framed_replay', False)
self.response_class = WbResponse
if self.is_frame_mode: if self.is_frame_mode:
html = config.get('frame_insert_html', 'ui/frame_insert.html') html = config.get('frame_insert_html', 'ui/frame_insert.html')
self.frame_insert_view = (J2TemplateView. self.frame_insert_view = (J2TemplateView.
create_template(html, 'Frame Insert')) create_template(html, 'Frame Insert'))
self.banner_html = config.get('banner_html', 'banner.html')
if config.get('enable_memento', False):
self.response_class = MementoResponse
else: else:
self.frame_insert_view = None self.frame_insert_view = None
self.banner_html = None
def render_search_page(self, wbrequest, **kwargs): def render_search_page(self, wbrequest, **kwargs):
if self.search_view: if self.search_view:
@ -55,28 +65,36 @@ class SearchPageWbUrlHandler(WbUrlHandler):
# (not supported in proxy mode) # (not supported in proxy mode)
if (self.is_frame_mode and wbrequest.wb_url and if (self.is_frame_mode and wbrequest.wb_url and
not wbrequest.wb_url.is_query() and not wbrequest.wb_url.is_query() and
not wbrequest.wb_url.mod and
not wbrequest.options['is_proxy']): not wbrequest.options['is_proxy']):
params = self.get_top_frame_params(wbrequest) if wbrequest.wb_url.is_top_frame:
return self.get_top_frame_response(wbrequest)
return self.frame_insert_view.render_response(**params) else:
wbrequest.final_mod = 'tf_'
return self.handle_request(wbrequest) return self.handle_request(wbrequest)
def get_top_frame_params(self, wbrequest): def get_top_frame_response(self, wbrequest):
if wbrequest.wb_url.timestamp: if wbrequest.wb_url.timestamp:
timestamp = wbrequest.wb_url.timestamp timestamp = wbrequest.wb_url.timestamp
else: else:
timestamp = datetime_to_timestamp(datetime.utcnow()) timestamp = datetime_to_timestamp(datetime.utcnow())
embed_url = wbrequest.wb_url.to_str(mod='mp_') embed_url = wbrequest.wb_url.to_str(mod='')
return dict(embed_url=embed_url, params = dict(embed_url=embed_url,
wbrequest=wbrequest, wbrequest=wbrequest,
timestamp=timestamp, timestamp=timestamp,
url=wbrequest.wb_url.url, url=wbrequest.wb_url.url,
content_type='text/html') banner_html=self.banner_html)
headers = [('Content-Type', 'text/html; charset=utf-8')]
status_headers = StatusAndHeaders('200 OK', headers)
template_result = self.frame_insert_view.render_to_string(**params)
body = template_result.encode('utf-8')
return self.response_class(status_headers, [body], wbrequest=wbrequest)
#================================================================= #=================================================================

View File

@ -88,6 +88,9 @@ class J2TemplateView(object):
def _make_loaders(self, template_dir): def _make_loaders(self, template_dir):
loaders = [] loaders = []
loaders.append(FileSystemLoader(template_dir)) loaders.append(FileSystemLoader(template_dir))
# add relative and absolute path loaders for banner support
loaders.append(FileSystemLoader('.'))
loaders.append(FileSystemLoader('/'))
loaders.append(PackageLoader(self.env_globals['package'], template_dir)) loaders.append(PackageLoader(self.env_globals['package'], template_dir))
return loaders return loaders
@ -128,28 +131,21 @@ class HeadInsertView(J2TemplateView):
def create_insert_func(self, wbrequest, def create_insert_func(self, wbrequest,
include_ts=True): include_ts=True):
canon_url = wbrequest.wb_prefix + wbrequest.wb_url.to_str(mod='') top_url = wbrequest.wb_prefix
top_url += wbrequest.wb_url.to_str(mod=wbrequest.final_mod)
include_wombat = not wbrequest.wb_url.is_banner_only include_wombat = not wbrequest.wb_url.is_banner_only
def make_head_insert(rule, cdx): def make_head_insert(rule, cdx):
return (self.render_to_string(wbrequest=wbrequest, return (self.render_to_string(wbrequest=wbrequest,
cdx=cdx, cdx=cdx,
canon_url=canon_url, top_url=top_url,
include_ts=include_ts, include_ts=include_ts,
include_wombat=include_wombat, include_wombat=include_wombat,
banner_html=self.banner_html, banner_html=self.banner_html,
rule=rule)) rule=rule))
return make_head_insert return make_head_insert
def _make_loaders(self, template_dir):
loaders = []
loaders.append(FileSystemLoader(template_dir))
# add relative and absolute path loaders
loaders.append(FileSystemLoader('.'))
loaders.append(FileSystemLoader('/'))
loaders.append(PackageLoader(self.env_globals['package'], template_dir))
return loaders
@staticmethod @staticmethod
def init_from_config(config): def init_from_config(config):
view = config.get('head_insert_view') view = config.get('head_insert_view')

View File

@ -34,7 +34,7 @@ class PyTest(TestCommand):
setup( setup(
name='pywb', name='pywb',
version='0.6.1', version='0.6.2',
url='https://github.com/ikreymer/pywb', url='https://github.com/ikreymer/pywb',
author='Ilya Kreymer', author='Ilya Kreymer',
author_email='ikreymer@gmail.com', author_email='ikreymer@gmail.com',

View File

@ -87,19 +87,19 @@ class TestWb:
assert actual_len == 3, actual_len assert actual_len == 3, actual_len
def test_replay_top_frame(self): def test_replay_top_frame(self):
resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/') resp = self.testapp.get('/pywb/20140127171238tf_/http://www.iana.org/')
assert '<iframe ' in resp.body assert '<iframe ' in resp.body
assert '/pywb/20140127171238mp_/http://www.iana.org/' in resp.body assert '/pywb/20140127171238/http://www.iana.org/' in resp.body
def test_replay_content(self): def test_replay_content(self):
resp = self.testapp.get('/pywb/20140127171238mp_/http://www.iana.org/') resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
self._assert_basic_html(resp) self._assert_basic_html(resp)
assert '"20140127171238"' in resp.body assert '"20140127171238"' in resp.body
assert 'wb.js' in resp.body assert 'wb.js' in resp.body
assert 'WB_wombat_init' in resp.body assert 'WB_wombat_init' in resp.body
assert '/pywb/20140127171238mp_/http://www.iana.org/time-zones"' in resp.body assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body
def test_replay_non_frame_content(self): def test_replay_non_frame_content(self):
resp = self.testapp.get('/pywb-nonframe/20140127171238/http://www.iana.org/') resp = self.testapp.get('/pywb-nonframe/20140127171238/http://www.iana.org/')
@ -110,28 +110,28 @@ class TestWb:
assert '/pywb-nonframe/20140127171238/http://www.iana.org/time-zones"' in resp.body assert '/pywb-nonframe/20140127171238/http://www.iana.org/time-zones"' in resp.body
def test_replay_non_surt(self): def test_replay_non_surt(self):
resp = self.testapp.get('/pywb-nosurt/20140103030321mp_/http://example.com?example=1') resp = self.testapp.get('/pywb-nosurt/20140103030321/http://example.com?example=1')
self._assert_basic_html(resp) self._assert_basic_html(resp)
assert '"20140103030321"' in resp.body assert '"20140103030321"' in resp.body
assert 'wb.js' in resp.body assert 'wb.js' in resp.body
assert '/pywb-nosurt/20140103030321mp_/http://www.iana.org/domains/example' in resp.body assert '/pywb-nosurt/20140103030321/http://www.iana.org/domains/example' in resp.body
def test_zero_len_revisit(self): def test_zero_len_revisit(self):
resp = self.testapp.get('/pywb/20140603030341mp_/http://example.com?example=2') resp = self.testapp.get('/pywb/20140603030341/http://example.com?example=2')
self._assert_basic_html(resp) self._assert_basic_html(resp)
assert '"20140603030341"' in resp.body assert '"20140603030341"' in resp.body
assert 'wb.js' in resp.body assert 'wb.js' in resp.body
assert '/pywb/20140603030341mp_/http://www.iana.org/domains/example' in resp.body assert '/pywb/20140603030341/http://www.iana.org/domains/example' in resp.body
def test_replay_url_agnostic_revisit(self): def test_replay_url_agnostic_revisit(self):
resp = self.testapp.get('/pywb/20130729195151mp_/http://www.example.com/') resp = self.testapp.get('/pywb/20130729195151/http://www.example.com/')
self._assert_basic_html(resp) self._assert_basic_html(resp)
assert '"20130729195151"' in resp.body assert '"20130729195151"' in resp.body
assert 'wb.js' in resp.body assert 'wb.js' in resp.body
assert '/pywb/20130729195151mp_/http://www.iana.org/domains/example"' in resp.body assert '/pywb/20130729195151/http://www.iana.org/domains/example"' in resp.body
def test_replay_cdx_mod(self): def test_replay_cdx_mod(self):
resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css') resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css')
@ -200,56 +200,56 @@ class TestWb:
assert resp.content_type == 'application/x-javascript' assert resp.content_type == 'application/x-javascript'
def test_redirect_1(self): def test_redirect_1(self):
resp = self.testapp.get('/pywb/20140127171237mp_/http://www.iana.org/') resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/')
assert resp.status_int == 302 assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140127171238mp_/http://iana.org') assert resp.headers['Location'].endswith('/pywb/20140127171238/http://iana.org')
def test_redirect_replay_2(self): def test_redirect_replay_2(self):
resp = self.testapp.get('/pywb/mp_/http://example.com/') resp = self.testapp.get('/pywb/http://example.com/')
assert resp.status_int == 302 assert resp.status_int == 302
assert resp.headers['Location'].endswith('/20140127171251mp_/http://example.com') assert resp.headers['Location'].endswith('/20140127171251/http://example.com')
resp = resp.follow() resp = resp.follow()
#check resp #check resp
self._assert_basic_html(resp) self._assert_basic_html(resp)
assert '"20140127171251"' in resp.body assert '"20140127171251"' in resp.body
assert '/pywb/20140127171251mp_/http://www.iana.org/domains/example' in resp.body assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body
def test_redirect_relative_3(self): def test_redirect_relative_3(self):
# webtest uses Host: localhost:80 by default # webtest uses Host: localhost:80 by default
# first two requests should result in same redirect # first two requests should result in same redirect
target = 'http://localhost:80/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css' target = 'http://localhost:80/pywb/2014/http://iana.org/_css/2013.1/screen.css'
# without timestamp # without timestamp
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')]) resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014/http://iana.org/')])
assert resp.status_int == 302 assert resp.status_int == 302
assert resp.headers['Location'] == target, resp.headers['Location'] assert resp.headers['Location'] == target, resp.headers['Location']
# with timestamp # with timestamp
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')]) resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014/http://iana.org/')])
assert resp.status_int == 302 assert resp.status_int == 302
assert resp.headers['Location'] == target, resp.headers['Location'] assert resp.headers['Location'] == target, resp.headers['Location']
resp = resp.follow() resp = resp.follow()
assert resp.status_int == 302 assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css') assert resp.headers['Location'].endswith('/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css')
resp = resp.follow() resp = resp.follow()
assert resp.status_int == 200 assert resp.status_int == 200
assert resp.content_type == 'text/css' assert resp.content_type == 'text/css'
def test_rel_self_redirect(self): def test_rel_self_redirect(self):
uri = '/pywb/20140126200927mp_/http://www.iana.org/domains/root/db' uri = '/pywb/20140126200927/http://www.iana.org/domains/root/db'
resp = self.testapp.get(uri, status=302) resp = self.testapp.get(uri, status=302)
assert resp.status_int == 302 assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140126200928mp_/http://www.iana.org/domains/root/db') assert resp.headers['Location'].endswith('/pywb/20140126200928/http://www.iana.org/domains/root/db')
#def test_referrer_self_redirect(self): #def test_referrer_self_redirect(self):
# uri = '/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css' # uri = '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css'
# host = 'somehost:8082' # host = 'somehost:8082'
# referrer = 'http://' + host + uri # referrer = 'http://' + host + uri
@ -262,33 +262,33 @@ class TestWb:
# assert resp.status_int == 302 # assert resp.status_int == 302
def test_not_existant_warc_other_capture(self): def test_not_existant_warc_other_capture(self):
resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com?example=2') resp = self.testapp.get('/pywb/20140703030321/http://example.com?example=2')
assert resp.status_int == 302 assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2') assert resp.headers['Location'].endswith('/pywb/20140603030341/http://example.com?example=2')
def test_missing_revisit_other_capture(self): def test_missing_revisit_other_capture(self):
resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com?example=2') resp = self.testapp.get('/pywb/20140603030351/http://example.com?example=2')
assert resp.status_int == 302 assert resp.status_int == 302
assert resp.headers['Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2') assert resp.headers['Location'].endswith('/pywb/20140603030341/http://example.com?example=2')
def test_not_existant_warc_no_other(self): def test_not_existant_warc_no_other(self):
resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com?example=3', status = 503) resp = self.testapp.get('/pywb/20140703030321/http://example.com?example=3', status = 503)
assert resp.status_int == 503 assert resp.status_int == 503
def test_missing_revisit_no_other(self): def test_missing_revisit_no_other(self):
resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com?example=3', status = 503) resp = self.testapp.get('/pywb/20140603030351/http://example.com?example=3', status = 503)
assert resp.status_int == 503 assert resp.status_int == 503
def test_live_frame(self): def test_live_frame(self):
resp = self.testapp.get('/live/mp_/http://example.com/?test=test') resp = self.testapp.get('/live/http://example.com/?test=test')
assert resp.status_int == 200 assert resp.status_int == 200
def test_live_fallback(self): def test_live_fallback(self):
resp = self.testapp.get('/pywb-fallback/mp_/http://example.com/?test=test') resp = self.testapp.get('/pywb-fallback//http://example.com/?test=test')
assert resp.status_int == 200 assert resp.status_int == 200
def test_post_1(self): def test_post_1(self):
resp = self.testapp.post('/pywb/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'}) resp = self.testapp.post('/pywb/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
# no redirects for POST, as some browsers (FF) show modal confirmation dialog! # no redirects for POST, as some browsers (FF) show modal confirmation dialog!
#assert resp.status_int == 307 #assert resp.status_int == 307
@ -303,24 +303,24 @@ class TestWb:
assert '"test": "abc"' in resp.body assert '"test": "abc"' in resp.body
def test_post_2(self): def test_post_2(self):
resp = self.testapp.post('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': '^'}) resp = self.testapp.post('/pywb/20140610001255/http://httpbin.org/post?foo=bar', {'data': '^'})
assert resp.status_int == 200 assert resp.status_int == 200
assert '"data": "^"' in resp.body assert '"data": "^"' in resp.body
def test_post_invalid(self): def test_post_invalid(self):
# not json # not json
resp = self.testapp.post_json('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': '^'}, status=404) resp = self.testapp.post_json('/pywb/20140610001255/http://httpbin.org/post?foo=bar', {'data': '^'}, status=404)
assert resp.status_int == 404 assert resp.status_int == 404
def test_post_redirect(self): def test_post_redirect(self):
# post handled without redirect (since 307 not allowed) # post handled without redirect (since 307 not allowed)
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:80/pywb/2014mp_/http://httpbin.org/post')]) resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:80/pywb/2014/http://httpbin.org/post')])
assert resp.status_int == 200 assert resp.status_int == 200
assert '"foo": "bar"' in resp.body assert '"foo": "bar"' in resp.body
assert '"test": "abc"' in resp.body assert '"test": "abc"' in resp.body
def test_excluded_content(self): def test_excluded_content(self):
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_img/bookmark_icon.ico', status = 403) resp = self.testapp.get('/pywb/http://www.iana.org/_img/bookmark_icon.ico', status = 403)
assert resp.status_int == 403 assert resp.status_int == 403
assert 'Excluded' in resp.body assert 'Excluded' in resp.body
@ -365,7 +365,7 @@ class TestWb:
def test_error(self): def test_error(self):
resp = self.testapp.get('/pywb/mp_/?abc', status = 400) resp = self.testapp.get('/pywb/?abc', status = 400)
assert resp.status_int == 400 assert resp.status_int == 400
assert 'Invalid Url: http://?abc' in resp.body assert 'Invalid Url: http://?abc' in resp.body

View File

@ -10,32 +10,32 @@ class TestLiveRewriter:
def test_live_rewrite_1(self): def test_live_rewrite_1(self):
headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/rewrite/other.example.com')] headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/rewrite/other.example.com')]
resp = self.testapp.get('/rewrite/mp_/http://example.com/', headers=headers) resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)
assert resp.status_int == 200 assert resp.status_int == 200
def test_live_rewrite_redirect_2(self): def test_live_rewrite_redirect_2(self):
resp = self.testapp.get('/rewrite/mp_/http://facebook.com/') resp = self.testapp.get('/rewrite/http://facebook.com/')
assert resp.status_int == 301 assert resp.status_int == 301
def test_live_rewrite_post(self): def test_live_rewrite_post(self):
resp = self.testapp.post('/rewrite/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'}) resp = self.testapp.post('/rewrite/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
assert resp.status_int == 200 assert resp.status_int == 200
assert '"foo": "bar"' in resp.body assert '"foo": "bar"' in resp.body
assert '"test": "abc"' in resp.body assert '"test": "abc"' in resp.body
assert resp.status_int == 200 assert resp.status_int == 200
def test_live_rewrite_frame(self): def test_live_rewrite_frame(self):
resp = self.testapp.get('/rewrite/http://example.com/') resp = self.testapp.get('/rewrite/tf_/http://example.com/')
assert resp.status_int == 200 assert resp.status_int == 200
assert '<iframe ' in resp.body assert '<iframe ' in resp.body
assert 'src="/rewrite/mp_/http://example.com/"' in resp.body assert 'src="/rewrite/http://example.com/"' in resp.body
def test_live_invalid(self): def test_live_invalid(self):
resp = self.testapp.get('/rewrite/mp_/http://abcdef', status=400) resp = self.testapp.get('/rewrite/http://abcdef', status=400)
assert resp.status_int == 400 assert resp.status_int == 400
def test_live_invalid_2(self): def test_live_invalid_2(self):
resp = self.testapp.get('/rewrite/mp_/@#$@#$', status=400) resp = self.testapp.get('/rewrite/@#$@#$', status=400)
assert resp.status_int == 400 assert resp.status_int == 400

View File

@ -34,7 +34,7 @@ class TestWb:
""" """
TimeGate with no Accept-Datetime header TimeGate with no Accept-Datetime header
""" """
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_css/2013.1/screen.css') resp = self.testapp.get('/pywb/http://www.iana.org/_css/2013.1/screen.css')
assert resp.status_int == 302 assert resp.status_int == 302
@ -46,7 +46,7 @@ class TestWb:
assert MEMENTO_DATETIME not in resp.headers assert MEMENTO_DATETIME not in resp.headers
assert '/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location'] assert '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
def test_timegate_accept_datetime(self): def test_timegate_accept_datetime(self):
@ -54,7 +54,7 @@ class TestWb:
TimeGate with Accept-Datetime header TimeGate with Accept-Datetime header
""" """
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'} headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_css/2013.1/screen.css', headers=headers) resp = self.testapp.get('/pywb//http://www.iana.org/_css/2013.1/screen.css', headers=headers)
assert resp.status_int == 302 assert resp.status_int == 302
@ -67,7 +67,7 @@ class TestWb:
assert MEMENTO_DATETIME not in resp.headers assert MEMENTO_DATETIME not in resp.headers
assert '/pywb/20140126200804mp_/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location'] assert '/pywb/20140126200804/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
def test_non_timegate_intermediate_redir(self): def test_non_timegate_intermediate_redir(self):
@ -76,7 +76,7 @@ class TestWb:
""" """
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'} headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
# not a timegate, partial timestamp /2014/ present # not a timegate, partial timestamp /2014/ present
resp = self.testapp.get('/pywb/2014mp_/http://www.iana.org/_css/2013.1/screen.css', headers=headers) resp = self.testapp.get('/pywb/2014/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
assert resp.status_int == 302 assert resp.status_int == 302
@ -90,14 +90,64 @@ class TestWb:
# redirect to latest, not negotiation via Accept-Datetime # redirect to latest, not negotiation via Accept-Datetime
assert '/pywb/20140127171239mp_/' in resp.headers['Location'] assert '/pywb/20140127171239/' in resp.headers['Location']
def test_top_frame_no_date(self):
"""
A top-frame request with no date, must treat as intermediate
Include timemap, timegate, original headers
"""
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
# not a timegate, ignore ACCEPT_DATETIME
resp = self.testapp.get('/pywb/tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
assert resp.status_int == 200
# no vary header
assert VARY not in resp.headers
# no memento-datetime
assert MEMENTO_DATETIME not in resp.headers
links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
def test_top_frame_with_date(self):
"""
A top-frame request with date, treat as intermediate
Include timemap, timegate, original headers and a link to the possible memento
"""
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
# not a timegate, ignore ACCEPT_DATETIME
resp = self.testapp.get('/pywb/20141012tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
assert resp.status_int == 200
# no vary header
assert VARY not in resp.headers
# no memento-datetime
assert MEMENTO_DATETIME not in resp.headers
links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
assert '<http://localhost:80/pywb/20141012/http://www.iana.org/_css/2013.1/screen.css>; rel="memento"' in links
def test_memento_url(self): def test_memento_url(self):
""" """
Memento response, 200 capture Memento response, 200 capture
""" """
resp = self.testapp.get('/pywb/20140126200804mp_/http://www.iana.org/_css/2013.1/screen.css') resp = self.testapp.get('/pywb/20140126200804/http://www.iana.org/_css/2013.1/screen.css')
assert resp.status_int == 200 assert resp.status_int == 200
@ -105,7 +155,7 @@ class TestWb:
links = self.get_links(resp) links = self.get_links(resp)
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
assert '<http://localhost:80/pywb/mp_/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
assert resp.headers[MEMENTO_DATETIME] == 'Sun, 26 Jan 2014 20:08:04 GMT' assert resp.headers[MEMENTO_DATETIME] == 'Sun, 26 Jan 2014 20:08:04 GMT'
@ -115,7 +165,7 @@ class TestWb:
""" """
Memento (capture) of a 302 response Memento (capture) of a 302 response
""" """
resp = self.testapp.get('/pywb/20140128051539mp_/http://www.iana.org/domains/example') resp = self.testapp.get('/pywb/20140128051539/http://www.iana.org/domains/example')
assert resp.status_int == 302 assert resp.status_int == 302
@ -123,7 +173,7 @@ class TestWb:
links = self.get_links(resp) links = self.get_links(resp)
assert '<http://www.iana.org/domains/example>; rel="original"' in links assert '<http://www.iana.org/domains/example>; rel="original"' in links
assert '<http://localhost:80/pywb/mp_/http://www.iana.org/domains/example>; rel="timegate"' in links assert '<http://localhost:80/pywb/http://www.iana.org/domains/example>; rel="timegate"' in links
assert self.make_timemap_link('http://www.iana.org/domains/example') in links assert self.make_timemap_link('http://www.iana.org/domains/example') in links
assert resp.headers[MEMENTO_DATETIME] == 'Tue, 28 Jan 2014 05:15:39 GMT' assert resp.headers[MEMENTO_DATETIME] == 'Tue, 28 Jan 2014 05:15:39 GMT'
@ -147,12 +197,12 @@ rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT"
assert lines[1] == '<http://example.com?example=1>; rel="original",' assert lines[1] == '<http://example.com?example=1>; rel="original",'
assert lines[2] == '<http://localhost:80/pywb/mp_/http://example.com?example=1>; rel="timegate",' assert lines[2] == '<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",'
assert lines[3] == '<http://localhost:80/pywb/20140103030321mp_/http://example.com?example=1>; \ assert lines[3] == '<http://localhost:80/pywb/20140103030321/http://example.com?example=1>; \
rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT",' rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT",'
assert lines[4] == '<http://localhost:80/pywb/20140103030341mp_/http://example.com?example=1>; \ assert lines[4] == '<http://localhost:80/pywb/20140103030341/http://example.com?example=1>; \
rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"' rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"'
def test_timemap_2(self): def test_timemap_2(self):