mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
framed replay: invert framed replay paradigm, replay always uses
canonical, no-modifier archival url (instead of mp_). When using frames, the page redirects to a 'tf_' page, which then uses replaceState() to change the url back to canonical form. memento: support for framed replay, include memento headers in top frame. bump version to 0.6.2
This commit is contained in:
parent
b99dcb41f0
commit
4a1cc46fa3
@ -1,6 +1,10 @@
|
|||||||
pywb 0.6.1 changelist
|
pywb 0.6.2 changelist
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
* Invert framed replay paradigm: Canonical page is always without a modifier (instead of with `mp_`). If using frames, the page redirects to `tf_` and uses replaceState() to change the url back to canonical form.
|
||||||
|
|
||||||
|
* Enable Memento support for framed replay, include Memento headers in top frame
|
||||||
|
|
||||||
* Easier to customize just the banner html, via `banner_html` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner.
|
* Easier to customize just the banner html, via `banner_html` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner.
|
||||||
|
|
||||||
Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting `banner_html: False` will disable the banner.
|
Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting `banner_html: False` will disable the banner.
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
PyWb 0.6.1
|
PyWb 0.6.2
|
||||||
==========
|
==========
|
||||||
|
|
||||||
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop
|
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop
|
||||||
|
@ -46,15 +46,22 @@ class MementoRespMixin(object):
|
|||||||
if not wbrequest or not wbrequest.wb_url:
|
if not wbrequest or not wbrequest.wb_url:
|
||||||
return
|
return
|
||||||
|
|
||||||
is_timegate = wbrequest.options.get('is_timegate', False)
|
is_top_frame = wbrequest.wb_url.is_top_frame
|
||||||
|
|
||||||
|
is_timegate = wbrequest.options.get('is_timegate', False) and not is_top_frame
|
||||||
|
|
||||||
if is_timegate:
|
if is_timegate:
|
||||||
self.status_headers.headers.append(('Vary', 'accept-datetime'))
|
self.status_headers.headers.append(('Vary', 'accept-datetime'))
|
||||||
|
|
||||||
# Determine if memento:
|
# Determine if memento:
|
||||||
# if no cdx included, definitely not a memento
|
is_memento = False
|
||||||
|
|
||||||
|
# if no cdx included, not a memento, unless top-frame special
|
||||||
if not cdx:
|
if not cdx:
|
||||||
is_memento = False
|
# special case: include the headers, except Memento-Datetime,
|
||||||
|
# since this is really an intermediate resource
|
||||||
|
if is_top_frame:
|
||||||
|
is_memento = True
|
||||||
|
|
||||||
# otherwise, if in proxy mode, then always a memento
|
# otherwise, if in proxy mode, then always a memento
|
||||||
elif wbrequest.options['is_proxy']:
|
elif wbrequest.options['is_proxy']:
|
||||||
@ -64,13 +71,19 @@ class MementoRespMixin(object):
|
|||||||
else:
|
else:
|
||||||
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
|
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
|
||||||
|
|
||||||
if is_memento:
|
link = []
|
||||||
|
|
||||||
|
if is_memento and cdx:
|
||||||
http_date = timestamp_to_http_date(cdx['timestamp'])
|
http_date = timestamp_to_http_date(cdx['timestamp'])
|
||||||
self.status_headers.headers.append(('Memento-Datetime', http_date))
|
self.status_headers.headers.append(('Memento-Datetime', http_date))
|
||||||
|
|
||||||
req_url = wbrequest.wb_url.url
|
elif is_memento and is_top_frame and wbrequest.wb_url.timestamp:
|
||||||
|
# top frame special case
|
||||||
|
canon_link = wbrequest.urlrewriter.prefix
|
||||||
|
canon_link += wbrequest.wb_url.to_str(mod='')
|
||||||
|
link.append(self.make_link(canon_link, 'memento'))
|
||||||
|
|
||||||
link = []
|
req_url = wbrequest.wb_url.url
|
||||||
|
|
||||||
if is_memento and is_timegate:
|
if is_memento and is_timegate:
|
||||||
link.append(self.make_link(req_url, 'original timegate'))
|
link.append(self.make_link(req_url, 'original timegate'))
|
||||||
@ -82,7 +95,8 @@ class MementoRespMixin(object):
|
|||||||
link.append(self.make_timemap_link(wbrequest))
|
link.append(self.make_timemap_link(wbrequest))
|
||||||
|
|
||||||
if is_memento and not is_timegate:
|
if is_memento and not is_timegate:
|
||||||
timegate = wbrequest.urlrewriter.get_timestamp_url('')
|
timegate = wbrequest.urlrewriter.prefix
|
||||||
|
timegate += wbrequest.wb_url.to_str(mod='', timestamp='')
|
||||||
link.append(self.make_link(timegate, 'timegate'))
|
link.append(self.make_link(timegate, 'timegate'))
|
||||||
|
|
||||||
link = ', '.join(link)
|
link = ', '.join(link)
|
||||||
@ -115,7 +129,7 @@ def make_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
|
|||||||
memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
|
memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
|
||||||
|
|
||||||
string = WbUrl.to_wburl_str(url=cdx['original'],
|
string = WbUrl.to_wburl_str(url=cdx['original'],
|
||||||
mod='mp_',
|
mod='',
|
||||||
timestamp=cdx['timestamp'],
|
timestamp=cdx['timestamp'],
|
||||||
type=WbUrl.REPLAY)
|
type=WbUrl.REPLAY)
|
||||||
|
|
||||||
@ -148,7 +162,7 @@ def make_timemap(wbrequest, cdx_lines):
|
|||||||
|
|
||||||
# timegate link
|
# timegate link
|
||||||
timegate = '<{0}>; rel="timegate",\n'
|
timegate = '<{0}>; rel="timegate",\n'
|
||||||
yield timegate.format(prefix + 'mp_/' + url)
|
yield timegate.format(prefix + url)
|
||||||
|
|
||||||
# first memento link
|
# first memento link
|
||||||
yield make_memento_link(first_cdx, prefix,
|
yield make_memento_link(first_cdx, prefix,
|
||||||
|
@ -51,6 +51,8 @@ class WbRequest(object):
|
|||||||
|
|
||||||
self.coll = coll
|
self.coll = coll
|
||||||
|
|
||||||
|
self.final_mod = ''
|
||||||
|
|
||||||
if not host_prefix:
|
if not host_prefix:
|
||||||
host_prefix = self.make_host_prefix(env)
|
host_prefix = self.make_host_prefix(env)
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ class RegexRewriter(object):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def archival_rewrite(rewriter):
|
def archival_rewrite(rewriter):
|
||||||
return lambda string: rewriter.rewrite(string, 'mp_')
|
return lambda string: rewriter.rewrite(string)
|
||||||
|
|
||||||
#@staticmethod
|
#@staticmethod
|
||||||
#def replacer(other):
|
#def replacer(other):
|
||||||
|
@ -28,7 +28,7 @@ class RewriteContent:
|
|||||||
ds_rules_file=ds_rules_file)
|
ds_rules_file=ds_rules_file)
|
||||||
|
|
||||||
if is_framed_replay:
|
if is_framed_replay:
|
||||||
self.defmod = 'mp_'
|
self.defmod = ''
|
||||||
else:
|
else:
|
||||||
self.defmod = ''
|
self.defmod = ''
|
||||||
|
|
||||||
|
@ -62,7 +62,7 @@ ur"""
|
|||||||
|
|
||||||
# Script tag
|
# Script tag
|
||||||
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</script>')
|
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</script>')
|
||||||
<script>window.WB_wombat_location = "/web/20131226101010mp_/http://example.com/a/b/c.html"</script>
|
<script>window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html"</script>
|
||||||
|
|
||||||
# Script tag + crossorigin
|
# Script tag + crossorigin
|
||||||
>>> parse('<script src="/js/scripts.js" crossorigin="anonymous"></script>')
|
>>> parse('<script src="/js/scripts.js" crossorigin="anonymous"></script>')
|
||||||
@ -70,21 +70,21 @@ ur"""
|
|||||||
|
|
||||||
# Unterminated script tag, handle and auto-terminate
|
# Unterminated script tag, handle and auto-terminate
|
||||||
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</sc>')
|
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</sc>')
|
||||||
<script>window.WB_wombat_location = "/web/20131226101010mp_/http://example.com/a/b/c.html"</sc></script>
|
<script>window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html"</sc></script>
|
||||||
|
|
||||||
>>> parse('<script>/*<![CDATA[*/window.location = "http://example.com/a/b/c.html;/*]]>*/"</script>')
|
>>> parse('<script>/*<![CDATA[*/window.location = "http://example.com/a/b/c.html;/*]]>*/"</script>')
|
||||||
<script>/*<![CDATA[*/window.WB_wombat_location = "/web/20131226101010mp_/http://example.com/a/b/c.html;/*]]>*/"</script>
|
<script>/*<![CDATA[*/window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html;/*]]>*/"</script>
|
||||||
|
|
||||||
>>> parse('<div style="background: url(\'abc.html\')" onblah onclick="location = \'redirect.html\'"></div>')
|
>>> parse('<div style="background: url(\'abc.html\')" onblah onclick="location = \'redirect.html\'"></div>')
|
||||||
<div style="background: url('/web/20131226101010mp_/http://example.com/some/path/abc.html')" onblah="" onclick="WB_wombat_location = 'redirect.html'"></div>
|
<div style="background: url('/web/20131226101010/http://example.com/some/path/abc.html')" onblah="" onclick="WB_wombat_location = 'redirect.html'"></div>
|
||||||
|
|
||||||
# Style
|
# Style
|
||||||
>>> parse('<style>@import "styles.css" .a { font-face: url(\'myfont.ttf\') }</style>')
|
>>> parse('<style>@import "styles.css" .a { font-face: url(\'myfont.ttf\') }</style>')
|
||||||
<style>@import "/web/20131226101010mp_/http://example.com/some/path/styles.css" .a { font-face: url('/web/20131226101010mp_/http://example.com/some/path/myfont.ttf') }</style>
|
<style>@import "/web/20131226101010/http://example.com/some/path/styles.css" .a { font-face: url('/web/20131226101010/http://example.com/some/path/myfont.ttf') }</style>
|
||||||
|
|
||||||
# Unterminated style tag, handle and auto-terminate
|
# Unterminated style tag, handle and auto-terminate
|
||||||
>>> parse('<style>@import url(styles.css)')
|
>>> parse('<style>@import url(styles.css)')
|
||||||
<style>@import url(/web/20131226101010mp_/http://example.com/some/path/styles.css)</style>
|
<style>@import url(/web/20131226101010/http://example.com/some/path/styles.css)</style>
|
||||||
|
|
||||||
# Head Insertion
|
# Head Insertion
|
||||||
>>> parse('<html><head><script src="other.js"></script></head><body>Test</body></html>', head_insert = '<script src="cool.js"></script>')
|
>>> parse('<html><head><script src="other.js"></script></head><body>Test</body></html>', head_insert = '<script src="cool.js"></script>')
|
||||||
|
@ -12,16 +12,16 @@ r"""
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
>>> _test_js('location = "http://example.com/abc.html"')
|
>>> _test_js('location = "http://example.com/abc.html"')
|
||||||
'WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html"'
|
'WB_wombat_location = "/web/20131010/http://example.com/abc.html"'
|
||||||
|
|
||||||
>>> _test_js(r'location = "http:\/\/example.com/abc.html"')
|
>>> _test_js(r'location = "http:\/\/example.com/abc.html"')
|
||||||
'WB_wombat_location = "/web/20131010mp_/http:\\/\\/example.com/abc.html"'
|
'WB_wombat_location = "/web/20131010/http:\\/\\/example.com/abc.html"'
|
||||||
|
|
||||||
>>> _test_js(r'location = "http:\\/\\/example.com/abc.html"')
|
>>> _test_js(r'location = "http:\\/\\/example.com/abc.html"')
|
||||||
'WB_wombat_location = "/web/20131010mp_/http:\\\\/\\\\/example.com/abc.html"'
|
'WB_wombat_location = "/web/20131010/http:\\\\/\\\\/example.com/abc.html"'
|
||||||
|
|
||||||
>>> _test_js(r"location = 'http://example.com/abc.html/'")
|
>>> _test_js(r"location = 'http://example.com/abc.html/'")
|
||||||
"WB_wombat_location = '/web/20131010mp_/http://example.com/abc.html/'"
|
"WB_wombat_location = '/web/20131010/http://example.com/abc.html/'"
|
||||||
|
|
||||||
>>> _test_js(r'location = http://example.com/abc.html/')
|
>>> _test_js(r'location = http://example.com/abc.html/')
|
||||||
'WB_wombat_location = http://example.com/abc.html/'
|
'WB_wombat_location = http://example.com/abc.html/'
|
||||||
@ -37,21 +37,21 @@ r"""
|
|||||||
'"/location" == some_location_val; locations = WB_wombat_location;'
|
'"/location" == some_location_val; locations = WB_wombat_location;'
|
||||||
|
|
||||||
>>> _test_js('cool_Location = "http://example.com/abc.html"')
|
>>> _test_js('cool_Location = "http://example.com/abc.html"')
|
||||||
'cool_Location = "/web/20131010mp_/http://example.com/abc.html"'
|
'cool_Location = "/web/20131010/http://example.com/abc.html"'
|
||||||
|
|
||||||
>>> _test_js('window.location = "http://example.com/abc.html" document.domain = "anotherdomain.com"')
|
>>> _test_js('window.location = "http://example.com/abc.html" document.domain = "anotherdomain.com"')
|
||||||
'window.WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html" document.WB_wombat_domain = "anotherdomain.com"'
|
'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html" document.WB_wombat_domain = "anotherdomain.com"'
|
||||||
|
|
||||||
>>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"')
|
>>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"')
|
||||||
'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"'
|
'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"'
|
||||||
|
|
||||||
# custom rules added
|
# custom rules added
|
||||||
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
|
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
|
||||||
'window.WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html"; /*some_func(); */'
|
'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */'
|
||||||
|
|
||||||
# scheme-agnostic
|
# scheme-agnostic
|
||||||
>>> _test_js('cool_Location = "//example.com/abc.html" //comment')
|
>>> _test_js('cool_Location = "//example.com/abc.html" //comment')
|
||||||
'cool_Location = "/web/20131010mp_/http://example.com/abc.html" //comment'
|
'cool_Location = "/web/20131010/http://example.com/abc.html" //comment'
|
||||||
|
|
||||||
# document.cookie test
|
# document.cookie test
|
||||||
>>> _test_js('document.cookie = "a=b; Path=/"')
|
>>> _test_js('document.cookie = "a=b; Path=/"')
|
||||||
@ -59,7 +59,7 @@ r"""
|
|||||||
|
|
||||||
# js-escaped
|
# js-escaped
|
||||||
>>> _test_js('"http:\\/\\/www.example.com\\/some\\/path\\/?query=1"')
|
>>> _test_js('"http:\\/\\/www.example.com\\/some\\/path\\/?query=1"')
|
||||||
'"/web/20131010mp_/http:\\/\\/www.example.com\\/some\\/path\\/?query=1"'
|
'"/web/20131010/http:\\/\\/www.example.com\\/some\\/path\\/?query=1"'
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -67,68 +67,68 @@ r"""
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
>>> _test_xml('<tag xmlns="http://www.example.com/ns" attr="http://example.com"></tag>')
|
>>> _test_xml('<tag xmlns="http://www.example.com/ns" attr="http://example.com"></tag>')
|
||||||
'<tag xmlns="http://www.example.com/ns" attr="/web/20131010mp_/http://example.com"></tag>'
|
'<tag xmlns="http://www.example.com/ns" attr="/web/20131010/http://example.com"></tag>'
|
||||||
|
|
||||||
>>> _test_xml('<tag xmlns:xsi="http://www.example.com/ns" attr=" http://example.com"></tag>')
|
>>> _test_xml('<tag xmlns:xsi="http://www.example.com/ns" attr=" http://example.com"></tag>')
|
||||||
'<tag xmlns:xsi="http://www.example.com/ns" attr=" /web/20131010mp_/http://example.com"></tag>'
|
'<tag xmlns:xsi="http://www.example.com/ns" attr=" /web/20131010/http://example.com"></tag>'
|
||||||
|
|
||||||
>>> _test_xml('<tag> http://example.com<other>abchttp://example.com</other></tag>')
|
>>> _test_xml('<tag> http://example.com<other>abchttp://example.com</other></tag>')
|
||||||
'<tag> /web/20131010mp_/http://example.com<other>abchttp://example.com</other></tag>'
|
'<tag> /web/20131010/http://example.com<other>abchttp://example.com</other></tag>'
|
||||||
|
|
||||||
>>> _test_xml('<main> http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> http://example.com </main>')
|
>>> _test_xml('<main> http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> http://example.com </main>')
|
||||||
'<main> /web/20131010mp_/http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> /web/20131010mp_/http://example.com </main>'
|
'<main> /web/20131010/http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com"/> /web/20131010/http://example.com </main>'
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# CSS Rewriting
|
# CSS Rewriting
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
>>> _test_css("background: url('/some/path.html')")
|
>>> _test_css("background: url('/some/path.html')")
|
||||||
"background: url('/web/20131010mp_/http://example.com/some/path.html')"
|
"background: url('/web/20131010/http://example.com/some/path.html')"
|
||||||
|
|
||||||
>>> _test_css("background: url('../path.html')")
|
>>> _test_css("background: url('../path.html')")
|
||||||
"background: url('/web/20131010mp_/http://example.com/path.html')"
|
"background: url('/web/20131010/http://example.com/path.html')"
|
||||||
|
|
||||||
>>> _test_css("background: url(\"http://domain.com/path.html\")")
|
>>> _test_css("background: url(\"http://domain.com/path.html\")")
|
||||||
'background: url("/web/20131010mp_/http://domain.com/path.html")'
|
'background: url("/web/20131010/http://domain.com/path.html")'
|
||||||
|
|
||||||
>>> _test_css("background: url(file.jpeg)")
|
>>> _test_css("background: url(file.jpeg)")
|
||||||
'background: url(/web/20131010mp_/http://example.com/file.jpeg)'
|
'background: url(/web/20131010/http://example.com/file.jpeg)'
|
||||||
|
|
||||||
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
|
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
|
||||||
"background:#abc url('/web/20131010mp_/http://example.com/static/images/layout/logo.png')"
|
"background:#abc url('/web/20131010/http://example.com/static/images/layout/logo.png')"
|
||||||
|
|
||||||
>>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')")
|
>>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')")
|
||||||
"background:#000 url('/web/20131010mp_/http://example.com/images/layout/logo.png')"
|
"background:#000 url('/web/20131010/http://example.com/images/layout/logo.png')"
|
||||||
|
|
||||||
>>> _test_css("background: url('')")
|
>>> _test_css("background: url('')")
|
||||||
"background: url('')"
|
"background: url('')"
|
||||||
|
|
||||||
>>> _test_css("background: url (\"weirdpath\')")
|
>>> _test_css("background: url (\"weirdpath\')")
|
||||||
'background: url ("/web/20131010mp_/http://example.com/weirdpath\')'
|
'background: url ("/web/20131010/http://example.com/weirdpath\')'
|
||||||
|
|
||||||
>>> _test_css("@import url ('path.css')")
|
>>> _test_css("@import url ('path.css')")
|
||||||
"@import url ('/web/20131010mp_/http://example.com/path.css')"
|
"@import url ('/web/20131010/http://example.com/path.css')"
|
||||||
|
|
||||||
>>> _test_css("@import url('path.css')")
|
>>> _test_css("@import url('path.css')")
|
||||||
"@import url('/web/20131010mp_/http://example.com/path.css')"
|
"@import url('/web/20131010/http://example.com/path.css')"
|
||||||
|
|
||||||
>>> _test_css("@import ( 'path.css')")
|
>>> _test_css("@import ( 'path.css')")
|
||||||
"@import ( '/web/20131010mp_/http://example.com/path.css')"
|
"@import ( '/web/20131010/http://example.com/path.css')"
|
||||||
|
|
||||||
>>> _test_css("@import \"path.css\"")
|
>>> _test_css("@import \"path.css\"")
|
||||||
'@import "/web/20131010mp_/http://example.com/path.css"'
|
'@import "/web/20131010/http://example.com/path.css"'
|
||||||
|
|
||||||
>>> _test_css("@import ('../path.css\"")
|
>>> _test_css("@import ('../path.css\"")
|
||||||
'@import (\'/web/20131010mp_/http://example.com/path.css"'
|
'@import (\'/web/20131010/http://example.com/path.css"'
|
||||||
|
|
||||||
>>> _test_css("@import ('../url.css\"")
|
>>> _test_css("@import ('../url.css\"")
|
||||||
'@import (\'/web/20131010mp_/http://example.com/url.css"'
|
'@import (\'/web/20131010/http://example.com/url.css"'
|
||||||
|
|
||||||
>>> _test_css("@import (\"url.css\")")
|
>>> _test_css("@import (\"url.css\")")
|
||||||
'@import ("/web/20131010mp_/http://example.com/url.css")'
|
'@import ("/web/20131010/http://example.com/url.css")'
|
||||||
|
|
||||||
>>> _test_css("@import url(/url.css)\n@import url(/anotherurl.css)\n @import url(/and_a_third.css)")
|
>>> _test_css("@import url(/url.css)\n@import url(/anotherurl.css)\n @import url(/and_a_third.css)")
|
||||||
'@import url(/web/20131010mp_/http://example.com/url.css)\n@import url(/web/20131010mp_/http://example.com/anotherurl.css)\n @import url(/web/20131010mp_/http://example.com/and_a_third.css)'
|
'@import url(/web/20131010/http://example.com/url.css)\n@import url(/web/20131010/http://example.com/anotherurl.css)\n @import url(/web/20131010/http://example.com/and_a_third.css)'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -193,10 +193,14 @@ class WbUrl(BaseWbUrl):
|
|||||||
return (not self.mod or
|
return (not self.mod or
|
||||||
self.mod == 'mp_')
|
self.mod == 'mp_')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_top_frame(self):
|
||||||
|
return (self.mod == 'tf_')
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_embed(self):
|
def is_embed(self):
|
||||||
return (self.mod and
|
return (self.mod and
|
||||||
self.mod not in ('id_', 'mp_', 'bn_'))
|
self.mod not in ('id_', 'mp_', 'tf_', 'bn_'))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_banner_only(self):
|
def is_banner_only(self):
|
||||||
|
@ -118,9 +118,9 @@ function notify_top() {
|
|||||||
|
|
||||||
this.load = function() {
|
this.load = function() {
|
||||||
if ((window.self == window.top) && wbinfo) {
|
if ((window.self == window.top) && wbinfo) {
|
||||||
if (wbinfo.canon_url && (window.location.href != wbinfo.canon_url) && wbinfo.mod != "bn_") {
|
if (wbinfo.top_url && (window.location.href != wbinfo.top_url) && wbinfo.mod != "bn_") {
|
||||||
// Auto-redirect to top frame
|
// Auto-redirect to top frame
|
||||||
window.location.replace(wbinfo.canon_url);
|
window.location.replace(wbinfo.top_url);
|
||||||
} else {
|
} else {
|
||||||
// Init Banner (no frame or top frame)
|
// Init Banner (no frame or top frame)
|
||||||
add_event("readystatechange", init_banner, document);
|
add_event("readystatechange", init_banner, document);
|
||||||
|
@ -1,23 +1,25 @@
|
|||||||
var LIVE_COOKIE_REGEX = /pywb.timestamp=([\d]{1,14})/;
|
var LIVE_COOKIE_REGEX = /pywb.timestamp=([\d]{1,14})/;
|
||||||
|
|
||||||
|
var TS_REGEX = /\/([\d]{1,14})\//;
|
||||||
|
|
||||||
var curr_state = {};
|
var curr_state = {};
|
||||||
|
|
||||||
|
|
||||||
function make_outer_url(url, ts)
|
function make_outer_url(url, ts)
|
||||||
{
|
{
|
||||||
if (ts) {
|
if (ts) {
|
||||||
return wbinfo.prefix + ts + "/" + url;
|
return wbinfo.prefix + ts + "tf_/" + url;
|
||||||
} else {
|
} else {
|
||||||
return wbinfo.prefix + url;
|
return wbinfo.prefix + "tf_/" + url;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function make_inner_url(url, ts)
|
function make_inner_url(url, ts)
|
||||||
{
|
{
|
||||||
if (ts) {
|
if (ts) {
|
||||||
return wbinfo.prefix + ts + "mp_/" + url;
|
return wbinfo.prefix + ts + "/" + url;
|
||||||
} else {
|
} else {
|
||||||
return wbinfo.prefix + "mp_/" + url;
|
return wbinfo.prefix + "/" + url;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -39,7 +41,7 @@ function push_state(url, timestamp, capture_str, is_live) {
|
|||||||
state.capture_str = capture_str;
|
state.capture_str = capture_str;
|
||||||
state.is_live = is_live;
|
state.is_live = is_live;
|
||||||
|
|
||||||
window.history.replaceState(state, "", state.outer_url);
|
window.history.replaceState(state, "", state.inner_url);
|
||||||
|
|
||||||
set_state(state);
|
set_state(state);
|
||||||
}
|
}
|
||||||
@ -52,16 +54,12 @@ function pop_state(state) {
|
|||||||
|
|
||||||
function extract_ts(url)
|
function extract_ts(url)
|
||||||
{
|
{
|
||||||
var inx = url.indexOf("mp_");
|
var result = value.match(TS_REGEX);
|
||||||
if (inx < 0) {
|
if (!result) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
url = url.substring(0, inx);
|
|
||||||
inx = url.lastIndexOf("/");
|
return result[1];
|
||||||
if (inx <= 0) {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
return url.substring(inx + 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function extract_replay_url(url) {
|
function extract_replay_url(url) {
|
||||||
|
@ -712,7 +712,7 @@ WB_wombat_init = (function() {
|
|||||||
wb_replay_prefix = replay_prefix;
|
wb_replay_prefix = replay_prefix;
|
||||||
|
|
||||||
if (wb_replay_prefix) {
|
if (wb_replay_prefix) {
|
||||||
wb_replay_date_prefix = replay_prefix + capture_date + "mp_/";
|
wb_replay_date_prefix = replay_prefix + capture_date + "/";
|
||||||
|
|
||||||
if (capture_date.length > 0) {
|
if (capture_date.length > 0) {
|
||||||
wb_capture_date_part = "/" + capture_date + "/";
|
wb_capture_date_part = "/" + capture_date + "/";
|
||||||
|
@ -8,7 +8,9 @@
|
|||||||
wbinfo.is_frame = true;
|
wbinfo.is_frame = true;
|
||||||
</script>
|
</script>
|
||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
|
||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/default_banner.js'> </script>
|
|
||||||
|
{% include banner_html ignore missing %}
|
||||||
|
|
||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb_frame.js'> </script>
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb_frame.js'> </script>
|
||||||
<link rel='stylesheet' href='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.css'/>
|
<link rel='stylesheet' href='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.css'/>
|
||||||
<!-- End WB Insert -->
|
<!-- End WB Insert -->
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
wbinfo.timestamp = "{{ cdx.timestamp }}";
|
wbinfo.timestamp = "{{ cdx.timestamp }}";
|
||||||
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
|
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
|
||||||
wbinfo.mod = "{{ wbrequest.wb_url.mod }}";
|
wbinfo.mod = "{{ wbrequest.wb_url.mod }}";
|
||||||
wbinfo.canon_url = "{{ canon_url }}";
|
wbinfo.top_url = "{{ top_url }}";
|
||||||
wbinfo.is_live = {{ "true" if cdx.is_live else "false" }};
|
wbinfo.is_live = {{ "true" if cdx.is_live else "false" }};
|
||||||
wbinfo.coll = "{{ wbrequest.coll }}";
|
wbinfo.coll = "{{ wbrequest.coll }}";
|
||||||
wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}";
|
wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}";
|
||||||
|
@ -6,6 +6,7 @@ from datetime import datetime
|
|||||||
|
|
||||||
from pywb.utils.wbexception import NotFoundException
|
from pywb.utils.wbexception import NotFoundException
|
||||||
from pywb.utils.loaders import BlockLoader
|
from pywb.utils.loaders import BlockLoader
|
||||||
|
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||||
|
|
||||||
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
||||||
from pywb.framework.wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
@ -15,6 +16,7 @@ from pywb.warc.resolvingloader import ResolvingLoader
|
|||||||
|
|
||||||
from views import J2TemplateView
|
from views import J2TemplateView
|
||||||
from replay_views import ReplayView
|
from replay_views import ReplayView
|
||||||
|
from pywb.framework.memento import MementoResponse
|
||||||
from pywb.utils.timeutils import datetime_to_timestamp
|
from pywb.utils.timeutils import datetime_to_timestamp
|
||||||
|
|
||||||
|
|
||||||
@ -30,13 +32,21 @@ class SearchPageWbUrlHandler(WbUrlHandler):
|
|||||||
'Search Page'))
|
'Search Page'))
|
||||||
|
|
||||||
self.is_frame_mode = config.get('framed_replay', False)
|
self.is_frame_mode = config.get('framed_replay', False)
|
||||||
|
self.response_class = WbResponse
|
||||||
|
|
||||||
if self.is_frame_mode:
|
if self.is_frame_mode:
|
||||||
html = config.get('frame_insert_html', 'ui/frame_insert.html')
|
html = config.get('frame_insert_html', 'ui/frame_insert.html')
|
||||||
self.frame_insert_view = (J2TemplateView.
|
self.frame_insert_view = (J2TemplateView.
|
||||||
create_template(html, 'Frame Insert'))
|
create_template(html, 'Frame Insert'))
|
||||||
|
|
||||||
|
self.banner_html = config.get('banner_html', 'banner.html')
|
||||||
|
|
||||||
|
if config.get('enable_memento', False):
|
||||||
|
self.response_class = MementoResponse
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.frame_insert_view = None
|
self.frame_insert_view = None
|
||||||
|
self.banner_html = None
|
||||||
|
|
||||||
def render_search_page(self, wbrequest, **kwargs):
|
def render_search_page(self, wbrequest, **kwargs):
|
||||||
if self.search_view:
|
if self.search_view:
|
||||||
@ -55,28 +65,36 @@ class SearchPageWbUrlHandler(WbUrlHandler):
|
|||||||
# (not supported in proxy mode)
|
# (not supported in proxy mode)
|
||||||
if (self.is_frame_mode and wbrequest.wb_url and
|
if (self.is_frame_mode and wbrequest.wb_url and
|
||||||
not wbrequest.wb_url.is_query() and
|
not wbrequest.wb_url.is_query() and
|
||||||
not wbrequest.wb_url.mod and
|
|
||||||
not wbrequest.options['is_proxy']):
|
not wbrequest.options['is_proxy']):
|
||||||
|
|
||||||
params = self.get_top_frame_params(wbrequest)
|
if wbrequest.wb_url.is_top_frame:
|
||||||
|
return self.get_top_frame_response(wbrequest)
|
||||||
return self.frame_insert_view.render_response(**params)
|
else:
|
||||||
|
wbrequest.final_mod = 'tf_'
|
||||||
|
|
||||||
return self.handle_request(wbrequest)
|
return self.handle_request(wbrequest)
|
||||||
|
|
||||||
def get_top_frame_params(self, wbrequest):
|
def get_top_frame_response(self, wbrequest):
|
||||||
if wbrequest.wb_url.timestamp:
|
if wbrequest.wb_url.timestamp:
|
||||||
timestamp = wbrequest.wb_url.timestamp
|
timestamp = wbrequest.wb_url.timestamp
|
||||||
else:
|
else:
|
||||||
timestamp = datetime_to_timestamp(datetime.utcnow())
|
timestamp = datetime_to_timestamp(datetime.utcnow())
|
||||||
|
|
||||||
embed_url = wbrequest.wb_url.to_str(mod='mp_')
|
embed_url = wbrequest.wb_url.to_str(mod='')
|
||||||
|
|
||||||
return dict(embed_url=embed_url,
|
params = dict(embed_url=embed_url,
|
||||||
wbrequest=wbrequest,
|
wbrequest=wbrequest,
|
||||||
timestamp=timestamp,
|
timestamp=timestamp,
|
||||||
url=wbrequest.wb_url.url,
|
url=wbrequest.wb_url.url,
|
||||||
content_type='text/html')
|
banner_html=self.banner_html)
|
||||||
|
|
||||||
|
headers = [('Content-Type', 'text/html; charset=utf-8')]
|
||||||
|
status_headers = StatusAndHeaders('200 OK', headers)
|
||||||
|
|
||||||
|
template_result = self.frame_insert_view.render_to_string(**params)
|
||||||
|
body = template_result.encode('utf-8')
|
||||||
|
|
||||||
|
return self.response_class(status_headers, [body], wbrequest=wbrequest)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -88,6 +88,9 @@ class J2TemplateView(object):
|
|||||||
def _make_loaders(self, template_dir):
|
def _make_loaders(self, template_dir):
|
||||||
loaders = []
|
loaders = []
|
||||||
loaders.append(FileSystemLoader(template_dir))
|
loaders.append(FileSystemLoader(template_dir))
|
||||||
|
# add relative and absolute path loaders for banner support
|
||||||
|
loaders.append(FileSystemLoader('.'))
|
||||||
|
loaders.append(FileSystemLoader('/'))
|
||||||
loaders.append(PackageLoader(self.env_globals['package'], template_dir))
|
loaders.append(PackageLoader(self.env_globals['package'], template_dir))
|
||||||
return loaders
|
return loaders
|
||||||
|
|
||||||
@ -128,28 +131,21 @@ class HeadInsertView(J2TemplateView):
|
|||||||
def create_insert_func(self, wbrequest,
|
def create_insert_func(self, wbrequest,
|
||||||
include_ts=True):
|
include_ts=True):
|
||||||
|
|
||||||
canon_url = wbrequest.wb_prefix + wbrequest.wb_url.to_str(mod='')
|
top_url = wbrequest.wb_prefix
|
||||||
|
top_url += wbrequest.wb_url.to_str(mod=wbrequest.final_mod)
|
||||||
|
|
||||||
include_wombat = not wbrequest.wb_url.is_banner_only
|
include_wombat = not wbrequest.wb_url.is_banner_only
|
||||||
|
|
||||||
def make_head_insert(rule, cdx):
|
def make_head_insert(rule, cdx):
|
||||||
return (self.render_to_string(wbrequest=wbrequest,
|
return (self.render_to_string(wbrequest=wbrequest,
|
||||||
cdx=cdx,
|
cdx=cdx,
|
||||||
canon_url=canon_url,
|
top_url=top_url,
|
||||||
include_ts=include_ts,
|
include_ts=include_ts,
|
||||||
include_wombat=include_wombat,
|
include_wombat=include_wombat,
|
||||||
banner_html=self.banner_html,
|
banner_html=self.banner_html,
|
||||||
rule=rule))
|
rule=rule))
|
||||||
return make_head_insert
|
return make_head_insert
|
||||||
|
|
||||||
def _make_loaders(self, template_dir):
|
|
||||||
loaders = []
|
|
||||||
loaders.append(FileSystemLoader(template_dir))
|
|
||||||
# add relative and absolute path loaders
|
|
||||||
loaders.append(FileSystemLoader('.'))
|
|
||||||
loaders.append(FileSystemLoader('/'))
|
|
||||||
loaders.append(PackageLoader(self.env_globals['package'], template_dir))
|
|
||||||
return loaders
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def init_from_config(config):
|
def init_from_config(config):
|
||||||
view = config.get('head_insert_view')
|
view = config.get('head_insert_view')
|
||||||
|
2
setup.py
2
setup.py
@ -34,7 +34,7 @@ class PyTest(TestCommand):
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='pywb',
|
name='pywb',
|
||||||
version='0.6.1',
|
version='0.6.2',
|
||||||
url='https://github.com/ikreymer/pywb',
|
url='https://github.com/ikreymer/pywb',
|
||||||
author='Ilya Kreymer',
|
author='Ilya Kreymer',
|
||||||
author_email='ikreymer@gmail.com',
|
author_email='ikreymer@gmail.com',
|
||||||
|
@ -87,19 +87,19 @@ class TestWb:
|
|||||||
assert actual_len == 3, actual_len
|
assert actual_len == 3, actual_len
|
||||||
|
|
||||||
def test_replay_top_frame(self):
|
def test_replay_top_frame(self):
|
||||||
resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
|
resp = self.testapp.get('/pywb/20140127171238tf_/http://www.iana.org/')
|
||||||
|
|
||||||
assert '<iframe ' in resp.body
|
assert '<iframe ' in resp.body
|
||||||
assert '/pywb/20140127171238mp_/http://www.iana.org/' in resp.body
|
assert '/pywb/20140127171238/http://www.iana.org/' in resp.body
|
||||||
|
|
||||||
def test_replay_content(self):
|
def test_replay_content(self):
|
||||||
resp = self.testapp.get('/pywb/20140127171238mp_/http://www.iana.org/')
|
resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
|
||||||
self._assert_basic_html(resp)
|
self._assert_basic_html(resp)
|
||||||
|
|
||||||
assert '"20140127171238"' in resp.body
|
assert '"20140127171238"' in resp.body
|
||||||
assert 'wb.js' in resp.body
|
assert 'wb.js' in resp.body
|
||||||
assert 'WB_wombat_init' in resp.body
|
assert 'WB_wombat_init' in resp.body
|
||||||
assert '/pywb/20140127171238mp_/http://www.iana.org/time-zones"' in resp.body
|
assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body
|
||||||
|
|
||||||
def test_replay_non_frame_content(self):
|
def test_replay_non_frame_content(self):
|
||||||
resp = self.testapp.get('/pywb-nonframe/20140127171238/http://www.iana.org/')
|
resp = self.testapp.get('/pywb-nonframe/20140127171238/http://www.iana.org/')
|
||||||
@ -110,28 +110,28 @@ class TestWb:
|
|||||||
assert '/pywb-nonframe/20140127171238/http://www.iana.org/time-zones"' in resp.body
|
assert '/pywb-nonframe/20140127171238/http://www.iana.org/time-zones"' in resp.body
|
||||||
|
|
||||||
def test_replay_non_surt(self):
|
def test_replay_non_surt(self):
|
||||||
resp = self.testapp.get('/pywb-nosurt/20140103030321mp_/http://example.com?example=1')
|
resp = self.testapp.get('/pywb-nosurt/20140103030321/http://example.com?example=1')
|
||||||
self._assert_basic_html(resp)
|
self._assert_basic_html(resp)
|
||||||
|
|
||||||
assert '"20140103030321"' in resp.body
|
assert '"20140103030321"' in resp.body
|
||||||
assert 'wb.js' in resp.body
|
assert 'wb.js' in resp.body
|
||||||
assert '/pywb-nosurt/20140103030321mp_/http://www.iana.org/domains/example' in resp.body
|
assert '/pywb-nosurt/20140103030321/http://www.iana.org/domains/example' in resp.body
|
||||||
|
|
||||||
def test_zero_len_revisit(self):
|
def test_zero_len_revisit(self):
|
||||||
resp = self.testapp.get('/pywb/20140603030341mp_/http://example.com?example=2')
|
resp = self.testapp.get('/pywb/20140603030341/http://example.com?example=2')
|
||||||
self._assert_basic_html(resp)
|
self._assert_basic_html(resp)
|
||||||
|
|
||||||
assert '"20140603030341"' in resp.body
|
assert '"20140603030341"' in resp.body
|
||||||
assert 'wb.js' in resp.body
|
assert 'wb.js' in resp.body
|
||||||
assert '/pywb/20140603030341mp_/http://www.iana.org/domains/example' in resp.body
|
assert '/pywb/20140603030341/http://www.iana.org/domains/example' in resp.body
|
||||||
|
|
||||||
def test_replay_url_agnostic_revisit(self):
|
def test_replay_url_agnostic_revisit(self):
|
||||||
resp = self.testapp.get('/pywb/20130729195151mp_/http://www.example.com/')
|
resp = self.testapp.get('/pywb/20130729195151/http://www.example.com/')
|
||||||
self._assert_basic_html(resp)
|
self._assert_basic_html(resp)
|
||||||
|
|
||||||
assert '"20130729195151"' in resp.body
|
assert '"20130729195151"' in resp.body
|
||||||
assert 'wb.js' in resp.body
|
assert 'wb.js' in resp.body
|
||||||
assert '/pywb/20130729195151mp_/http://www.iana.org/domains/example"' in resp.body
|
assert '/pywb/20130729195151/http://www.iana.org/domains/example"' in resp.body
|
||||||
|
|
||||||
def test_replay_cdx_mod(self):
|
def test_replay_cdx_mod(self):
|
||||||
resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css')
|
resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css')
|
||||||
@ -200,56 +200,56 @@ class TestWb:
|
|||||||
assert resp.content_type == 'application/x-javascript'
|
assert resp.content_type == 'application/x-javascript'
|
||||||
|
|
||||||
def test_redirect_1(self):
|
def test_redirect_1(self):
|
||||||
resp = self.testapp.get('/pywb/20140127171237mp_/http://www.iana.org/')
|
resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/')
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
|
|
||||||
assert resp.headers['Location'].endswith('/pywb/20140127171238mp_/http://iana.org')
|
assert resp.headers['Location'].endswith('/pywb/20140127171238/http://iana.org')
|
||||||
|
|
||||||
|
|
||||||
def test_redirect_replay_2(self):
|
def test_redirect_replay_2(self):
|
||||||
resp = self.testapp.get('/pywb/mp_/http://example.com/')
|
resp = self.testapp.get('/pywb/http://example.com/')
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
|
|
||||||
assert resp.headers['Location'].endswith('/20140127171251mp_/http://example.com')
|
assert resp.headers['Location'].endswith('/20140127171251/http://example.com')
|
||||||
resp = resp.follow()
|
resp = resp.follow()
|
||||||
|
|
||||||
#check resp
|
#check resp
|
||||||
self._assert_basic_html(resp)
|
self._assert_basic_html(resp)
|
||||||
assert '"20140127171251"' in resp.body
|
assert '"20140127171251"' in resp.body
|
||||||
assert '/pywb/20140127171251mp_/http://www.iana.org/domains/example' in resp.body
|
assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body
|
||||||
|
|
||||||
def test_redirect_relative_3(self):
|
def test_redirect_relative_3(self):
|
||||||
# webtest uses Host: localhost:80 by default
|
# webtest uses Host: localhost:80 by default
|
||||||
# first two requests should result in same redirect
|
# first two requests should result in same redirect
|
||||||
target = 'http://localhost:80/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css'
|
target = 'http://localhost:80/pywb/2014/http://iana.org/_css/2013.1/screen.css'
|
||||||
|
|
||||||
# without timestamp
|
# without timestamp
|
||||||
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')])
|
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014/http://iana.org/')])
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'] == target, resp.headers['Location']
|
assert resp.headers['Location'] == target, resp.headers['Location']
|
||||||
|
|
||||||
# with timestamp
|
# with timestamp
|
||||||
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')])
|
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:80/pywb/2014/http://iana.org/')])
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'] == target, resp.headers['Location']
|
assert resp.headers['Location'] == target, resp.headers['Location']
|
||||||
|
|
||||||
|
|
||||||
resp = resp.follow()
|
resp = resp.follow()
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'].endswith('/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css')
|
assert resp.headers['Location'].endswith('/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css')
|
||||||
|
|
||||||
resp = resp.follow()
|
resp = resp.follow()
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.content_type == 'text/css'
|
assert resp.content_type == 'text/css'
|
||||||
|
|
||||||
def test_rel_self_redirect(self):
|
def test_rel_self_redirect(self):
|
||||||
uri = '/pywb/20140126200927mp_/http://www.iana.org/domains/root/db'
|
uri = '/pywb/20140126200927/http://www.iana.org/domains/root/db'
|
||||||
resp = self.testapp.get(uri, status=302)
|
resp = self.testapp.get(uri, status=302)
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'].endswith('/pywb/20140126200928mp_/http://www.iana.org/domains/root/db')
|
assert resp.headers['Location'].endswith('/pywb/20140126200928/http://www.iana.org/domains/root/db')
|
||||||
|
|
||||||
#def test_referrer_self_redirect(self):
|
#def test_referrer_self_redirect(self):
|
||||||
# uri = '/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css'
|
# uri = '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css'
|
||||||
# host = 'somehost:8082'
|
# host = 'somehost:8082'
|
||||||
# referrer = 'http://' + host + uri
|
# referrer = 'http://' + host + uri
|
||||||
|
|
||||||
@ -262,33 +262,33 @@ class TestWb:
|
|||||||
# assert resp.status_int == 302
|
# assert resp.status_int == 302
|
||||||
|
|
||||||
def test_not_existant_warc_other_capture(self):
|
def test_not_existant_warc_other_capture(self):
|
||||||
resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com?example=2')
|
resp = self.testapp.get('/pywb/20140703030321/http://example.com?example=2')
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2')
|
assert resp.headers['Location'].endswith('/pywb/20140603030341/http://example.com?example=2')
|
||||||
|
|
||||||
def test_missing_revisit_other_capture(self):
|
def test_missing_revisit_other_capture(self):
|
||||||
resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com?example=2')
|
resp = self.testapp.get('/pywb/20140603030351/http://example.com?example=2')
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2')
|
assert resp.headers['Location'].endswith('/pywb/20140603030341/http://example.com?example=2')
|
||||||
|
|
||||||
def test_not_existant_warc_no_other(self):
|
def test_not_existant_warc_no_other(self):
|
||||||
resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com?example=3', status = 503)
|
resp = self.testapp.get('/pywb/20140703030321/http://example.com?example=3', status = 503)
|
||||||
assert resp.status_int == 503
|
assert resp.status_int == 503
|
||||||
|
|
||||||
def test_missing_revisit_no_other(self):
|
def test_missing_revisit_no_other(self):
|
||||||
resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com?example=3', status = 503)
|
resp = self.testapp.get('/pywb/20140603030351/http://example.com?example=3', status = 503)
|
||||||
assert resp.status_int == 503
|
assert resp.status_int == 503
|
||||||
|
|
||||||
def test_live_frame(self):
|
def test_live_frame(self):
|
||||||
resp = self.testapp.get('/live/mp_/http://example.com/?test=test')
|
resp = self.testapp.get('/live/http://example.com/?test=test')
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
|
|
||||||
def test_live_fallback(self):
|
def test_live_fallback(self):
|
||||||
resp = self.testapp.get('/pywb-fallback/mp_/http://example.com/?test=test')
|
resp = self.testapp.get('/pywb-fallback//http://example.com/?test=test')
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
|
|
||||||
def test_post_1(self):
|
def test_post_1(self):
|
||||||
resp = self.testapp.post('/pywb/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
|
resp = self.testapp.post('/pywb/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
|
||||||
|
|
||||||
# no redirects for POST, as some browsers (FF) show modal confirmation dialog!
|
# no redirects for POST, as some browsers (FF) show modal confirmation dialog!
|
||||||
#assert resp.status_int == 307
|
#assert resp.status_int == 307
|
||||||
@ -303,24 +303,24 @@ class TestWb:
|
|||||||
assert '"test": "abc"' in resp.body
|
assert '"test": "abc"' in resp.body
|
||||||
|
|
||||||
def test_post_2(self):
|
def test_post_2(self):
|
||||||
resp = self.testapp.post('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': '^'})
|
resp = self.testapp.post('/pywb/20140610001255/http://httpbin.org/post?foo=bar', {'data': '^'})
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert '"data": "^"' in resp.body
|
assert '"data": "^"' in resp.body
|
||||||
|
|
||||||
def test_post_invalid(self):
|
def test_post_invalid(self):
|
||||||
# not json
|
# not json
|
||||||
resp = self.testapp.post_json('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': '^'}, status=404)
|
resp = self.testapp.post_json('/pywb/20140610001255/http://httpbin.org/post?foo=bar', {'data': '^'}, status=404)
|
||||||
assert resp.status_int == 404
|
assert resp.status_int == 404
|
||||||
|
|
||||||
def test_post_redirect(self):
|
def test_post_redirect(self):
|
||||||
# post handled without redirect (since 307 not allowed)
|
# post handled without redirect (since 307 not allowed)
|
||||||
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:80/pywb/2014mp_/http://httpbin.org/post')])
|
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:80/pywb/2014/http://httpbin.org/post')])
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert '"foo": "bar"' in resp.body
|
assert '"foo": "bar"' in resp.body
|
||||||
assert '"test": "abc"' in resp.body
|
assert '"test": "abc"' in resp.body
|
||||||
|
|
||||||
def test_excluded_content(self):
|
def test_excluded_content(self):
|
||||||
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_img/bookmark_icon.ico', status = 403)
|
resp = self.testapp.get('/pywb/http://www.iana.org/_img/bookmark_icon.ico', status = 403)
|
||||||
assert resp.status_int == 403
|
assert resp.status_int == 403
|
||||||
assert 'Excluded' in resp.body
|
assert 'Excluded' in resp.body
|
||||||
|
|
||||||
@ -365,7 +365,7 @@ class TestWb:
|
|||||||
|
|
||||||
|
|
||||||
def test_error(self):
|
def test_error(self):
|
||||||
resp = self.testapp.get('/pywb/mp_/?abc', status = 400)
|
resp = self.testapp.get('/pywb/?abc', status = 400)
|
||||||
assert resp.status_int == 400
|
assert resp.status_int == 400
|
||||||
assert 'Invalid Url: http://?abc' in resp.body
|
assert 'Invalid Url: http://?abc' in resp.body
|
||||||
|
|
||||||
|
@ -10,32 +10,32 @@ class TestLiveRewriter:
|
|||||||
|
|
||||||
def test_live_rewrite_1(self):
|
def test_live_rewrite_1(self):
|
||||||
headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/rewrite/other.example.com')]
|
headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/rewrite/other.example.com')]
|
||||||
resp = self.testapp.get('/rewrite/mp_/http://example.com/', headers=headers)
|
resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
|
|
||||||
def test_live_rewrite_redirect_2(self):
|
def test_live_rewrite_redirect_2(self):
|
||||||
resp = self.testapp.get('/rewrite/mp_/http://facebook.com/')
|
resp = self.testapp.get('/rewrite/http://facebook.com/')
|
||||||
assert resp.status_int == 301
|
assert resp.status_int == 301
|
||||||
|
|
||||||
def test_live_rewrite_post(self):
|
def test_live_rewrite_post(self):
|
||||||
resp = self.testapp.post('/rewrite/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
|
resp = self.testapp.post('/rewrite/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert '"foo": "bar"' in resp.body
|
assert '"foo": "bar"' in resp.body
|
||||||
assert '"test": "abc"' in resp.body
|
assert '"test": "abc"' in resp.body
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
|
|
||||||
def test_live_rewrite_frame(self):
|
def test_live_rewrite_frame(self):
|
||||||
resp = self.testapp.get('/rewrite/http://example.com/')
|
resp = self.testapp.get('/rewrite/tf_/http://example.com/')
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert '<iframe ' in resp.body
|
assert '<iframe ' in resp.body
|
||||||
assert 'src="/rewrite/mp_/http://example.com/"' in resp.body
|
assert 'src="/rewrite/http://example.com/"' in resp.body
|
||||||
|
|
||||||
def test_live_invalid(self):
|
def test_live_invalid(self):
|
||||||
resp = self.testapp.get('/rewrite/mp_/http://abcdef', status=400)
|
resp = self.testapp.get('/rewrite/http://abcdef', status=400)
|
||||||
assert resp.status_int == 400
|
assert resp.status_int == 400
|
||||||
|
|
||||||
def test_live_invalid_2(self):
|
def test_live_invalid_2(self):
|
||||||
resp = self.testapp.get('/rewrite/mp_/@#$@#$', status=400)
|
resp = self.testapp.get('/rewrite/@#$@#$', status=400)
|
||||||
assert resp.status_int == 400
|
assert resp.status_int == 400
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ class TestWb:
|
|||||||
"""
|
"""
|
||||||
TimeGate with no Accept-Datetime header
|
TimeGate with no Accept-Datetime header
|
||||||
"""
|
"""
|
||||||
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_css/2013.1/screen.css')
|
resp = self.testapp.get('/pywb/http://www.iana.org/_css/2013.1/screen.css')
|
||||||
|
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
|
|
||||||
@ -46,7 +46,7 @@ class TestWb:
|
|||||||
|
|
||||||
assert MEMENTO_DATETIME not in resp.headers
|
assert MEMENTO_DATETIME not in resp.headers
|
||||||
|
|
||||||
assert '/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
|
assert '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
|
||||||
|
|
||||||
|
|
||||||
def test_timegate_accept_datetime(self):
|
def test_timegate_accept_datetime(self):
|
||||||
@ -54,7 +54,7 @@ class TestWb:
|
|||||||
TimeGate with Accept-Datetime header
|
TimeGate with Accept-Datetime header
|
||||||
"""
|
"""
|
||||||
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
|
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
|
||||||
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
resp = self.testapp.get('/pywb//http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
||||||
|
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
|
|
||||||
@ -67,7 +67,7 @@ class TestWb:
|
|||||||
|
|
||||||
assert MEMENTO_DATETIME not in resp.headers
|
assert MEMENTO_DATETIME not in resp.headers
|
||||||
|
|
||||||
assert '/pywb/20140126200804mp_/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
|
assert '/pywb/20140126200804/http://www.iana.org/_css/2013.1/screen.css' in resp.headers['Location']
|
||||||
|
|
||||||
|
|
||||||
def test_non_timegate_intermediate_redir(self):
|
def test_non_timegate_intermediate_redir(self):
|
||||||
@ -76,7 +76,7 @@ class TestWb:
|
|||||||
"""
|
"""
|
||||||
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
|
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
|
||||||
# not a timegate, partial timestamp /2014/ present
|
# not a timegate, partial timestamp /2014/ present
|
||||||
resp = self.testapp.get('/pywb/2014mp_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
resp = self.testapp.get('/pywb/2014/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
||||||
|
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
|
|
||||||
@ -90,14 +90,64 @@ class TestWb:
|
|||||||
|
|
||||||
|
|
||||||
# redirect to latest, not negotiation via Accept-Datetime
|
# redirect to latest, not negotiation via Accept-Datetime
|
||||||
assert '/pywb/20140127171239mp_/' in resp.headers['Location']
|
assert '/pywb/20140127171239/' in resp.headers['Location']
|
||||||
|
|
||||||
|
|
||||||
|
def test_top_frame_no_date(self):
|
||||||
|
"""
|
||||||
|
A top-frame request with no date, must treat as intermediate
|
||||||
|
Include timemap, timegate, original headers
|
||||||
|
"""
|
||||||
|
|
||||||
|
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
|
||||||
|
|
||||||
|
# not a timegate, ignore ACCEPT_DATETIME
|
||||||
|
resp = self.testapp.get('/pywb/tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
||||||
|
|
||||||
|
assert resp.status_int == 200
|
||||||
|
|
||||||
|
# no vary header
|
||||||
|
assert VARY not in resp.headers
|
||||||
|
|
||||||
|
# no memento-datetime
|
||||||
|
assert MEMENTO_DATETIME not in resp.headers
|
||||||
|
|
||||||
|
links = self.get_links(resp)
|
||||||
|
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
||||||
|
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
|
||||||
|
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
||||||
|
|
||||||
|
def test_top_frame_with_date(self):
|
||||||
|
"""
|
||||||
|
A top-frame request with date, treat as intermediate
|
||||||
|
Include timemap, timegate, original headers and a link to the possible memento
|
||||||
|
"""
|
||||||
|
|
||||||
|
headers = {ACCEPT_DATETIME: 'Sun, 26 Jan 2014 20:08:04'}
|
||||||
|
|
||||||
|
# not a timegate, ignore ACCEPT_DATETIME
|
||||||
|
resp = self.testapp.get('/pywb/20141012tf_/http://www.iana.org/_css/2013.1/screen.css', headers=headers)
|
||||||
|
|
||||||
|
assert resp.status_int == 200
|
||||||
|
|
||||||
|
# no vary header
|
||||||
|
assert VARY not in resp.headers
|
||||||
|
|
||||||
|
# no memento-datetime
|
||||||
|
assert MEMENTO_DATETIME not in resp.headers
|
||||||
|
|
||||||
|
links = self.get_links(resp)
|
||||||
|
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
||||||
|
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
|
||||||
|
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
||||||
|
|
||||||
|
assert '<http://localhost:80/pywb/20141012/http://www.iana.org/_css/2013.1/screen.css>; rel="memento"' in links
|
||||||
|
|
||||||
def test_memento_url(self):
|
def test_memento_url(self):
|
||||||
"""
|
"""
|
||||||
Memento response, 200 capture
|
Memento response, 200 capture
|
||||||
"""
|
"""
|
||||||
resp = self.testapp.get('/pywb/20140126200804mp_/http://www.iana.org/_css/2013.1/screen.css')
|
resp = self.testapp.get('/pywb/20140126200804/http://www.iana.org/_css/2013.1/screen.css')
|
||||||
|
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
|
|
||||||
@ -105,7 +155,7 @@ class TestWb:
|
|||||||
|
|
||||||
links = self.get_links(resp)
|
links = self.get_links(resp)
|
||||||
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
assert '<http://www.iana.org/_css/2013.1/screen.css>; rel="original"' in links
|
||||||
assert '<http://localhost:80/pywb/mp_/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
|
assert '<http://localhost:80/pywb/http://www.iana.org/_css/2013.1/screen.css>; rel="timegate"' in links
|
||||||
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
assert self.make_timemap_link('http://www.iana.org/_css/2013.1/screen.css') in links
|
||||||
|
|
||||||
assert resp.headers[MEMENTO_DATETIME] == 'Sun, 26 Jan 2014 20:08:04 GMT'
|
assert resp.headers[MEMENTO_DATETIME] == 'Sun, 26 Jan 2014 20:08:04 GMT'
|
||||||
@ -115,7 +165,7 @@ class TestWb:
|
|||||||
"""
|
"""
|
||||||
Memento (capture) of a 302 response
|
Memento (capture) of a 302 response
|
||||||
"""
|
"""
|
||||||
resp = self.testapp.get('/pywb/20140128051539mp_/http://www.iana.org/domains/example')
|
resp = self.testapp.get('/pywb/20140128051539/http://www.iana.org/domains/example')
|
||||||
|
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
|
|
||||||
@ -123,7 +173,7 @@ class TestWb:
|
|||||||
|
|
||||||
links = self.get_links(resp)
|
links = self.get_links(resp)
|
||||||
assert '<http://www.iana.org/domains/example>; rel="original"' in links
|
assert '<http://www.iana.org/domains/example>; rel="original"' in links
|
||||||
assert '<http://localhost:80/pywb/mp_/http://www.iana.org/domains/example>; rel="timegate"' in links
|
assert '<http://localhost:80/pywb/http://www.iana.org/domains/example>; rel="timegate"' in links
|
||||||
assert self.make_timemap_link('http://www.iana.org/domains/example') in links
|
assert self.make_timemap_link('http://www.iana.org/domains/example') in links
|
||||||
|
|
||||||
assert resp.headers[MEMENTO_DATETIME] == 'Tue, 28 Jan 2014 05:15:39 GMT'
|
assert resp.headers[MEMENTO_DATETIME] == 'Tue, 28 Jan 2014 05:15:39 GMT'
|
||||||
@ -147,12 +197,12 @@ rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT"
|
|||||||
|
|
||||||
assert lines[1] == '<http://example.com?example=1>; rel="original",'
|
assert lines[1] == '<http://example.com?example=1>; rel="original",'
|
||||||
|
|
||||||
assert lines[2] == '<http://localhost:80/pywb/mp_/http://example.com?example=1>; rel="timegate",'
|
assert lines[2] == '<http://localhost:80/pywb/http://example.com?example=1>; rel="timegate",'
|
||||||
|
|
||||||
assert lines[3] == '<http://localhost:80/pywb/20140103030321mp_/http://example.com?example=1>; \
|
assert lines[3] == '<http://localhost:80/pywb/20140103030321/http://example.com?example=1>; \
|
||||||
rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT",'
|
rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT",'
|
||||||
|
|
||||||
assert lines[4] == '<http://localhost:80/pywb/20140103030341mp_/http://example.com?example=1>; \
|
assert lines[4] == '<http://localhost:80/pywb/20140103030341/http://example.com?example=1>; \
|
||||||
rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"'
|
rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"'
|
||||||
|
|
||||||
def test_timemap_2(self):
|
def test_timemap_2(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user