diff --git a/CHANGES.rst b/CHANGES.rst
index f245db9b..f0da6a23 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,10 @@
-pywb 0.6.1 changelist
+pywb 0.6.2 changelist
~~~~~~~~~~~~~~~~~~~~~
+* Invert framed replay paradigm: the canonical page URL never carries a modifier (instead of using `mp_`). When using frames, the page redirects to the `tf_` top-frame URL, which then uses replaceState() to change the URL back to the canonical form.
+
+* Enable Memento support for framed replay, and include Memento headers in the top frame.
+
* Easier to customize just the banner html, via `banner_html` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner.
Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting `banner_html: False` will disable the banner.
diff --git a/README.rst b/README.rst
index 1376fd3f..5f04477b 100644
--- a/README.rst
+++ b/README.rst
@@ -1,4 +1,4 @@
-PyWb 0.6.1
+PyWb 0.6.2
==========
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop
diff --git a/pywb/framework/memento.py b/pywb/framework/memento.py
index d7221adb..921da30c 100644
--- a/pywb/framework/memento.py
+++ b/pywb/framework/memento.py
@@ -46,15 +46,22 @@ class MementoRespMixin(object):
if not wbrequest or not wbrequest.wb_url:
return
- is_timegate = wbrequest.options.get('is_timegate', False)
+ is_top_frame = wbrequest.wb_url.is_top_frame
+
+ is_timegate = wbrequest.options.get('is_timegate', False) and not is_top_frame
if is_timegate:
self.status_headers.headers.append(('Vary', 'accept-datetime'))
# Determine if memento:
- # if no cdx included, definitely not a memento
+ is_memento = False
+
+ # if no cdx included, not a memento, unless top-frame special
if not cdx:
- is_memento = False
+ # special case: include the Memento headers, except for Memento-Datetime,
+ # since this is really an intermediate resource
+ if is_top_frame:
+ is_memento = True
# otherwise, if in proxy mode, then always a memento
elif wbrequest.options['is_proxy']:
@@ -64,13 +71,19 @@ class MementoRespMixin(object):
else:
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
- if is_memento:
+ link = []
+
+ if is_memento and cdx:
http_date = timestamp_to_http_date(cdx['timestamp'])
self.status_headers.headers.append(('Memento-Datetime', http_date))
- req_url = wbrequest.wb_url.url
+ elif is_memento and is_top_frame and wbrequest.wb_url.timestamp:
+ # top frame special case
+ canon_link = wbrequest.urlrewriter.prefix
+ canon_link += wbrequest.wb_url.to_str(mod='')
+ link.append(self.make_link(canon_link, 'memento'))
- link = []
+ req_url = wbrequest.wb_url.url
if is_memento and is_timegate:
link.append(self.make_link(req_url, 'original timegate'))
@@ -82,7 +95,8 @@ class MementoRespMixin(object):
link.append(self.make_timemap_link(wbrequest))
if is_memento and not is_timegate:
- timegate = wbrequest.urlrewriter.get_timestamp_url('')
+ timegate = wbrequest.urlrewriter.prefix
+ timegate += wbrequest.wb_url.to_str(mod='', timestamp='')
link.append(self.make_link(timegate, 'timegate'))
link = ', '.join(link)
@@ -115,7 +129,7 @@ def make_memento_link(cdx, prefix, datetime=None, rel='memento', end=',\n'):
memento = '<{0}>; rel="{1}"; datetime="{2}"' + end
string = WbUrl.to_wburl_str(url=cdx['original'],
- mod='mp_',
+ mod='',
timestamp=cdx['timestamp'],
type=WbUrl.REPLAY)
@@ -148,7 +162,7 @@ def make_timemap(wbrequest, cdx_lines):
# timegate link
timegate = '<{0}>; rel="timegate",\n'
- yield timegate.format(prefix + 'mp_/' + url)
+ yield timegate.format(prefix + url)
# first memento link
yield make_memento_link(first_cdx, prefix,
diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py
index 7f2c4337..8cbabc49 100644
--- a/pywb/framework/wbrequestresponse.py
+++ b/pywb/framework/wbrequestresponse.py
@@ -51,6 +51,8 @@ class WbRequest(object):
self.coll = coll
+ self.final_mod = ''
+
if not host_prefix:
host_prefix = self.make_host_prefix(env)
diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py
index 9f19385a..51267a61 100644
--- a/pywb/rewrite/regex_rewriters.py
+++ b/pywb/rewrite/regex_rewriters.py
@@ -25,7 +25,7 @@ class RegexRewriter(object):
@staticmethod
def archival_rewrite(rewriter):
- return lambda string: rewriter.rewrite(string, 'mp_')
+ return lambda string: rewriter.rewrite(string)
#@staticmethod
#def replacer(other):
diff --git a/pywb/rewrite/rewrite_content.py b/pywb/rewrite/rewrite_content.py
index 3e98e923..029dde28 100644
--- a/pywb/rewrite/rewrite_content.py
+++ b/pywb/rewrite/rewrite_content.py
@@ -28,7 +28,7 @@ class RewriteContent:
ds_rules_file=ds_rules_file)
if is_framed_replay:
- self.defmod = 'mp_'
+ self.defmod = ''
else:
self.defmod = ''
diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py
index f3a5d38d..593a4096 100644
--- a/pywb/rewrite/test/test_html_rewriter.py
+++ b/pywb/rewrite/test/test_html_rewriter.py
@@ -62,7 +62,7 @@ ur"""
# Script tag
>>> parse('')
-
+
# Script tag + crossorigin
>>> parse('')
@@ -70,21 +70,21 @@ ur"""
# Unterminated script tag, handle and auto-terminate
>>> parse('
+
>>> parse('')
-
+
>>> parse('
')
-
+
# Style
>>> parse('')
-
+
# Unterminated style tag, handle and auto-terminate
>>> parse('
+
# Head Insertion
>>> parse('Test', head_insert = '')
diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py
index 69a367a9..e0a95a84 100644
--- a/pywb/rewrite/test/test_regex_rewriters.py
+++ b/pywb/rewrite/test/test_regex_rewriters.py
@@ -12,16 +12,16 @@ r"""
#=================================================================
>>> _test_js('location = "http://example.com/abc.html"')
-'WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html"'
+'WB_wombat_location = "/web/20131010/http://example.com/abc.html"'
>>> _test_js(r'location = "http:\/\/example.com/abc.html"')
-'WB_wombat_location = "/web/20131010mp_/http:\\/\\/example.com/abc.html"'
+'WB_wombat_location = "/web/20131010/http:\\/\\/example.com/abc.html"'
>>> _test_js(r'location = "http:\\/\\/example.com/abc.html"')
-'WB_wombat_location = "/web/20131010mp_/http:\\\\/\\\\/example.com/abc.html"'
+'WB_wombat_location = "/web/20131010/http:\\\\/\\\\/example.com/abc.html"'
>>> _test_js(r"location = 'http://example.com/abc.html/'")
-"WB_wombat_location = '/web/20131010mp_/http://example.com/abc.html/'"
+"WB_wombat_location = '/web/20131010/http://example.com/abc.html/'"
>>> _test_js(r'location = http://example.com/abc.html/')
'WB_wombat_location = http://example.com/abc.html/'
@@ -37,21 +37,21 @@ r"""
'"/location" == some_location_val; locations = WB_wombat_location;'
>>> _test_js('cool_Location = "http://example.com/abc.html"')
-'cool_Location = "/web/20131010mp_/http://example.com/abc.html"'
+'cool_Location = "/web/20131010/http://example.com/abc.html"'
>>> _test_js('window.location = "http://example.com/abc.html" document.domain = "anotherdomain.com"')
-'window.WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html" document.WB_wombat_domain = "anotherdomain.com"'
+'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html" document.WB_wombat_domain = "anotherdomain.com"'
>>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"')
'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"'
# custom rules added
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
-'window.WB_wombat_location = "/web/20131010mp_/http://example.com/abc.html"; /*some_func(); */'
+'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */'
# scheme-agnostic
>>> _test_js('cool_Location = "//example.com/abc.html" //comment')
-'cool_Location = "/web/20131010mp_/http://example.com/abc.html" //comment'
+'cool_Location = "/web/20131010/http://example.com/abc.html" //comment'
# document.cookie test
>>> _test_js('document.cookie = "a=b; Path=/"')
@@ -59,7 +59,7 @@ r"""
# js-escaped
>>> _test_js('"http:\\/\\/www.example.com\\/some\\/path\\/?query=1"')
-'"/web/20131010mp_/http:\\/\\/www.example.com\\/some\\/path\\/?query=1"'
+'"/web/20131010/http:\\/\\/www.example.com\\/some\\/path\\/?query=1"'
#=================================================================
@@ -67,68 +67,68 @@ r"""
#=================================================================
>>> _test_xml('')
-''
+''
>>> _test_xml('')
-''
+''
>>> _test_xml(' http://example.comabchttp://example.com')
-' /web/20131010mp_/http://example.comabchttp://example.com'
+' /web/20131010/http://example.comabchttp://example.com'
>>> _test_xml(' http://www.example.com/blah http://example.com ')
-' /web/20131010mp_/http://www.example.com/blah /web/20131010mp_/http://example.com '
+' /web/20131010/http://www.example.com/blah /web/20131010/http://example.com '
#=================================================================
# CSS Rewriting
#=================================================================
>>> _test_css("background: url('/some/path.html')")
-"background: url('/web/20131010mp_/http://example.com/some/path.html')"
+"background: url('/web/20131010/http://example.com/some/path.html')"
>>> _test_css("background: url('../path.html')")
-"background: url('/web/20131010mp_/http://example.com/path.html')"
+"background: url('/web/20131010/http://example.com/path.html')"
>>> _test_css("background: url(\"http://domain.com/path.html\")")
-'background: url("/web/20131010mp_/http://domain.com/path.html")'
+'background: url("/web/20131010/http://domain.com/path.html")'
>>> _test_css("background: url(file.jpeg)")
-'background: url(/web/20131010mp_/http://example.com/file.jpeg)'
+'background: url(/web/20131010/http://example.com/file.jpeg)'
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
-"background:#abc url('/web/20131010mp_/http://example.com/static/images/layout/logo.png')"
+"background:#abc url('/web/20131010/http://example.com/static/images/layout/logo.png')"
>>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')")
-"background:#000 url('/web/20131010mp_/http://example.com/images/layout/logo.png')"
+"background:#000 url('/web/20131010/http://example.com/images/layout/logo.png')"
>>> _test_css("background: url('')")
"background: url('')"
>>> _test_css("background: url (\"weirdpath\')")
-'background: url ("/web/20131010mp_/http://example.com/weirdpath\')'
+'background: url ("/web/20131010/http://example.com/weirdpath\')'
>>> _test_css("@import url ('path.css')")
-"@import url ('/web/20131010mp_/http://example.com/path.css')"
+"@import url ('/web/20131010/http://example.com/path.css')"
>>> _test_css("@import url('path.css')")
-"@import url('/web/20131010mp_/http://example.com/path.css')"
+"@import url('/web/20131010/http://example.com/path.css')"
>>> _test_css("@import ( 'path.css')")
-"@import ( '/web/20131010mp_/http://example.com/path.css')"
+"@import ( '/web/20131010/http://example.com/path.css')"
>>> _test_css("@import \"path.css\"")
-'@import "/web/20131010mp_/http://example.com/path.css"'
+'@import "/web/20131010/http://example.com/path.css"'
>>> _test_css("@import ('../path.css\"")
-'@import (\'/web/20131010mp_/http://example.com/path.css"'
+'@import (\'/web/20131010/http://example.com/path.css"'
>>> _test_css("@import ('../url.css\"")
-'@import (\'/web/20131010mp_/http://example.com/url.css"'
+'@import (\'/web/20131010/http://example.com/url.css"'
>>> _test_css("@import (\"url.css\")")
-'@import ("/web/20131010mp_/http://example.com/url.css")'
+'@import ("/web/20131010/http://example.com/url.css")'
>>> _test_css("@import url(/url.css)\n@import url(/anotherurl.css)\n @import url(/and_a_third.css)")
-'@import url(/web/20131010mp_/http://example.com/url.css)\n@import url(/web/20131010mp_/http://example.com/anotherurl.css)\n @import url(/web/20131010mp_/http://example.com/and_a_third.css)'
+'@import url(/web/20131010/http://example.com/url.css)\n@import url(/web/20131010/http://example.com/anotherurl.css)\n @import url(/web/20131010/http://example.com/and_a_third.css)'
"""
diff --git a/pywb/rewrite/wburl.py b/pywb/rewrite/wburl.py
index f826108f..cd5a2518 100644
--- a/pywb/rewrite/wburl.py
+++ b/pywb/rewrite/wburl.py
@@ -193,10 +193,14 @@ class WbUrl(BaseWbUrl):
return (not self.mod or
self.mod == 'mp_')
+ @property
+ def is_top_frame(self):
+ return (self.mod == 'tf_')
+
@property
def is_embed(self):
return (self.mod and
- self.mod not in ('id_', 'mp_', 'bn_'))
+ self.mod not in ('id_', 'mp_', 'tf_', 'bn_'))
@property
def is_banner_only(self):
diff --git a/pywb/static/wb.js b/pywb/static/wb.js
index 9406cacd..5a282573 100644
--- a/pywb/static/wb.js
+++ b/pywb/static/wb.js
@@ -118,9 +118,9 @@ function notify_top() {
this.load = function() {
if ((window.self == window.top) && wbinfo) {
- if (wbinfo.canon_url && (window.location.href != wbinfo.canon_url) && wbinfo.mod != "bn_") {
+ if (wbinfo.top_url && (window.location.href != wbinfo.top_url) && wbinfo.mod != "bn_") {
// Auto-redirect to top frame
- window.location.replace(wbinfo.canon_url);
+ window.location.replace(wbinfo.top_url);
} else {
// Init Banner (no frame or top frame)
add_event("readystatechange", init_banner, document);
diff --git a/pywb/static/wb_frame.js b/pywb/static/wb_frame.js
index cc6a7b0f..1210ada9 100644
--- a/pywb/static/wb_frame.js
+++ b/pywb/static/wb_frame.js
@@ -1,23 +1,25 @@
var LIVE_COOKIE_REGEX = /pywb.timestamp=([\d]{1,14})/;
+var TS_REGEX = /\/([\d]{1,14})\//;
+
var curr_state = {};
function make_outer_url(url, ts)
{
if (ts) {
- return wbinfo.prefix + ts + "/" + url;
+ return wbinfo.prefix + ts + "tf_/" + url;
} else {
- return wbinfo.prefix + url;
+ return wbinfo.prefix + "tf_/" + url;
}
}
function make_inner_url(url, ts)
{
if (ts) {
- return wbinfo.prefix + ts + "mp_/" + url;
+ return wbinfo.prefix + ts + "/" + url;
} else {
- return wbinfo.prefix + "mp_/" + url;
+ return wbinfo.prefix + url; // prefix is joined directly (as with ts above); an extra "/" would double the slash
}
}
@@ -39,7 +41,7 @@ function push_state(url, timestamp, capture_str, is_live) {
state.capture_str = capture_str;
state.is_live = is_live;
- window.history.replaceState(state, "", state.outer_url);
+ window.history.replaceState(state, "", state.inner_url);
set_state(state);
}
@@ -52,16 +54,12 @@ function pop_state(state) {
function extract_ts(url)
{
- var inx = url.indexOf("mp_");
- if (inx < 0) {
+ var result = url.match(TS_REGEX); // match the url argument; capture group 1 is the 1-14 digit timestamp
+ if (!result) {
return "";
}
- url = url.substring(0, inx);
- inx = url.lastIndexOf("/");
- if (inx <= 0) {
- return "";
- }
- return url.substring(inx + 1);
+
+ return result[1];
}
function extract_replay_url(url) {
diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js
index d1fd3db0..1117f657 100644
--- a/pywb/static/wombat.js
+++ b/pywb/static/wombat.js
@@ -712,7 +712,7 @@ WB_wombat_init = (function() {
wb_replay_prefix = replay_prefix;
if (wb_replay_prefix) {
- wb_replay_date_prefix = replay_prefix + capture_date + "mp_/";
+ wb_replay_date_prefix = replay_prefix + capture_date + "/";
if (capture_date.length > 0) {
wb_capture_date_part = "/" + capture_date + "/";
diff --git a/pywb/ui/frame_insert.html b/pywb/ui/frame_insert.html
index 30e003e6..7cd04306 100644
--- a/pywb/ui/frame_insert.html
+++ b/pywb/ui/frame_insert.html
@@ -8,7 +8,9 @@
wbinfo.is_frame = true;
-
+
+{% include banner_html ignore missing %}
+
diff --git a/pywb/ui/head_insert.html b/pywb/ui/head_insert.html
index 9bc84dc7..71a0761b 100644
--- a/pywb/ui/head_insert.html
+++ b/pywb/ui/head_insert.html
@@ -16,7 +16,7 @@
wbinfo.timestamp = "{{ cdx.timestamp }}";
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
wbinfo.mod = "{{ wbrequest.wb_url.mod }}";
- wbinfo.canon_url = "{{ canon_url }}";
+ wbinfo.top_url = "{{ top_url }}";
wbinfo.is_live = {{ "true" if cdx.is_live else "false" }};
wbinfo.coll = "{{ wbrequest.coll }}";
wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}";
diff --git a/pywb/webapp/handlers.py b/pywb/webapp/handlers.py
index 9b5fa718..b862e99a 100644
--- a/pywb/webapp/handlers.py
+++ b/pywb/webapp/handlers.py
@@ -6,6 +6,7 @@ from datetime import datetime
from pywb.utils.wbexception import NotFoundException
from pywb.utils.loaders import BlockLoader
+from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
from pywb.framework.wbrequestresponse import WbResponse
@@ -15,6 +16,7 @@ from pywb.warc.resolvingloader import ResolvingLoader
from views import J2TemplateView
from replay_views import ReplayView
+from pywb.framework.memento import MementoResponse
from pywb.utils.timeutils import datetime_to_timestamp
@@ -30,13 +32,21 @@ class SearchPageWbUrlHandler(WbUrlHandler):
'Search Page'))
self.is_frame_mode = config.get('framed_replay', False)
+ self.response_class = WbResponse
if self.is_frame_mode:
html = config.get('frame_insert_html', 'ui/frame_insert.html')
self.frame_insert_view = (J2TemplateView.
create_template(html, 'Frame Insert'))
+
+ self.banner_html = config.get('banner_html', 'banner.html')
+
+ if config.get('enable_memento', False):
+ self.response_class = MementoResponse
+
else:
self.frame_insert_view = None
+ self.banner_html = None
def render_search_page(self, wbrequest, **kwargs):
if self.search_view:
@@ -55,28 +65,36 @@ class SearchPageWbUrlHandler(WbUrlHandler):
# (not supported in proxy mode)
if (self.is_frame_mode and wbrequest.wb_url and
not wbrequest.wb_url.is_query() and
- not wbrequest.wb_url.mod and
not wbrequest.options['is_proxy']):
- params = self.get_top_frame_params(wbrequest)
-
- return self.frame_insert_view.render_response(**params)
+ if wbrequest.wb_url.is_top_frame:
+ return self.get_top_frame_response(wbrequest)
+ else:
+ wbrequest.final_mod = 'tf_'
return self.handle_request(wbrequest)
- def get_top_frame_params(self, wbrequest):
+ def get_top_frame_response(self, wbrequest):
if wbrequest.wb_url.timestamp:
timestamp = wbrequest.wb_url.timestamp
else:
timestamp = datetime_to_timestamp(datetime.utcnow())
- embed_url = wbrequest.wb_url.to_str(mod='mp_')
+ embed_url = wbrequest.wb_url.to_str(mod='')
- return dict(embed_url=embed_url,
- wbrequest=wbrequest,
- timestamp=timestamp,
- url=wbrequest.wb_url.url,
- content_type='text/html')
+ params = dict(embed_url=embed_url,
+ wbrequest=wbrequest,
+ timestamp=timestamp,
+ url=wbrequest.wb_url.url,
+ banner_html=self.banner_html)
+
+ headers = [('Content-Type', 'text/html; charset=utf-8')]
+ status_headers = StatusAndHeaders('200 OK', headers)
+
+ template_result = self.frame_insert_view.render_to_string(**params)
+ body = template_result.encode('utf-8')
+
+ return self.response_class(status_headers, [body], wbrequest=wbrequest)
#=================================================================
diff --git a/pywb/webapp/views.py b/pywb/webapp/views.py
index 3639e8e8..b1eacfcc 100644
--- a/pywb/webapp/views.py
+++ b/pywb/webapp/views.py
@@ -88,6 +88,9 @@ class J2TemplateView(object):
def _make_loaders(self, template_dir):
loaders = []
loaders.append(FileSystemLoader(template_dir))
+ # add relative and absolute path loaders for banner support
+ loaders.append(FileSystemLoader('.'))
+ loaders.append(FileSystemLoader('/'))
loaders.append(PackageLoader(self.env_globals['package'], template_dir))
return loaders
@@ -128,28 +131,21 @@ class HeadInsertView(J2TemplateView):
def create_insert_func(self, wbrequest,
include_ts=True):
- canon_url = wbrequest.wb_prefix + wbrequest.wb_url.to_str(mod='')
+ top_url = wbrequest.wb_prefix
+ top_url += wbrequest.wb_url.to_str(mod=wbrequest.final_mod)
+
include_wombat = not wbrequest.wb_url.is_banner_only
def make_head_insert(rule, cdx):
return (self.render_to_string(wbrequest=wbrequest,
cdx=cdx,
- canon_url=canon_url,
+ top_url=top_url,
include_ts=include_ts,
include_wombat=include_wombat,
banner_html=self.banner_html,
rule=rule))
return make_head_insert
- def _make_loaders(self, template_dir):
- loaders = []
- loaders.append(FileSystemLoader(template_dir))
- # add relative and absolute path loaders
- loaders.append(FileSystemLoader('.'))
- loaders.append(FileSystemLoader('/'))
- loaders.append(PackageLoader(self.env_globals['package'], template_dir))
- return loaders
-
@staticmethod
def init_from_config(config):
view = config.get('head_insert_view')
diff --git a/setup.py b/setup.py
index 482e5c78..5c6e5bb4 100755
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@ class PyTest(TestCommand):
setup(
name='pywb',
- version='0.6.1',
+ version='0.6.2',
url='https://github.com/ikreymer/pywb',
author='Ilya Kreymer',
author_email='ikreymer@gmail.com',
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 500bc26f..3375329f 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -87,19 +87,19 @@ class TestWb:
assert actual_len == 3, actual_len
def test_replay_top_frame(self):
- resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
+ resp = self.testapp.get('/pywb/20140127171238tf_/http://www.iana.org/')
assert '