mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
replay: support 'framed_replay' option in config for both replay and live rewrite
split replay view into BaseContentView and ReplayView; refactor RewriteLiveHandler into RewriteLiveView; add additional tests for framed and non-framed mode; default to framed replay!
This commit is contained in:
parent
d21f8079ca
commit
80e80e97d3
@ -104,3 +104,5 @@ enable_memento: true
|
|||||||
# Use lxml parser, if available
|
# Use lxml parser, if available
|
||||||
use_lxml_parser: false
|
use_lxml_parser: false
|
||||||
|
|
||||||
|
# Replay content in an iframe
|
||||||
|
framed_replay: true
|
||||||
|
@ -17,9 +17,12 @@ This file is part of pywb.
|
|||||||
along with pywb. If not, see <http://www.gnu.org/licenses/>.
|
along with pywb. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
_wb_js = (function() {
|
||||||
|
|
||||||
function init_banner() {
|
function init_banner() {
|
||||||
var PLAIN_BANNER_ID = "_wb_plain_banner";
|
var PLAIN_BANNER_ID = "_wb_plain_banner";
|
||||||
var FRAME_BANNER_ID = "_wb_frame_top_banner";
|
var FRAME_BANNER_ID = "_wb_frame_top_banner";
|
||||||
|
var bid;
|
||||||
|
|
||||||
if (wbinfo.is_embed) {
|
if (wbinfo.is_embed) {
|
||||||
return;
|
return;
|
||||||
@ -44,7 +47,7 @@ function init_banner() {
|
|||||||
|
|
||||||
text = "This is an archived page ";
|
text = "This is an archived page ";
|
||||||
if (wbinfo && wbinfo.capture_str) {
|
if (wbinfo && wbinfo.capture_str) {
|
||||||
text += " from <b>" + wbinfo.capture_str + "</b>";
|
text += " from <b id='_wb_capture_info'>" + wbinfo.capture_str + "</b>";
|
||||||
}
|
}
|
||||||
banner.innerHTML = text;
|
banner.innerHTML = text;
|
||||||
|
|
||||||
@ -76,31 +79,51 @@ function remove_event(name, func, object) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var notified_top = false;
|
function notify_top(event) {
|
||||||
|
if (window.self == window.top) {
|
||||||
var detect_on_init = function() {
|
return;
|
||||||
if (!notified_top && window && window.top && (window.self != window.top) && window.WB_wombat_location) {
|
}
|
||||||
if (!wbinfo.is_embed) {
|
|
||||||
window.top.postMessage(window.WB_wombat_location.href, "*");
|
if (window.top.top != window.top) {
|
||||||
}
|
return;
|
||||||
notified_top = true;
|
}
|
||||||
|
|
||||||
|
if (!window.WB_wombat_location) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (wbinfo.is_embed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (event.target != window.document) {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (document.readyState === "interactive" ||
|
if (typeof(window.WB_wombat_location.href) != "string") {
|
||||||
document.readyState === "complete") {
|
return;
|
||||||
|
|
||||||
init_banner();
|
|
||||||
|
|
||||||
remove_event("readystatechange", detect_on_init, document);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (window.top.update_wb_url) {
|
||||||
|
window.top.update_wb_url(window.WB_wombat_location.href, wbinfo.timestamp, wbinfo.capture_str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var detect_on_init = function(event) {
|
||||||
|
init_banner();
|
||||||
|
notify_top(event);
|
||||||
|
remove_event("readystatechange", detect_on_init, document);
|
||||||
}
|
}
|
||||||
|
|
||||||
add_event("readystatechange", detect_on_init, document);
|
add_event("readystatechange", detect_on_init, document);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (wbinfo.is_frame_mp && wbinfo.canon_url &&
|
if (wbinfo.is_frame_mp && wbinfo.canon_url &&
|
||||||
(window.self == window.top) &&
|
(window.self == window.top) && (window.self.top == window.top) &&
|
||||||
window.location.href != wbinfo.canon_url) {
|
window.location.href != wbinfo.canon_url) {
|
||||||
|
|
||||||
window.location.replace(wbinfo.canon_url);
|
window.location.replace(wbinfo.canon_url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
})();
|
||||||
|
@ -3,7 +3,8 @@
|
|||||||
<!-- Start WB Insert -->
|
<!-- Start WB Insert -->
|
||||||
<script>
|
<script>
|
||||||
wbinfo = {}
|
wbinfo = {}
|
||||||
wbinfo.capture_str = "{{ timestamp | format_ts }}";
|
// wbinfo.capture_str = "{{ timestamp | format_ts }}";
|
||||||
|
wbinfo.capture_str = " ";
|
||||||
wbinfo.is_embed = false;
|
wbinfo.is_embed = false;
|
||||||
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
|
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
|
||||||
wbinfo.capture_url = "{{ url }}";
|
wbinfo.capture_url = "{{ url }}";
|
||||||
@ -12,27 +13,49 @@
|
|||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
|
||||||
<script>
|
<script>
|
||||||
|
|
||||||
window.addEventListener("message", update_url, false);
|
var update_wb_url = push_state;
|
||||||
|
|
||||||
function push_state(url) {
|
function make_outer_url(url, ts)
|
||||||
state = {}
|
{
|
||||||
state.outer_url = wbinfo.prefix + url;
|
if (ts) {
|
||||||
state.inner_url = wbinfo.prefix + "mp_/" + url;
|
return wbinfo.prefix + ts + "/" + url;
|
||||||
|
} else {
|
||||||
if (url == wbinfo.capture_url) {
|
return wbinfo.prefix + url;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function make_inner_url(url, ts)
|
||||||
|
{
|
||||||
|
if (ts) {
|
||||||
|
return wbinfo.prefix + ts + "mp_/" + url;
|
||||||
|
} else {
|
||||||
|
return wbinfo.prefix + "mp_/" + url;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function push_state(url, timestamp, capture_str) {
|
||||||
|
var state = {}
|
||||||
|
state.outer_url = make_outer_url(url, timestamp);
|
||||||
|
state.inner_url = make_inner_url(url, timestamp);
|
||||||
|
state.capture_str = capture_str;
|
||||||
|
|
||||||
|
//if (url == wbinfo.capture_url) {
|
||||||
|
// return;
|
||||||
|
//}
|
||||||
|
|
||||||
window.history.replaceState(state, "", state.outer_url);
|
window.history.replaceState(state, "", state.outer_url);
|
||||||
|
update_status(state.capture_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
function pop_state(url) {
|
function pop_state(state) {
|
||||||
window.frames[0].src = url;
|
update_status(state.capture_str);
|
||||||
|
window.frames[0].src = state.outer_url;
|
||||||
}
|
}
|
||||||
|
|
||||||
function update_url(event) {
|
function update_status(str) {
|
||||||
if (event.source == window.frames[0]) {
|
var elem = document.getElementById("_wb_capture_info");
|
||||||
push_state(event.data);
|
if (elem) {
|
||||||
|
elem.innerHTML = str;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,7 +63,7 @@ window.onpopstate = function(event) {
|
|||||||
var curr_state = event.state;
|
var curr_state = event.state;
|
||||||
|
|
||||||
if (curr_state) {
|
if (curr_state) {
|
||||||
pop_state(curr_state.outer_url);
|
pop_state(curr_state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
<script>
|
<script>
|
||||||
wbinfo = {}
|
wbinfo = {}
|
||||||
|
wbinfo.timestamp = "{{ cdx.timestamp }}";
|
||||||
wbinfo.capture_str = "{{ cdx.timestamp | format_ts }}";
|
wbinfo.capture_str = "{{ cdx.timestamp | format_ts }}";
|
||||||
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
|
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
|
||||||
wbinfo.is_embed = {{"true" if wbrequest.wb_url.is_embed else "false"}};
|
wbinfo.is_embed = {{"true" if wbrequest.wb_url.is_embed else "false"}};
|
||||||
|
@ -2,75 +2,24 @@ from pywb.framework.basehandlers import WbUrlHandler
|
|||||||
from pywb.framework.wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
||||||
|
|
||||||
from pywb.rewrite.rewrite_live import LiveRewriter
|
|
||||||
from pywb.rewrite.wburl import WbUrl
|
|
||||||
|
|
||||||
from handlers import StaticHandler
|
from handlers import StaticHandler
|
||||||
|
|
||||||
from pywb.utils.canonicalize import canonicalize
|
from replay_views import RewriteLiveView
|
||||||
from pywb.utils.timeutils import datetime_to_timestamp
|
|
||||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
|
||||||
|
|
||||||
from pywb.rewrite.rewriterules import use_lxml_parser
|
|
||||||
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
from views import J2TemplateView, HeadInsertView
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class RewriteHandler(WbUrlHandler):
|
class RewriteHandler(WbUrlHandler):
|
||||||
def __init__(self, config={}):
|
def __init__(self, config=dict(framed_replay=True)):
|
||||||
#use_lxml_parser()
|
self.rewrite_proxy_view = RewriteLiveView(config)
|
||||||
self.rewriter = LiveRewriter(defmod='mp_')
|
|
||||||
|
|
||||||
view = config.get('head_insert_view')
|
|
||||||
if not view:
|
|
||||||
head_insert = config.get('head_insert_html',
|
|
||||||
'ui/head_insert.html')
|
|
||||||
view = HeadInsertView.create_template(head_insert, 'Head Insert')
|
|
||||||
|
|
||||||
self.head_insert_view = view
|
|
||||||
|
|
||||||
view = config.get('frame_insert_view')
|
|
||||||
if not view:
|
|
||||||
frame_insert = config.get('frame_insert_html',
|
|
||||||
'ui/frame_insert.html')
|
|
||||||
|
|
||||||
view = J2TemplateView.create_template(frame_insert, 'Frame Insert')
|
|
||||||
|
|
||||||
self.frame_insert_view = view
|
|
||||||
|
|
||||||
def __call__(self, wbrequest):
|
def __call__(self, wbrequest):
|
||||||
|
return self.rewrite_proxy_view(wbrequest)
|
||||||
url = wbrequest.wb_url.url
|
|
||||||
|
|
||||||
if not wbrequest.wb_url.mod:
|
|
||||||
embed_url = wbrequest.wb_url.to_str(mod='mp_')
|
|
||||||
timestamp = datetime_to_timestamp(datetime.datetime.utcnow())
|
|
||||||
|
|
||||||
return self.frame_insert_view.render_response(embed_url=embed_url,
|
|
||||||
wbrequest=wbrequest,
|
|
||||||
timestamp=timestamp,
|
|
||||||
url=url)
|
|
||||||
|
|
||||||
head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
|
|
||||||
|
|
||||||
ref_wburl_str = wbrequest.extract_referrer_wburl_str()
|
|
||||||
if ref_wburl_str:
|
|
||||||
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
|
||||||
|
|
||||||
result = self.rewriter.fetch_request(url, wbrequest.urlrewriter,
|
|
||||||
head_insert_func=head_insert_func,
|
|
||||||
env=wbrequest.env)
|
|
||||||
|
|
||||||
status_headers, gen, is_rewritten = result
|
|
||||||
|
|
||||||
return WbResponse(status_headers, gen)
|
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
def create_live_rewriter_app():
|
def create_live_rewriter_app():
|
||||||
routes = [Route('rewrite', RewriteHandler()),
|
routes = [Route('rewrite', RewriteHandler()),
|
||||||
Route('static/default', StaticHandler('pywb/static/'))
|
Route('static/default', StaticHandler('pywb/static/'))
|
||||||
]
|
]
|
||||||
|
|
||||||
return ArchivalRouter(routes, hostpaths=['http://localhost:8080'])
|
return ArchivalRouter(routes, hostpaths=['http://localhost:8080'])
|
||||||
|
@ -9,7 +9,6 @@ from pywb.framework.basehandlers import BaseHandler
|
|||||||
from pywb.warc.recordloader import ArcWarcRecordLoader
|
from pywb.warc.recordloader import ArcWarcRecordLoader
|
||||||
from pywb.warc.resolvingloader import ResolvingLoader
|
from pywb.warc.resolvingloader import ResolvingLoader
|
||||||
|
|
||||||
from pywb.rewrite.rewrite_content import RewriteContent
|
|
||||||
from pywb.rewrite.rewriterules import use_lxml_parser
|
from pywb.rewrite.rewriterules import use_lxml_parser
|
||||||
|
|
||||||
from views import J2TemplateView, add_env_globals
|
from views import J2TemplateView, add_env_globals
|
||||||
@ -66,8 +65,7 @@ class DictChain:
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def create_wb_handler(query_handler, config,
|
def create_wb_handler(query_handler, config):
|
||||||
ds_rules_file=DEFAULT_RULES_FILE):
|
|
||||||
|
|
||||||
cookie_maker = config.get('cookie_maker')
|
cookie_maker = config.get('cookie_maker')
|
||||||
record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker)
|
record_loader = ArcWarcRecordLoader(cookie_maker=cookie_maker)
|
||||||
@ -81,28 +79,7 @@ def create_wb_handler(query_handler, config,
|
|||||||
if template_globals:
|
if template_globals:
|
||||||
add_env_globals(template_globals)
|
add_env_globals(template_globals)
|
||||||
|
|
||||||
head_insert_view = (HeadInsertView.
|
replayer = ReplayView(resolving_loader, config)
|
||||||
create_template(config.get('head_insert_html'),
|
|
||||||
'Head Insert'))
|
|
||||||
|
|
||||||
defmod = config.get('default_mod', '')
|
|
||||||
|
|
||||||
replayer = ReplayView(
|
|
||||||
content_loader=resolving_loader,
|
|
||||||
|
|
||||||
content_rewriter=RewriteContent(ds_rules_file=ds_rules_file,
|
|
||||||
defmod=defmod),
|
|
||||||
|
|
||||||
head_insert_view=head_insert_view,
|
|
||||||
|
|
||||||
buffer_response=config.get('buffer_response', True),
|
|
||||||
|
|
||||||
redir_to_exact=config.get('redir_to_exact', True),
|
|
||||||
|
|
||||||
memento=config.get('enable_memento', False),
|
|
||||||
|
|
||||||
reporter=config.get('reporter')
|
|
||||||
)
|
|
||||||
|
|
||||||
search_view = (J2TemplateView.
|
search_view = (J2TemplateView.
|
||||||
create_template(config.get('search_html'),
|
create_template(config.get('search_html'),
|
||||||
@ -137,7 +114,7 @@ def init_collection(value, config):
|
|||||||
ds_rules_file,
|
ds_rules_file,
|
||||||
html_view)
|
html_view)
|
||||||
|
|
||||||
return route_config, query_handler, ds_rules_file
|
return route_config, query_handler
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -167,7 +144,7 @@ def create_cdx_server_app(passed_config):
|
|||||||
|
|
||||||
for name, value in collections.iteritems():
|
for name, value in collections.iteritems():
|
||||||
result = init_collection(value, config)
|
result = init_collection(value, config)
|
||||||
route_config, query_handler, ds_rules_file = result
|
route_config, query_handler = result
|
||||||
|
|
||||||
cdx_api_suffix = route_config.get('enable_cdx_api', True)
|
cdx_api_suffix = route_config.get('enable_cdx_api', True)
|
||||||
|
|
||||||
@ -210,12 +187,11 @@ def create_wb_router(passed_config={}):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
result = init_collection(value, config)
|
result = init_collection(value, config)
|
||||||
route_config, query_handler, ds_rules_file = result
|
route_config, query_handler = result
|
||||||
|
|
||||||
wb_handler = create_wb_handler(
|
wb_handler = create_wb_handler(
|
||||||
query_handler=query_handler,
|
query_handler=query_handler,
|
||||||
config=route_config,
|
config=route_config
|
||||||
ds_rules_file=ds_rules_file,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.debug('Adding Collection: ' + name)
|
logging.debug('Adding Collection: ' + name)
|
||||||
|
@ -1,15 +1,24 @@
|
|||||||
import re
|
import re
|
||||||
|
import datetime
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||||
from pywb.utils.wbexception import WbException, NotFoundException
|
from pywb.utils.wbexception import WbException, NotFoundException
|
||||||
from pywb.utils.loaders import LimitReader
|
from pywb.utils.loaders import LimitReader
|
||||||
|
from pywb.utils.timeutils import datetime_to_timestamp
|
||||||
|
|
||||||
from pywb.framework.wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
from pywb.framework.memento import MementoResponse
|
from pywb.framework.memento import MementoResponse
|
||||||
|
|
||||||
|
from pywb.rewrite.rewrite_content import RewriteContent
|
||||||
|
from pywb.rewrite.rewrite_live import LiveRewriter
|
||||||
|
from pywb.rewrite.wburl import WbUrl
|
||||||
|
|
||||||
from pywb.warc.recordloader import ArchiveLoadFailed
|
from pywb.warc.recordloader import ArchiveLoadFailed
|
||||||
|
|
||||||
|
from views import J2TemplateView, add_env_globals
|
||||||
|
from views import J2HtmlCapturesView, HeadInsertView
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class CaptureException(WbException):
|
class CaptureException(WbException):
|
||||||
@ -23,33 +32,108 @@ class CaptureException(WbException):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ReplayView(object):
|
class BaseContentView(object):
|
||||||
|
def __init__(self, config):
|
||||||
|
self.is_frame_mode = config.get('framed_replay', False)
|
||||||
|
|
||||||
|
if self.is_frame_mode:
|
||||||
|
self._mp_mod = 'mp_'
|
||||||
|
else:
|
||||||
|
self._mp_mod = ''
|
||||||
|
|
||||||
|
view = config.get('head_insert_view')
|
||||||
|
if not view:
|
||||||
|
head_insert = config.get('head_insert_html',
|
||||||
|
'ui/head_insert.html')
|
||||||
|
view = HeadInsertView.create_template(head_insert, 'Head Insert')
|
||||||
|
|
||||||
|
self.head_insert_view = view
|
||||||
|
|
||||||
|
if not self.is_frame_mode:
|
||||||
|
self.frame_insert_view = None
|
||||||
|
return
|
||||||
|
|
||||||
|
view = config.get('frame_insert_view')
|
||||||
|
if not view:
|
||||||
|
frame_insert = config.get('frame_insert_html',
|
||||||
|
'ui/frame_insert.html')
|
||||||
|
|
||||||
|
view = J2TemplateView.create_template(frame_insert, 'Frame Insert')
|
||||||
|
|
||||||
|
self.frame_insert_view = view
|
||||||
|
|
||||||
|
def __call__(self, wbrequest, *args):
|
||||||
|
# render top level frame if in frame mode
|
||||||
|
# (not supported in proxy mode)
|
||||||
|
if (self.is_frame_mode and
|
||||||
|
not wbrequest.is_proxy and
|
||||||
|
not wbrequest.wb_url.mod):
|
||||||
|
|
||||||
|
embed_url = wbrequest.wb_url.to_str(mod=self._mp_mod)
|
||||||
|
timestamp = datetime_to_timestamp(datetime.datetime.utcnow())
|
||||||
|
url = wbrequest.wb_url.url
|
||||||
|
|
||||||
|
return self.frame_insert_view.render_response(embed_url=embed_url,
|
||||||
|
wbrequest=wbrequest,
|
||||||
|
timestamp=timestamp,
|
||||||
|
url=url)
|
||||||
|
|
||||||
|
return self.render_content(wbrequest, *args)
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class RewriteLiveView(BaseContentView):
|
||||||
|
def __init__(self, config):
|
||||||
|
super(RewriteLiveView, self).__init__(config)
|
||||||
|
|
||||||
|
self.rewriter = LiveRewriter(defmod=self._mp_mod)
|
||||||
|
|
||||||
|
def render_content(self, wbrequest, *args):
|
||||||
|
head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
|
||||||
|
|
||||||
|
ref_wburl_str = wbrequest.extract_referrer_wburl_str()
|
||||||
|
if ref_wburl_str:
|
||||||
|
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
||||||
|
|
||||||
|
url = wbrequest.wb_url.url
|
||||||
|
result = self.rewriter.fetch_request(url, wbrequest.urlrewriter,
|
||||||
|
head_insert_func=head_insert_func,
|
||||||
|
env=wbrequest.env)
|
||||||
|
|
||||||
|
status_headers, gen, is_rewritten = result
|
||||||
|
|
||||||
|
return WbResponse(status_headers, gen)
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class ReplayView(BaseContentView):
|
||||||
STRIP_SCHEME = re.compile('^([\w]+:[/]*)?(.*?)$')
|
STRIP_SCHEME = re.compile('^([\w]+:[/]*)?(.*?)$')
|
||||||
|
|
||||||
def __init__(self, content_loader, content_rewriter, head_insert_view=None,
|
def __init__(self, content_loader, config):
|
||||||
redir_to_exact=True, buffer_response=False, reporter=None,
|
super(ReplayView, self).__init__(config)
|
||||||
memento=False):
|
|
||||||
|
|
||||||
self.content_loader = content_loader
|
self.content_loader = content_loader
|
||||||
self.content_rewriter = content_rewriter
|
self.content_rewriter=RewriteContent(defmod=self._mp_mod)
|
||||||
|
|
||||||
self.head_insert_view = head_insert_view
|
self.buffer_response = config.get('buffer_response', True)
|
||||||
|
|
||||||
self.redir_to_exact = redir_to_exact
|
self.redir_to_exact = config.get('redir_to_exact', True)
|
||||||
# buffer or stream rewritten response
|
|
||||||
self.buffer_response = buffer_response
|
|
||||||
|
|
||||||
self._reporter = reporter
|
|
||||||
|
|
||||||
|
memento = config.get('enable_memento', False)
|
||||||
if memento:
|
if memento:
|
||||||
self.response_class = MementoResponse
|
self.response_class = MementoResponse
|
||||||
else:
|
else:
|
||||||
self.response_class = WbResponse
|
self.response_class = WbResponse
|
||||||
|
|
||||||
def __call__(self, wbrequest, cdx_lines, cdx_loader):
|
self._reporter = config.get('reporter')
|
||||||
|
|
||||||
|
def render_content(self, wbrequest, *args):
|
||||||
last_e = None
|
last_e = None
|
||||||
first = True
|
first = True
|
||||||
|
|
||||||
|
cdx_lines = args[0]
|
||||||
|
cdx_loader = args[1]
|
||||||
|
|
||||||
# List of already failed w/arcs
|
# List of already failed w/arcs
|
||||||
failed_files = []
|
failed_files = []
|
||||||
|
|
||||||
|
@ -13,10 +13,18 @@ collections:
|
|||||||
pywb: ./sample_archive/cdx/
|
pywb: ./sample_archive/cdx/
|
||||||
|
|
||||||
# ex with filtering: filter CDX lines by filename starting with 'dupe'
|
# ex with filtering: filter CDX lines by filename starting with 'dupe'
|
||||||
pywb-filt: {'index_paths': './sample_archive/cdx/', 'filters': ['filename:dupe*']}
|
pywb-filt:
|
||||||
|
index_paths: './sample_archive/cdx/'
|
||||||
|
filters: ['filename:dupe*']
|
||||||
|
|
||||||
|
pywb-nonframe:
|
||||||
|
index_paths: './sample_archive/cdx/'
|
||||||
|
framed_replay: false
|
||||||
|
|
||||||
# collection of non-surt CDX
|
# collection of non-surt CDX
|
||||||
pywb-nosurt: {'index_paths': './sample_archive/non-surt-cdx/', 'surt_ordered': False}
|
pywb-nosurt:
|
||||||
|
index_paths: './sample_archive/non-surt-cdx/'
|
||||||
|
surt_ordered: false
|
||||||
|
|
||||||
|
|
||||||
# indicate if cdx files are sorted by SURT keys -- eg: com,example)/
|
# indicate if cdx files are sorted by SURT keys -- eg: com,example)/
|
||||||
@ -101,6 +109,12 @@ reporter: !!python/object/new:tests.fixture.PrintReporter []
|
|||||||
# custom rules for domain specific matching
|
# custom rules for domain specific matching
|
||||||
#domain_specific_rules: rules.yaml
|
#domain_specific_rules: rules.yaml
|
||||||
|
|
||||||
|
# Use lxml parser, if available
|
||||||
|
use_lxml_parser: false
|
||||||
|
|
||||||
|
# Replay content in an iframe
|
||||||
|
framed_replay: true
|
||||||
|
|
||||||
# ==== New / Experimental Settings ====
|
# ==== New / Experimental Settings ====
|
||||||
# Not yet production ready -- used primarily for testing
|
# Not yet production ready -- used primarily for testing
|
||||||
|
|
||||||
|
@ -85,30 +85,43 @@ class TestWb:
|
|||||||
actual_len = len(str(resp.body).rstrip().split('\n'))
|
actual_len = len(str(resp.body).rstrip().split('\n'))
|
||||||
assert actual_len == 3, actual_len
|
assert actual_len == 3, actual_len
|
||||||
|
|
||||||
|
def test_replay_top_frame(self):
|
||||||
def test_replay_1(self):
|
|
||||||
resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
|
resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
|
||||||
|
|
||||||
|
assert '<iframe ' in resp.body
|
||||||
|
assert '/pywb/20140127171238mp_/http://www.iana.org/' in resp.body
|
||||||
|
|
||||||
|
def test_replay_content(self):
|
||||||
|
resp = self.testapp.get('/pywb/20140127171238mp_/http://www.iana.org/')
|
||||||
self._assert_basic_html(resp)
|
self._assert_basic_html(resp)
|
||||||
|
|
||||||
assert 'Mon, Jan 27 2014 17:12:38' in resp.body
|
assert 'Mon, Jan 27 2014 17:12:38' in resp.body
|
||||||
assert 'wb.js' in resp.body
|
assert 'wb.js' in resp.body
|
||||||
assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body
|
assert '/pywb/20140127171238mp_/http://www.iana.org/time-zones"' in resp.body
|
||||||
|
|
||||||
|
def test_replay_non_frame_content(self):
|
||||||
|
resp = self.testapp.get('/pywb-nonframe/20140127171238/http://www.iana.org/')
|
||||||
|
self._assert_basic_html(resp)
|
||||||
|
|
||||||
|
assert 'Mon, Jan 27 2014 17:12:38' in resp.body
|
||||||
|
assert 'wb.js' in resp.body
|
||||||
|
assert '/pywb-nonframe/20140127171238/http://www.iana.org/time-zones"' in resp.body
|
||||||
|
|
||||||
def test_replay_non_surt(self):
|
def test_replay_non_surt(self):
|
||||||
resp = self.testapp.get('/pywb-nosurt/20140103030321/http://example.com?example=1')
|
resp = self.testapp.get('/pywb-nosurt/20140103030321mp_/http://example.com?example=1')
|
||||||
self._assert_basic_html(resp)
|
self._assert_basic_html(resp)
|
||||||
|
|
||||||
assert 'Fri, Jan 03 2014 03:03:21' in resp.body
|
assert 'Fri, Jan 03 2014 03:03:21' in resp.body
|
||||||
assert 'wb.js' in resp.body
|
assert 'wb.js' in resp.body
|
||||||
assert '/pywb-nosurt/20140103030321/http://www.iana.org/domains/example' in resp.body
|
assert '/pywb-nosurt/20140103030321mp_/http://www.iana.org/domains/example' in resp.body
|
||||||
|
|
||||||
def test_replay_url_agnostic_revisit(self):
|
def test_replay_url_agnostic_revisit(self):
|
||||||
resp = self.testapp.get('/pywb/20130729195151/http://www.example.com/')
|
resp = self.testapp.get('/pywb/20130729195151mp_/http://www.example.com/')
|
||||||
self._assert_basic_html(resp)
|
self._assert_basic_html(resp)
|
||||||
|
|
||||||
assert 'Mon, Jul 29 2013 19:51:51' in resp.body
|
assert 'Mon, Jul 29 2013 19:51:51' in resp.body
|
||||||
assert 'wb.js' in resp.body
|
assert 'wb.js' in resp.body
|
||||||
assert '/pywb/20130729195151/http://www.iana.org/domains/example"' in resp.body
|
assert '/pywb/20130729195151mp_/http://www.iana.org/domains/example"' in resp.body
|
||||||
|
|
||||||
def test_replay_cdx_mod(self):
|
def test_replay_cdx_mod(self):
|
||||||
resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css')
|
resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css')
|
||||||
@ -164,42 +177,42 @@ class TestWb:
|
|||||||
assert resp.content_type == 'application/x-javascript'
|
assert resp.content_type == 'application/x-javascript'
|
||||||
|
|
||||||
def test_redirect_1(self):
|
def test_redirect_1(self):
|
||||||
resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/')
|
resp = self.testapp.get('/pywb/20140127171237mp_/http://www.iana.org/')
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
|
|
||||||
assert resp.headers['Location'].endswith('/pywb/20140127171238/http://iana.org')
|
assert resp.headers['Location'].endswith('/pywb/20140127171238mp_/http://iana.org')
|
||||||
|
|
||||||
|
|
||||||
def test_redirect_replay_2(self):
|
def test_redirect_replay_2(self):
|
||||||
resp = self.testapp.get('/pywb/http://example.com/')
|
resp = self.testapp.get('/pywb/mp_/http://example.com/')
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
|
|
||||||
assert resp.headers['Location'].endswith('/20140127171251/http://example.com')
|
assert resp.headers['Location'].endswith('/20140127171251mp_/http://example.com')
|
||||||
resp = resp.follow()
|
resp = resp.follow()
|
||||||
|
|
||||||
#check resp
|
#check resp
|
||||||
self._assert_basic_html(resp)
|
self._assert_basic_html(resp)
|
||||||
assert 'Mon, Jan 27 2014 17:12:51' in resp.body
|
assert 'Mon, Jan 27 2014 17:12:51' in resp.body
|
||||||
assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body
|
assert '/pywb/20140127171251mp_/http://www.iana.org/domains/example' in resp.body
|
||||||
|
|
||||||
def test_redirect_relative_3(self):
|
def test_redirect_relative_3(self):
|
||||||
# first two requests should result in same redirect
|
# first two requests should result in same redirect
|
||||||
target = 'http://localhost:8080/pywb/2014/http://iana.org/_css/2013.1/screen.css'
|
target = 'http://localhost:8080/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css'
|
||||||
|
|
||||||
# without timestamp
|
# without timestamp
|
||||||
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014/http://iana.org/')])
|
resp = self.testapp.get('/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014mp_/http://iana.org/')])
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'] == target, resp.headers['Location']
|
assert resp.headers['Location'] == target, resp.headers['Location']
|
||||||
|
|
||||||
# with timestamp
|
# with timestamp
|
||||||
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014/http://iana.org/')])
|
resp = self.testapp.get('/2014/_css/2013.1/screen.css', headers = [('Referer', 'http://localhost:8080/pywb/2014mp_/http://iana.org/')])
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'] == target, resp.headers['Location']
|
assert resp.headers['Location'] == target, resp.headers['Location']
|
||||||
|
|
||||||
|
|
||||||
resp = resp.follow()
|
resp = resp.follow()
|
||||||
assert resp.status_int == 302
|
assert resp.status_int == 302
|
||||||
assert resp.headers['Location'].endswith('/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css')
|
assert resp.headers['Location'].endswith('/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css')
|
||||||
|
|
||||||
resp = resp.follow()
|
resp = resp.follow()
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
@ -207,7 +220,7 @@ class TestWb:
|
|||||||
|
|
||||||
|
|
||||||
def test_referrer_self_redirect(self):
|
def test_referrer_self_redirect(self):
|
||||||
uri = '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css'
|
uri = '/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css'
|
||||||
host = 'somehost:8082'
|
host = 'somehost:8082'
|
||||||
referrer = 'http://' + host + uri
|
referrer = 'http://' + host + uri
|
||||||
|
|
||||||
@ -221,7 +234,7 @@ class TestWb:
|
|||||||
|
|
||||||
|
|
||||||
def test_post_1(self):
|
def test_post_1(self):
|
||||||
resp = self.testapp.post('/pywb/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
|
resp = self.testapp.post('/pywb/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
|
||||||
|
|
||||||
# no redirects for POST, as some browsers (FF) show modal confirmation dialog!
|
# no redirects for POST, as some browsers (FF) show modal confirmation dialog!
|
||||||
#assert resp.status_int == 307
|
#assert resp.status_int == 307
|
||||||
@ -236,13 +249,13 @@ class TestWb:
|
|||||||
assert '"test": "abc"' in resp.body
|
assert '"test": "abc"' in resp.body
|
||||||
|
|
||||||
def test_post_2(self):
|
def test_post_2(self):
|
||||||
resp = self.testapp.post('/pywb/20140610001255/http://httpbin.org/post?foo=bar', {'data': '^'})
|
resp = self.testapp.post('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': '^'})
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert '"data": "^"' in resp.body
|
assert '"data": "^"' in resp.body
|
||||||
|
|
||||||
def test_post_redirect(self):
|
def test_post_redirect(self):
|
||||||
# post handled without redirect (since 307 not allowed)
|
# post handled without redirect (since 307 not allowed)
|
||||||
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:8080/pywb/2014/http://httpbin.org/post')])
|
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:8080/pywb/2014mp_/http://httpbin.org/post')])
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert '"foo": "bar"' in resp.body
|
assert '"foo": "bar"' in resp.body
|
||||||
assert '"test": "abc"' in resp.body
|
assert '"test": "abc"' in resp.body
|
||||||
|
Loading…
x
Reference in New Issue
Block a user