mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
JS Object Proxy Override System (#224)
* Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the initial implementation for cors requests, webrecorder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriterMixin and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve any Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. 
altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLHtmlElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recursion - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_proxy' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings (for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy 
obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
This commit is contained in:
parent
d8b6ad3a31
commit
a6ab167dd3
@ -3,6 +3,7 @@
|
||||
#
|
||||
# Settings for each collection
|
||||
|
||||
|
||||
collections:
|
||||
# <name>: <cdx_path>
|
||||
# collection will be accessed via /<name>
|
||||
|
@ -57,7 +57,9 @@ class BaseCli(object):
|
||||
|
||||
def load(self):
|
||||
if self.r.live:
|
||||
self.extra_config = {'collections': {'live': '$live'}}
|
||||
self.extra_config = {'collections':
|
||||
{'live': {'index': '$live',
|
||||
'use_js_obj_proxy': True}}}
|
||||
|
||||
def run(self):
|
||||
self.run_gevent()
|
||||
@ -80,6 +82,7 @@ class ReplayCli(BaseCli):
|
||||
|
||||
def load(self):
|
||||
super(ReplayCli, self).load()
|
||||
import os
|
||||
if self.r.directory: #pragma: no cover
|
||||
os.chdir(self.r.directory)
|
||||
|
||||
|
@ -88,13 +88,25 @@ class FrontEndApp(object):
|
||||
except:
|
||||
self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath))
|
||||
|
||||
def get_metadata(self, coll):
    """Assemble the metadata dict passed along for collection ``coll``.

    The result always carries ``'coll'`` (unless the loaded settings
    override that key) and ``'type'``. Fixed-route collections are
    populated from ``warcserver.get_coll_config()`` and tagged
    ``'replay-fixed'``; all others are populated from
    ``metadata_cache.load()`` and tagged ``'replay-dyn'``.

    :param coll: collection name
    :return: dict of collection settings plus ``'coll'`` and ``'type'``
    """
    is_fixed = coll in self.warcserver.list_fixed_routes()

    if is_fixed:
        settings = self.warcserver.get_coll_config(coll)
        coll_type = 'replay-fixed'
    else:
        settings = self.metadata_cache.load(coll)
        coll_type = 'replay-dyn'

    metadata = {'coll': coll}
    metadata.update(settings)

    # 'type' is assigned last so loaded settings can never override it
    metadata['type'] = coll_type
    return metadata
|
||||
|
||||
def serve_coll_page(self, environ, coll):
|
||||
if not self.is_valid_coll(coll):
|
||||
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
|
||||
|
||||
self.setup_paths(environ, coll)
|
||||
|
||||
metadata = self.metadata_cache.load(coll)
|
||||
metadata = self.get_metadata(coll)
|
||||
|
||||
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
|
||||
|
||||
@ -115,15 +127,10 @@ class FrontEndApp(object):
|
||||
if environ.get('QUERY_STRING'):
|
||||
wb_url_str += '?' + environ.get('QUERY_STRING')
|
||||
|
||||
kwargs = {'coll': coll}
|
||||
|
||||
if coll in self.warcserver.list_fixed_routes():
|
||||
kwargs['type'] = 'replay-fixed'
|
||||
else:
|
||||
kwargs['type'] = 'replay-dyn'
|
||||
metadata = self.get_metadata(coll)
|
||||
|
||||
try:
|
||||
response = self.rewriterapp.render_content(wb_url_str, kwargs, environ)
|
||||
response = self.rewriterapp.render_content(wb_url_str, metadata, environ)
|
||||
except UpstreamException as ue:
|
||||
response = self.rewriterapp.handle_error(environ, ue)
|
||||
raise HTTPException(response=response)
|
||||
|
@ -3,10 +3,7 @@ import requests
|
||||
from werkzeug.http import HTTP_STATUS_CODES
|
||||
from six.moves.urllib.parse import urlencode, urlsplit, urlunsplit
|
||||
|
||||
#from pywb.rewrite.rewrite_amf import RewriteAMFMixin
|
||||
#from pywb.rewrite.rewrite_dash import RewriteDASHMixin
|
||||
#from pywb.rewrite.rewrite_content import RewriteContent
|
||||
from pywb.rewrite.default_rewriter import DefaultRewriter
|
||||
from pywb.rewrite.default_rewriter import DefaultRewriter, RewriterWithJSProxy
|
||||
|
||||
from pywb.rewrite.wburl import WbUrl
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter, SchemeOnlyUrlRewriter
|
||||
@ -66,10 +63,8 @@ class RewriterApp(object):
|
||||
self.frame_mod = None
|
||||
self.replay_mod = ''
|
||||
|
||||
#frame_type = 'inverse' if framed_replay else False
|
||||
|
||||
#self.content_rewriter = Rewriter(is_framed_replay=frame_type)
|
||||
self.content_rw = DefaultRewriter(replay_mod=self.replay_mod)
|
||||
self.default_rw = DefaultRewriter(replay_mod=self.replay_mod)
|
||||
self.js_proxy_rw = RewriterWithJSProxy(replay_mod=self.replay_mod)
|
||||
|
||||
if not jinja_env:
|
||||
jinja_env = JinjaEnv(globals={'static_path': 'static'})
|
||||
@ -148,8 +143,12 @@ class RewriterApp(object):
|
||||
|
||||
urlkey = canonicalize(wb_url.url)
|
||||
|
||||
inputreq = RewriteInputRequest(environ, urlkey, wb_url.url,
|
||||
self.content_rw)
|
||||
if kwargs.get('use_js_obj_proxy'):
|
||||
content_rw = self.js_proxy_rw
|
||||
else:
|
||||
content_rw = self.default_rw
|
||||
|
||||
inputreq = RewriteInputRequest(environ, urlkey, wb_url.url, content_rw)
|
||||
|
||||
inputreq.include_post_query(wb_url.url)
|
||||
|
||||
@ -267,15 +266,8 @@ class RewriterApp(object):
|
||||
cookie_rewriter = self.cookie_tracker.get_rewriter(urlrewriter,
|
||||
cookie_key)
|
||||
|
||||
#result = self.content_rewriter.rewrite_content(urlrewriter,
|
||||
# record.http_headers,
|
||||
# record.raw_stream,
|
||||
# head_insert_func,
|
||||
# urlkey,
|
||||
# cdx,
|
||||
# cookie_rewriter,
|
||||
# environ)
|
||||
result = self.content_rw(record, urlrewriter, cookie_rewriter, head_insert_func, cdx)
|
||||
urlrewriter.rewrite_opts['ua_string'] = environ.get('HTTP_USER_AGENT')
|
||||
result = content_rw(record, urlrewriter, cookie_rewriter, head_insert_func, cdx)
|
||||
|
||||
status_headers, gen, is_rw = result
|
||||
|
||||
|
@ -4,6 +4,7 @@ import os
|
||||
from pywb.utils.loaders import LocalFileLoader
|
||||
|
||||
from pywb.apps.wbrequestresponse import WbResponse
|
||||
from pywb.utils.wbexception import NotFoundException
|
||||
|
||||
|
||||
#=================================================================
|
||||
|
@ -37,7 +37,10 @@ directory structure expected by pywb
|
||||
|
||||
COLL_RX = re.compile('^[\w][-\w]*$')
|
||||
|
||||
def __init__(self, coll_name, colls_dir='collections', must_exist=True):
|
||||
COLLS_DIR = 'collections'
|
||||
|
||||
def __init__(self, coll_name, colls_dir=None, must_exist=True):
|
||||
colls_dir = colls_dir or self.COLLS_DIR
|
||||
self.default_config = load_yaml_config(DEFAULT_CONFIG)
|
||||
|
||||
if coll_name and not self.COLL_RX.match(coll_name):
|
||||
|
@ -20,14 +20,16 @@ class BaseContentRewriter(object):
|
||||
|
||||
def __init__(self, rules_file, replay_mod=''):
|
||||
self.rules = []
|
||||
self.all_rewriters = []
|
||||
self.load_rules(rules_file)
|
||||
self.replay_mod = replay_mod
|
||||
#for rw in self.known_rewriters:
|
||||
# self.all_rewriters[rw.name] = rw
|
||||
|
||||
def add_rewriter(self, rw):
|
||||
self.all_rewriters[rw.name] = rw
|
||||
|
||||
def get_rewriter(self, rw_type, rwinfo=None):
|
||||
return self.all_rewriters.get(rw_type)
|
||||
|
||||
def load_rules(self, filename):
|
||||
config = load_yaml_config(filename)
|
||||
for rule in config.get('rules'):
|
||||
@ -68,7 +70,7 @@ class BaseContentRewriter(object):
|
||||
text_type = 'js-proxy'
|
||||
|
||||
rw_type = rule.get(text_type, text_type)
|
||||
rw_class = self.all_rewriters.get(rw_type)
|
||||
rw_class = self.get_rewriter(rw_type, rwinfo)
|
||||
|
||||
return rw_type, rw_class
|
||||
|
||||
@ -98,7 +100,7 @@ class BaseContentRewriter(object):
|
||||
|
||||
# if no js rewriter, then do banner insert only
|
||||
if not js_rewriter:
|
||||
rw_class = self.all_rewriters.get('html-banner-only')
|
||||
rw_class = self.get_rewriter('html-banner-only', rwinfo)
|
||||
|
||||
rw = rw_class(rwinfo.url_rewriter,
|
||||
js_rewriter=js_rewriter,
|
||||
@ -146,7 +148,7 @@ class BaseContentRewriter(object):
|
||||
return charset
|
||||
|
||||
def rewrite_headers(self, rwinfo):
|
||||
header_rw_class = self.all_rewriters.get('header')
|
||||
header_rw_class = self.get_rewriter('header', rwinfo)
|
||||
return header_rw_class(rwinfo)()
|
||||
|
||||
def __call__(self, record, url_rewriter, cookie_rewriter,
|
||||
@ -268,7 +270,7 @@ class RewriteInfo(object):
|
||||
self.url_rewriter = url_rewriter
|
||||
|
||||
if not cookie_rewriter:
|
||||
cookie_rw_class = content_rewriter.all_rewriters.get('cookie')
|
||||
cookie_rw_class = content_rewriter.get_rewriter('cookie', self)
|
||||
if cookie_rw_class:
|
||||
cookie_rewriter = cookie_rw_class(url_rewriter)
|
||||
|
||||
@ -328,20 +330,20 @@ class RewriteInfo(object):
|
||||
return buff
|
||||
|
||||
def should_rw_content(self):
|
||||
if not self.text_type:
|
||||
return False
|
||||
|
||||
if self.url_rewriter.wburl.mod == 'id_':
|
||||
return False
|
||||
|
||||
if self.text_type == 'html':
|
||||
if self.url_rewriter.rewrite_opts.get('is_ajax'):
|
||||
if self.text_type in ('html', 'js'):
|
||||
return False
|
||||
|
||||
elif self.text_type == 'plain':
|
||||
if self.text_type == 'plain':
|
||||
if self.url_rewriter.wburl.mod not in ('js_', 'cs_'):
|
||||
return False
|
||||
|
||||
elif not self.text_type:
|
||||
return False
|
||||
|
||||
elif self.text_type == 'css' or self.text_type == 'xml':
|
||||
if self.url_rewriter.wburl.mod == 'bn_':
|
||||
return False
|
||||
|
@ -4,8 +4,7 @@ from pywb.rewrite.html_rewriter import HTMLRewriter
|
||||
from pywb.rewrite.html_insert_rewriter import HTMLInsertOnlyRewriter
|
||||
|
||||
from pywb.rewrite.regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
||||
from pywb.rewrite.regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
||||
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter
|
||||
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter, JSWombatProxyRewriter
|
||||
|
||||
from pywb.rewrite.header_rewriter import PrefixHeaderRewriter
|
||||
from pywb.rewrite.cookie_rewriter import HostScopeCookieRewriter
|
||||
@ -16,10 +15,13 @@ from pywb.rewrite.rewrite_dash import RewriteDASH
|
||||
from pywb.rewrite.rewrite_hls import RewriteHLS
|
||||
from pywb.rewrite.rewrite_amf import RewriteAMF
|
||||
|
||||
import copy
|
||||
from werkzeug.useragents import UserAgent
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class DefaultRewriter(BaseContentRewriter):
|
||||
all_rewriters = {
|
||||
DEFAULT_REWRITERS = {
|
||||
'header': PrefixHeaderRewriter,
|
||||
'cookie': HostScopeCookieRewriter,
|
||||
|
||||
@ -68,10 +70,10 @@ class DefaultRewriter(BaseContentRewriter):
|
||||
# AMF
|
||||
'application/x-amf': 'amf',
|
||||
|
||||
# XML
|
||||
'text/xml': 'xml',
|
||||
'application/xml': 'xml',
|
||||
'application/rss+xml': 'xml',
|
||||
# XML -- don't rewrite xml
|
||||
#'text/xml': 'xml',
|
||||
#'application/xml': 'xml',
|
||||
#'application/rss+xml': 'xml',
|
||||
|
||||
# PLAIN
|
||||
'text/plain': 'plain',
|
||||
@ -80,9 +82,48 @@ class DefaultRewriter(BaseContentRewriter):
|
||||
def __init__(self, rules_file=None, replay_mod=''):
|
||||
rules_file = rules_file or 'pkg://pywb/rules.yaml'
|
||||
super(DefaultRewriter, self).__init__(rules_file, replay_mod)
|
||||
self.all_rewriters = copy.copy(self.DEFAULT_REWRITERS)
|
||||
|
||||
def init_js_regex(self, regexs):
|
||||
return RegexRewriter.parse_rules_from_config(regexs)
|
||||
|
||||
def get_rewrite_types(self):
|
||||
return self.rewrite_types
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class RewriterWithJSProxy(DefaultRewriter):
    """DefaultRewriter variant that can serve JS via the Wombat object proxy.

    For ``'js'`` content, ``get_rewriter()`` returns ``JSWombatProxyRewriter``
    when the user agent found in the url rewriter's ``rewrite_opts`` passes
    ``ua_allows_obj_proxy()``; every other lookup is delegated to
    ``DefaultRewriter``.
    """

    # Minimum version, per ``ua.browser`` id, for which the proxy rewrite is
    # enabled. ``None`` means never enabled; browsers that are not listed are
    # treated the same way.
    MIN_PROXY_VERSIONS = {
        'chrome': '49.0',
        'firefox': '44.0',
        'safari': '10.0',
        'opera': '36.0',
        'edge': '12.0',
        'msie': None,
    }

    def __init__(self, *args, **kwargs):
        super(RewriterWithJSProxy, self).__init__(*args, **kwargs)

    def get_rewriter(self, rw_type, rwinfo=None):
        """Return the rewriter class for ``rw_type``.

        :param rw_type: rewriter type key, eg. ``'js'``
        :param rwinfo: optional rewrite info; its
            ``url_rewriter.rewrite_opts`` is consulted for the user agent
        :return: ``JSWombatProxyRewriter`` for ``'js'`` when ``rwinfo`` is
            given and the user agent allows it, otherwise whatever the
            parent class returns
        """
        if rw_type == 'js' and rwinfo:
            # check if UA allows this
            if self.ua_allows_obj_proxy(rwinfo.url_rewriter.rewrite_opts):
                return JSWombatProxyRewriter

        # otherwise, return default rewriter
        return super(RewriterWithJSProxy, self).get_rewriter(rw_type, rwinfo)

    def ua_allows_obj_proxy(self, opts):
        """Decide whether the requesting browser may get the proxy rewrite.

        :param opts: rewrite options; ``opts['ua']`` (an already parsed
            user agent) is preferred, else ``opts['ua_string']`` is parsed
            with ``UserAgent``
        :return: ``True`` when no user agent information is available or
            the browser version is at least the listed minimum; ``False``
            for unlisted browsers, browsers mapped to ``None``, and
            missing or too-old versions
        """
        ua = opts.get('ua')
        if not ua:
            ua_string = opts.get('ua_string')
            if ua_string:
                ua = UserAgent(ua_string)

        # no user agent information at all: assume the proxy is supported
        if ua is None:
            return True

        min_vers = self.MIN_PROXY_VERSIONS.get(ua.browser)
        if not min_vers or not ua.version:
            return False

        return self._version_at_least(ua.version, min_vers)

    @staticmethod
    def _version_key(version):
        """Turn a dotted version string into a tuple of ints.

        Parsing stops at the first component without leading digits, so
        ``'49.0.2623.87'`` gives ``(49, 0, 2623, 87)`` and ``'10.0b1'``
        gives ``(10, 0)``.
        """
        key = []
        for part in str(version).split('.'):
            digits = ''
            for ch in part:
                if ch not in '0123456789':
                    break
                digits += ch

            if not digits:
                break

            key.append(int(digits))

        return tuple(key)

    @classmethod
    def _version_at_least(cls, version, minimum):
        """Numerically compare two dotted version strings.

        Comparing the raw strings is lexicographic, which ranks ``'100.0'``
        below ``'49.0'`` and ``'9.0'`` above ``'10.0'``; comparing int
        tuples avoids that. The shorter tuple is zero-padded so ``'49'``
        equals ``'49.0'``.
        """
        have = cls._version_key(version)
        need = cls._version_key(minimum)

        width = max(len(have), len(need))
        have += (0,) * (width - len(have))
        need += (0,) * (width - len(need))

        return have >= need
|
||||
|
@ -1,5 +1,4 @@
|
||||
import re
|
||||
|
||||
from pywb.rewrite.content_rewriter import StreamingRewriter
|
||||
|
||||
|
||||
@ -109,6 +108,7 @@ class RegexRewriter(StreamingRewriter):
|
||||
return result
|
||||
|
||||
return list(map(parse_rule, config))
|
||||
|
||||
return run_parse_rules
|
||||
|
||||
|
||||
@ -151,6 +151,51 @@ class JSLocationRewriterMixin(object):
|
||||
super(JSLocationRewriterMixin, self).__init__(rewriter, rules)
|
||||
|
||||
|
||||
# =================================================================
|
||||
class JSWombatProxyRewriterMixin(object):
    """
    JS Rewriter mixin which wraps the contents of the
    script in an anonymous block scope and inserts
    Wombat js-proxy setup
    """

    # JS prologue template; {0} is replaced with local_init_func_name.
    # It defines the lookup helper, adds a pass-through __WB_pmw if missing,
    # and opens the block scope closed again by close_string.
    local_init_func = '\nvar {0} = function(name) {{\
return (self._wb_wombat && self._wb_wombat.local_init &&\
self._wb_wombat.local_init(name)) || self[name]; }}\n\
if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
{{\n'

    # name of the JS helper function defined by local_init_func
    local_init_func_name = '_____WB$wombat$assign$function_____'

    # per-object declaration; {0} is the object name, {1} the helper name
    local_var_line = 'let {0} = {1}("{0}");'

    # globals redeclared inside the block scope via local_var_line
    local_objs = ['window',
                  'self',
                  'document',
                  'location',
                  'top',
                  'parent',
                  'frames',
                  'opener']

    def __init__(self, rewriter, rules=None):
        """Append the proxy rules and prepare the wrapper strings.

        :param rewriter: passed through unchanged to the next ``__init__``
        :param rules: optional list of rules; the two proxy rules are
            appended to a copy of it. ``None`` (the default) means no
            extra rules, which avoids a shared mutable default list.
        """
        rules = [] if rules is None else rules

        proxy_rules = [
            (r'Function\(["\']return this["\']\)', RegexRewriter.format('Function("return this._WB_wombat_obj_proxy || this")'), 0),
            (r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(self).'), 0),
        ]

        super(JSWombatProxyRewriterMixin, self).__init__(rewriter, rules + proxy_rules)

        local_declares = '\n'.join(self.local_var_line.format(obj, self.local_init_func_name)
                                   for obj in self.local_objs)

        # prologue: helper definition, then one declaration per local object
        self.first_buff = self.local_init_func.format(self.local_init_func_name) + local_declares

        # closes the block scope opened at the end of local_init_func
        self.close_string = '\n\n}'

    def final_read(self):
        """Return the string that closes the wrapping block scope."""
        return self.close_string
|
||||
|
||||
|
||||
# =================================================================
|
||||
class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
|
||||
pass
|
||||
@ -174,6 +219,11 @@ class JSNoneRewriter(RegexRewriter):
|
||||
super(JSNoneRewriter, self).__init__(rewriter, rules)
|
||||
|
||||
|
||||
# =================================================================
|
||||
class JSWombatProxyRewriter(JSWombatProxyRewriterMixin, RegexRewriter):
|
||||
pass
|
||||
|
||||
|
||||
# =================================================================
|
||||
# Set 'default' JSRewriter
|
||||
JSRewriter = JSLinkAndLocationRewriter
|
||||
@ -204,7 +254,6 @@ class XMLRewriter(RegexRewriter):
|
||||
|
||||
# =================================================================
|
||||
class CSSRewriter(RegexRewriter):
|
||||
|
||||
CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"
|
||||
|
||||
CSS_IMPORT_NO_URL_REGEX = ("@import\\s+(?!url)\\(?\\s*['\"]?" +
|
||||
|
@ -18,7 +18,7 @@ This file is part of pywb, https://github.com/ikreymer/pywb
|
||||
*/
|
||||
|
||||
//============================================
|
||||
// Wombat JS-Rewriting Library v2.31
|
||||
// Wombat JS-Rewriting Library v2.40
|
||||
//============================================
|
||||
|
||||
|
||||
@ -891,6 +891,22 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
function override_prop_to_proxy(proto, prop) {
    // Wrap the getter of `prop` on `proto` so that a returned object which
    // carries a `_WB_wombat_obj_proxy` is replaced by that proxy.
    // Does nothing when no original getter is found.
    var orig_getter = get_orig_getter(proto, prop);

    if (!orig_getter) {
        return;
    }

    var proxied_getter = function() {
        var value = orig_getter.call(this);

        // read the proxy property once; falsy values are returned untouched
        var proxy = value ? value._WB_wombat_obj_proxy : undefined;

        return proxy || value;
    };

    // no setter is installed, only the wrapped getter
    def_prop(proto, prop, undefined, proxied_getter);
}
|
||||
|
||||
|
||||
//============================================
|
||||
function override_attr_props() {
|
||||
function is_rw_attr(attr) {
|
||||
@ -922,9 +938,13 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
$wbwindow.Element.prototype._orig_setAttribute = orig_setAttribute;
|
||||
|
||||
$wbwindow.Element.prototype.setAttribute = function(name, value) {
|
||||
if (name) {
|
||||
if (name && typeof(value) === "string") {
|
||||
var lowername = name.toLowerCase();
|
||||
if (typeof(value) == "string" && should_rewrite_attr(this.tagName, lowername)) {
|
||||
|
||||
if (this.tagName == "LINK" && lowername == "href" && value.indexOf("data:text/css") == 0) {
|
||||
value = rewrite_inline_style(value);
|
||||
|
||||
} else if (should_rewrite_attr(this.tagName, lowername)) {
|
||||
if (!this._no_rewrite) {
|
||||
var old_value = value;
|
||||
|
||||
@ -934,7 +954,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
value = rewrite_url(value, false, mod);
|
||||
}
|
||||
} else if (lowername == "style" && typeof(value) == "string") {
|
||||
} else if (lowername == "style") {
|
||||
value = rewrite_style(value);
|
||||
} else if (lowername == "srcset") {
|
||||
value = rewrite_srcset(value);
|
||||
@ -1156,96 +1176,6 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
/* function init_mutation_obs($wbwindow) {
|
||||
if (!$wbwindow.MutationObserver) {
|
||||
return;
|
||||
}
|
||||
|
||||
var m = new MutationObserver(function(records, observer)
|
||||
{
|
||||
for (var i = 0; i < records.length; i++) {
|
||||
var r = records[i];
|
||||
if (r.type == "attributes" && r.attributeName == "style") {
|
||||
var style = r.target.style.cssText;
|
||||
if (style.indexOf("url(") > 0) {
|
||||
var new_style = rewrite_style(style);
|
||||
if (new_style != style) {
|
||||
r.target.style.cssText = new_style;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
m.observe($wbwindow.document.documentElement, {
|
||||
childList: false,
|
||||
attributes: true,
|
||||
subtree: true,
|
||||
//attributeOldValue: true,
|
||||
attributeFilter: ["style"]});
|
||||
}
|
||||
*/
|
||||
//============================================
|
||||
/* function init_href_src_obs($wbwindow)
|
||||
{
|
||||
if (!$wbwindow.MutationObserver) {
|
||||
return;
|
||||
}
|
||||
|
||||
var m = new MutationObserver(function(records, observer)
|
||||
{
|
||||
for (var i = 0; i < records.length; i++) {
|
||||
var r = records[i];
|
||||
if (r.type == "attributes") {
|
||||
//var curr = wb_getAttribute(r.target, r.attributeName);
|
||||
var curr = r.target.getAttribute(r.attributeName);
|
||||
var new_url = rewrite_url(curr);
|
||||
if (curr != new_url) {
|
||||
wb_setAttribute.call(r.target, r.attributeName, new_url);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
m.observe($wbwindow.document.documentElement, {
|
||||
childList: false,
|
||||
attributes: true,
|
||||
subtree: true,
|
||||
//attributeOldValue: true,
|
||||
attributeFilter: ["src", "href"]});
|
||||
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
function init_iframe_insert_obs(root)
|
||||
{
|
||||
if (!$wbwindow.MutationObserver) {
|
||||
return;
|
||||
}
|
||||
|
||||
var m = new MutationObserver(function(records, observer)
|
||||
{
|
||||
for (var i = 0; i < records.length; i++) {
|
||||
var r = records[i];
|
||||
if (r.type == "childList") {
|
||||
for (var j = 0; j < r.addedNodes.length; j++) {
|
||||
if (r.addedNodes[j].tagName == "IFRAME") {
|
||||
init_iframe_wombat(r.addedNodes[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
m.observe(root, {
|
||||
childList: true,
|
||||
subtree: true,
|
||||
});
|
||||
}
|
||||
*/
|
||||
//============================================
|
||||
function rewrite_attr(elem, name, abs_url_only) {
|
||||
if (!elem || !elem.getAttribute) {
|
||||
@ -1516,14 +1446,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
//============================================
|
||||
function override_attr(obj, attr, mod, default_to_setget) {
|
||||
var orig_getter = get_orig_getter(obj, attr);
|
||||
var orig_setter = get_orig_setter(obj, attr);
|
||||
|
||||
var setter = function(orig) {
|
||||
var val;
|
||||
|
||||
if (mod == "cs_" && orig.indexOf("data:text/css") == 0) {
|
||||
function rewrite_inline_style(orig) {
|
||||
var decoded;
|
||||
|
||||
try {
|
||||
@ -1539,6 +1462,20 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
} else {
|
||||
val = rewrite_style(orig);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
//============================================
|
||||
function override_attr(obj, attr, mod, default_to_setget) {
|
||||
var orig_getter = get_orig_getter(obj, attr);
|
||||
var orig_setter = get_orig_setter(obj, attr);
|
||||
|
||||
var setter = function(orig) {
|
||||
var val;
|
||||
|
||||
if (mod == "cs_" && orig.indexOf("data:text/css") == 0) {
|
||||
val = rewrite_inline_style(orig);
|
||||
} else {
|
||||
val = rewrite_url(orig, false, mod);
|
||||
}
|
||||
@ -1598,7 +1535,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
|
||||
//============================================
|
||||
function init_attr_overrides($wbwindow) {
|
||||
function init_attr_overrides() {
|
||||
override_attr($wbwindow.HTMLLinkElement.prototype, "href", "cs_");
|
||||
override_attr($wbwindow.CSSStyleSheet.prototype, "href", "cs_");
|
||||
override_attr($wbwindow.HTMLImageElement.prototype, "src", "im_");
|
||||
@ -1631,6 +1568,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
override_style_attr(style_proto, "background", "background");
|
||||
override_style_attr(style_proto, "backgroundImage", "background-image");
|
||||
|
||||
override_style_attr(style_proto, "cursor", "cursor");
|
||||
|
||||
override_style_attr(style_proto, "listStyle", "list-style");
|
||||
override_style_attr(style_proto, "listStyleImage", "list-style-image");
|
||||
|
||||
@ -1730,7 +1669,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
var getter = function() {
|
||||
init_iframe_wombat(this);
|
||||
return orig_getter.call(this);
|
||||
var res = orig_getter.call(this);
|
||||
res = (res && res._WB_wombat_obj_proxy) || res;
|
||||
return res;
|
||||
};
|
||||
|
||||
def_prop(obj, prop, orig_setter, getter);
|
||||
@ -1882,6 +1823,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
replace_dom_func("appendChild");
|
||||
replace_dom_func("insertBefore");
|
||||
replace_dom_func("replaceChild");
|
||||
|
||||
override_prop_to_proxy($wbwindow.Node.prototype, "ownerDocument");
|
||||
override_prop_to_proxy($wbwindow.HTMLHtmlElement.prototype, "parentNode");
|
||||
}
|
||||
|
||||
|
||||
@ -1911,7 +1855,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
function receive_hash_change(event)
|
||||
{
|
||||
if (!event.data || event.source != $wbwindow.__WB_top_frame) {
|
||||
var source = event.source.__WBProxyRealObj__ || event.source;
|
||||
|
||||
if (!event.data || source != $wbwindow.__WB_top_frame) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2026,6 +1972,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
source = win.__WB_win_id[event.data.src_id];
|
||||
}
|
||||
|
||||
source = source.__WBProxyRealObj__ || source;
|
||||
|
||||
ne = new MessageEvent("message",
|
||||
{"bubbles": event.bubbles,
|
||||
"cancelable": event.cancelable,
|
||||
@ -2123,9 +2071,23 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
addMEOverride("eventPhase");
|
||||
addMEOverride("path");
|
||||
|
||||
override_prop_to_proxy($wbwindow.MessageEvent.prototype, "source");
|
||||
|
||||
$wbwindow.MessageEvent.prototype.__extended = true;
|
||||
}
|
||||
|
||||
//============================================
|
||||
function init_mo_from_proxy() {
    // Override MutationObserver.prototype.observe() so that a proxied
    // target is replaced with its real object before observing.

    // Nothing to override where MutationObserver is not exposed; bail out
    // instead of throwing on the `.prototype` access below.
    if (!$wbwindow.MutationObserver) {
        return;
    }

    var orig_observe = $wbwindow.MutationObserver.prototype.observe;

    function observe_deproxy(target, options) {
        // proxies expose the wrapped object as __WBProxyRealObj__
        target = target && target.__WBProxyRealObj__ || target;
        return orig_observe.call(this, target, options);
    }

    $wbwindow.MutationObserver.prototype.observe = observe_deproxy;
}
|
||||
|
||||
//============================================
|
||||
function init_open_override()
|
||||
{
|
||||
@ -2140,7 +2102,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
var res = orig.call(this, strUrl, strWindowName, strWindowFeatures);
|
||||
init_new_window_wombat(res, strUrl);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
$wbwindow.open = open_rewritten;
|
||||
|
||||
@ -2158,7 +2120,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
//============================================
|
||||
function init_cookies_override($wbwindow)
|
||||
function init_cookies_override()
|
||||
{
|
||||
var cookie_path_regex = /\bPath=\'?\"?([^;'"\s]+)/i;
|
||||
var cookie_domain_regex = /\bDomain=([^;'"\s]+)/i;
|
||||
@ -2342,6 +2304,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
init_new_window_wombat(win, src);
|
||||
}
|
||||
|
||||
//============================================
|
||||
function init_new_window_wombat(win, src) {
|
||||
if (!win || win._wb_wombat) {
|
||||
return;
|
||||
@ -2366,76 +2329,46 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
|
||||
//============================================
|
||||
function init_doc_overrides($wbwindow) {
|
||||
function init_doc_overrides($document) {
|
||||
if (!Object.defineProperty) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ($wbwindow.document._wb_override) {
|
||||
return;
|
||||
}
|
||||
// referrer
|
||||
override_prop_extract($document, "referrer");
|
||||
|
||||
var orig_referrer = extract_orig($wbwindow.document.referrer);
|
||||
|
||||
var domain_info;
|
||||
|
||||
if ($wbwindow.wbinfo) {
|
||||
domain_info = $wbwindow.wbinfo;
|
||||
} else {
|
||||
domain_info = wbinfo;
|
||||
}
|
||||
|
||||
domain_info.domain = domain_info.wombat_host;
|
||||
// origin
|
||||
def_prop($document, "origin", undefined, function() { return this._WB_wombat_location.origin; });
|
||||
|
||||
// domain
|
||||
var domain_setter = function(val) {
|
||||
if (ends_with(domain_info.wombat_host, val)) {
|
||||
domain_info.domain = val;
|
||||
if (ends_with(this._WB_wombat_location.hostname, val)) {
|
||||
this.__wb_domain = val;
|
||||
}
|
||||
}
|
||||
|
||||
var domain_getter = function() {
|
||||
return domain_info.domain;
|
||||
return this.__wb_domain || this._WB_wombat_location.hostname;
|
||||
}
|
||||
|
||||
// changing domain disallowed, but set as no-op to avoid errors
|
||||
def_prop($wbwindow.document, "domain", domain_setter, domain_getter);
|
||||
def_prop($document, "domain", domain_setter, domain_getter);
|
||||
|
||||
def_prop($wbwindow.document, "referrer", undefined, function() { return orig_referrer; });
|
||||
|
||||
|
||||
// Cookies
|
||||
init_cookies_override($wbwindow);
|
||||
|
||||
// Init mutation observer (for style only)
|
||||
//init_mutation_obs($wbwindow);
|
||||
|
||||
// override href and src attrs
|
||||
init_attr_overrides($wbwindow);
|
||||
|
||||
|
||||
init_form_overrides($wbwindow);
|
||||
|
||||
|
||||
// Attr observers
|
||||
//if (!wb_opts.skip_attr_observers) {
|
||||
// init_href_src_obs($wbwindow);
|
||||
//}
|
||||
|
||||
$wbwindow.document._wb_override = true;
|
||||
// override form action
|
||||
init_form_overrides($document);
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
// Necessary since HTMLFormElement.prototype.action is not consistently
|
||||
// overridable
|
||||
function init_form_overrides($wbwindow) {
|
||||
function init_form_overrides($document) {
|
||||
var do_init_forms = function() {
|
||||
for (var i = 0; i < $wbwindow.document.forms.length; i++) {
|
||||
var new_action = rewrite_url($wbwindow.document.forms[i].action);
|
||||
if (new_action != $wbwindow.document.forms[i].action) {
|
||||
$wbwindow.document.forms[i].action = new_action;
|
||||
for (var i = 0; i < $document.forms.length; i++) {
|
||||
var new_action = rewrite_url($document.forms[i].action);
|
||||
if (new_action != $document.forms[i].action) {
|
||||
$document.forms[i].action = new_action;
|
||||
}
|
||||
override_attr($wbwindow.document.forms[i], "action", "", true);
|
||||
override_attr($document.forms[i], "action", "", true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2546,6 +2479,185 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
init_bad_prefixes(wb_replay_prefix);
|
||||
}
|
||||
|
||||
|
||||
//============================================
|
||||
// New Proxy Obj Override Functions
|
||||
// Original Concept by John Berlin (https://github.com/N0taN3rd)
|
||||
//============================================
|
||||
function getAllOwnProps(obj) {
    // Build the list of property names that default_proxy_get() consults
    // (via indexOf) when deciding whether a function must be re-bound.
    //
    // The list holds, in order:
    //  1. own property names of obj whose value is truthy and has no
    //     'prototype' property,
    //  2. every own property name of each object on obj's prototype chain,
    //     unfiltered.
    // Names may appear more than once.
    var collected = [];

    Object.getOwnPropertyNames(obj).forEach(function(name) {
        // reading an arbitrary property may throw; such props are left out
        try {
            if (obj[name] && !obj[name].prototype) {
                collected.push(name);
            }
        } catch (e) {}
    });

    // walk up the prototype chain until it ends
    for (var proto = Object.getPrototypeOf(obj); proto; proto = Object.getPrototypeOf(proto)) {
        collected = collected.concat(Object.getOwnPropertyNames(proto));
    }

    return collected;
}
|
||||
|
||||
//============================================
|
||||
function default_proxy_get(obj, prop, ownProps) {
    // Shared 'get' logic for the window and document proxies.
    //
    // obj      - the real object being proxied
    // prop     - the property key being read
    // ownProps - list of names (see getAllOwnProps) whose function values
    //            are returned bound to obj
    //
    // Returns the value the proxy should expose for prop.

    // escape hatch: hand back the real object behind the proxy
    if (prop == '__WBProxyRealObj__') {
        return obj;
    }

    // 'location' is served from the wombat location property
    if (prop == 'location') {
        return obj._WB_wombat_location;
    }

    // return the proxy reference itself rather than unwrapping it below
    if (prop == "_WB_wombat_obj_proxy") {
        return obj._WB_wombat_obj_proxy;
    }

    var value = obj[prop];

    switch (typeof value) {
    case "function":
        // listed functions are bound so they run with the real object as 'this'
        if (ownProps.indexOf(prop) != -1) {
            return value.bind(obj);
        }
        break;

    case "object":
        // objects that carry their own proxy are exposed through it
        if (value && value._WB_wombat_obj_proxy) {
            return value._WB_wombat_obj_proxy;
        }
        break;
    }

    return value;
}
|
||||
|
||||
//============================================
|
||||
function init_window_obj_proxy($wbwindow) {
    // Create the window object proxy and store it on
    // $wbwindow._WB_wombat_obj_proxy.
    //
    // The proxy wraps an empty dummy object and forwards operations to the
    // real $wbwindow, so traps have to keep the dummy target in step where
    // the Proxy machinery compares the trap result against the target.
    //
    // Returns the proxy, or undefined when $wbwindow has no Proxy support.
    if (!$wbwindow.Proxy) {
        return undefined;
    }

    var ownProps = getAllOwnProps($wbwindow);

    $wbwindow._WB_wombat_obj_proxy = new $wbwindow.Proxy({}, {
        get: function(target, prop) {
            // 'top' resolves to the proxy of the wombat top window
            if (prop == 'top') {
                return $wbwindow.WB_wombat_top._WB_wombat_obj_proxy;
            }

            return default_proxy_get($wbwindow, prop, ownProps);
        },

        set: function(target, prop, value) {
            if (prop === 'location') {
                // location assignments go through the wombat location property
                $wbwindow.WB_wombat_location = value;
                return true;
            } else if (prop === 'postMessage' || prop === 'document') {
                // assignments to these are accepted but deliberately dropped
                return true;
            } else {
                // mirror the value on the dummy target first; a reported
                // failure is passed on, a thrown error is ignored
                try {
                    if (!Reflect.set(target, prop, value)) {
                        return false;
                    }
                } catch(e) {}

                return Reflect.set($wbwindow, prop, value);
            }
        },

        has: function(target, prop) {
            return prop in $wbwindow;
        },

        ownKeys: function(target) {
            return Object.getOwnPropertyNames($wbwindow).concat(Object.getOwnPropertySymbols($wbwindow));
        },

        getOwnPropertyDescriptor: function(target, key) {
            // hack for some JS libraries that do a for in
            // since we are proxying an empty object need to add configurable = true
            // Proxies know we are an empty object and if window says not configurable
            // throws an error
            var descriptor = Object.getOwnPropertyDescriptor($wbwindow, key);
            if (descriptor && !descriptor.configurable) {
                descriptor.configurable = true;
            }
            return descriptor;
        },

        getPrototypeOf: function(target) {
            return Object.getPrototypeOf($wbwindow);
        },

        setPrototypeOf: function(target, newProto) {
            // changing the prototype through the proxy is refused
            return false;
        },

        isExtensible: function(target) {
            return Object.isExtensible($wbwindow);
        },

        preventExtensions: function(target) {
            // NOTE(review): only the real window is made non-extensible; the
            // dummy target stays extensible, which the Proxy invariants for
            // preventExtensions/isExtensible may reject with a TypeError --
            // confirm whether this path is ever exercised.
            Object.preventExtensions($wbwindow);
            return true;
        },

        deleteProperty: function(target, prop) {
            var propDescriptor = Object.getOwnPropertyDescriptor($wbwindow, prop);

            // nothing to delete
            if (propDescriptor === undefined) {
                return true;
            }

            // non-configurable properties can not be deleted
            if (propDescriptor.configurable === false) {
                return false;
            }

            delete $wbwindow[prop];
            return true;
        },

        defineProperty: function(target, prop, desc) {
            desc = desc || {};

            // Only fall back to the window's current value when the
            // descriptor carries neither a value nor an accessor. Checking
            // for field presence (rather than truthiness) keeps explicit
            // falsy values such as 0, "" or false, and avoids adding 'value'
            // to a setter-only accessor descriptor, which would make the
            // descriptor invalid.
            var has_value = ('value' in desc);
            var is_accessor = ('get' in desc) || ('set' in desc);

            if (!has_value && !is_accessor) {
                desc.value = $wbwindow[prop];
            }

            // define on the real window ...
            Reflect.defineProperty($wbwindow, prop, desc);

            // ... and on the dummy target as well, to avoid an exception
            // from the Proxy when the target does not match the result
            return Reflect.defineProperty(target, prop, desc);
        }
    });

    return $wbwindow._WB_wombat_obj_proxy;
}
|
||||
|
||||
//============================================
|
||||
function init_document_obj_proxy($document) {
    // Apply the document overrides to $document, then (if Proxy is
    // available on $wbwindow) wrap it in a proxy stored on
    // $document._WB_wombat_obj_proxy.
    //
    // Returns the proxy, or undefined when Proxy is not supported.
    init_doc_overrides($document);

    if (!$wbwindow.Proxy) {
        return undefined;
    }

    var ownProps = getAllOwnProps($document);

    var handler = {
        // reads share the same logic as the window proxy
        get: function(target, prop) {
            return default_proxy_get($document, prop, ownProps);
        },

        // 'location' writes are redirected to the wombat location property,
        // everything else is written straight to the document
        set: function(target, prop, value) {
            if (prop === 'location') {
                $document.WB_wombat_location = value;
            } else {
                target[prop] = value;
            }

            return true;
        }
    };

    $document._WB_wombat_obj_proxy = new $wbwindow.Proxy($document, handler);

    return $document._WB_wombat_obj_proxy;
}
|
||||
|
||||
// End Proxy Obj Override System
|
||||
|
||||
|
||||
//============================================
|
||||
function wombat_init(wbinfo) {
|
||||
init_paths(wbinfo);
|
||||
|
||||
@ -2572,8 +2684,6 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
//$wbwindow.document.WB_wombat_domain = wbinfo.wombat_host;
|
||||
//$wbwindow.document.WB_wombat_referrer = extract_orig($wbwindow.document.referrer);
|
||||
|
||||
init_doc_overrides($wbwindow, wb_opts);
|
||||
|
||||
// History
|
||||
override_history_func("pushState");
|
||||
override_history_func("replaceState");
|
||||
@ -2638,6 +2748,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
init_getAttribute_override();
|
||||
}
|
||||
|
||||
// override href and src attrs
|
||||
init_attr_overrides();
|
||||
|
||||
// Cookies
|
||||
init_cookies_override();
|
||||
|
||||
// createElement attr override
|
||||
if (!wb_opts.skip_createElement) {
|
||||
init_createElement_override();
|
||||
@ -2680,6 +2796,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
// disable notifications
|
||||
init_disable_notifications();
|
||||
|
||||
// add window and document obj proxies, if available
|
||||
init_window_obj_proxy($wbwindow);
|
||||
init_document_obj_proxy($wbwindow.document);
|
||||
|
||||
init_mo_from_proxy();
|
||||
|
||||
// expose functions
|
||||
var obj = {}
|
||||
obj.extract_orig = extract_orig;
|
||||
@ -2687,6 +2809,14 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
obj.watch_elem = watch_elem;
|
||||
obj.init_new_window_wombat = init_new_window_wombat;
|
||||
obj.init_paths = init_paths;
|
||||
obj.local_init = function(name) {
|
||||
var res = $wbwindow._WB_wombat_obj_proxy[name];
|
||||
if (name === "document" && res && !res._WB_wombat_obj_proxy) {
|
||||
return init_document_obj_proxy(res) || res;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
||||
@ -2842,3 +2972,4 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
};
|
||||
|
||||
window._WBWombat = _WBWombat;
|
||||
|
||||
|
@ -108,6 +108,16 @@ class WarcServer(BaseWarcServer):
|
||||
def list_fixed_routes(self):
    """Return the keys of ``self.fixed_routes`` as a new list."""
    route_names = self.fixed_routes.keys()
    return list(route_names)
|
||||
|
||||
def get_coll_config(self, name):
    """Return the configuration dict for the collection ``name``.

    The entry is looked up under the ``collections`` key of
    ``self.config``. When there is no (or an empty) ``collections``
    section, or no entry for ``name``, an empty dict is returned. An
    entry that is not a dict is wrapped as ``{'index': entry}``.
    """
    collections = self.config.get('collections', None)
    if not collections:
        return {}

    entry = collections.get(name, {})
    return entry if isinstance(entry, dict) else {'index': entry}
|
||||
|
||||
def list_dynamic_routes(self):
|
||||
if not self.root_dir:
|
||||
return []
|
||||
|
@ -1,5 +1,5 @@
|
||||
six
|
||||
warcio>=1.3.4
|
||||
warcio>=1.4.0
|
||||
chardet
|
||||
requests
|
||||
redis
|
||||
|
@ -2,9 +2,15 @@
|
||||
|
||||
debug: true
|
||||
|
||||
collections_root: _test_colls
|
||||
|
||||
collections:
|
||||
pywb: ./sample_archive/cdx/
|
||||
|
||||
with-js-proxy:
|
||||
index: ./sample_archive/cdx/
|
||||
use_js_obj_proxy: true
|
||||
|
||||
# live collection
|
||||
live: $live
|
||||
|
||||
|
@ -19,7 +19,7 @@ from mock import patch
|
||||
from pywb import get_test_dir
|
||||
from pywb.warcserver.test.testutils import TempDirTests, BaseTestClass
|
||||
|
||||
from pywb.manager.manager import main
|
||||
from pywb.manager.manager import main, CollectionsManager
|
||||
|
||||
import pywb.manager.autoindex
|
||||
|
||||
@ -32,6 +32,9 @@ from pywb.apps.frontendapp import FrontEndApp
|
||||
#=============================================================================
|
||||
ARCHIVE_DIR = 'archive'
|
||||
INDEX_DIR = 'indexes'
|
||||
COLLECTIONS = '_test_colls'
|
||||
|
||||
CollectionsManager.COLLS_DIR = COLLECTIONS
|
||||
|
||||
INDEX_FILE = 'index.cdxj'
|
||||
AUTOINDEX_FILE = 'autoindex.cdxj'
|
||||
@ -76,7 +79,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
with raises(SystemExit):
|
||||
wayback(['-a', '-p', '0'])
|
||||
|
||||
colls = os.path.join(self.root_dir, 'collections')
|
||||
colls = os.path.join(self.root_dir, COLLECTIONS)
|
||||
os.mkdir(colls)
|
||||
|
||||
pywb.manager.autoindex.keep_running = False
|
||||
@ -87,7 +90,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
"""
|
||||
main(['init', 'test'])
|
||||
|
||||
colls = os.path.join(self.root_dir, 'collections')
|
||||
colls = os.path.join(self.root_dir, COLLECTIONS)
|
||||
assert os.path.isdir(colls)
|
||||
|
||||
test = os.path.join(colls, 'test')
|
||||
@ -128,7 +131,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
main(['add', 'test', warc1, warc2])
|
||||
|
||||
# Spurious file in collections
|
||||
with open(os.path.join(self.root_dir, 'collections', 'blah'), 'w+b') as fh:
|
||||
with open(os.path.join(self.root_dir, COLLECTIONS, 'blah'), 'w+b') as fh:
|
||||
fh.write(b'foo\n')
|
||||
|
||||
with raises(IOError):
|
||||
@ -147,7 +150,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
|
||||
main(['init', 'nested'])
|
||||
|
||||
nested_root = os.path.join(self.root_dir, 'collections', 'nested', ARCHIVE_DIR)
|
||||
nested_root = os.path.join(self.root_dir, COLLECTIONS, 'nested', ARCHIVE_DIR)
|
||||
nested_a = os.path.join(nested_root, 'A')
|
||||
nested_b = os.path.join(nested_root, 'B', 'sub')
|
||||
|
||||
@ -166,7 +169,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
os.path.join(nested_b, 'example.warc.gz')
|
||||
])
|
||||
|
||||
nested_cdx = os.path.join(self.root_dir, 'collections', 'nested', INDEX_DIR, INDEX_FILE)
|
||||
nested_cdx = os.path.join(self.root_dir, COLLECTIONS, 'nested', INDEX_DIR, INDEX_FILE)
|
||||
with open(nested_cdx) as fh:
|
||||
nested_cdx_index = fh.read()
|
||||
|
||||
@ -190,7 +193,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
to ensure equality of indexes
|
||||
"""
|
||||
# ensure merged index is same as full reindex
|
||||
coll_dir = os.path.join(self.root_dir, 'collections', 'test', INDEX_DIR)
|
||||
coll_dir = os.path.join(self.root_dir, COLLECTIONS, 'test', INDEX_DIR)
|
||||
orig = os.path.join(coll_dir, INDEX_FILE)
|
||||
bak = os.path.join(coll_dir, 'index.bak')
|
||||
|
||||
@ -210,7 +213,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
def test_add_static(self):
|
||||
""" Test adding static file to collection, check access
|
||||
"""
|
||||
a_static = os.path.join(self.root_dir, 'collections', 'test', 'static', 'abc.js')
|
||||
a_static = os.path.join(self.root_dir, COLLECTIONS, 'test', 'static', 'abc.js')
|
||||
|
||||
with open(a_static, 'w+b') as fh:
|
||||
fh.write(b'/* Some JS File */')
|
||||
@ -281,7 +284,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
def test_custom_template_search(self):
|
||||
""" Test manually added custom search template search.html
|
||||
"""
|
||||
a_static = os.path.join(self.root_dir, 'collections', 'test', 'templates', 'search.html')
|
||||
a_static = os.path.join(self.root_dir, COLLECTIONS, 'test', 'templates', 'search.html')
|
||||
|
||||
with open(a_static, 'w+b') as fh:
|
||||
fh.write(b'pywb custom search page')
|
||||
@ -299,7 +302,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
Template is relative to collection-specific dir
|
||||
Add custom metadata and test its presence in custom search page
|
||||
"""
|
||||
custom_search = os.path.join(self.root_dir, 'collections', 'test',
|
||||
custom_search = os.path.join(self.root_dir, COLLECTIONS, 'test',
|
||||
'templates', 'search.html')
|
||||
|
||||
# add metadata
|
||||
@ -314,7 +317,8 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
resp.charset = 'utf-8'
|
||||
assert resp.status_int == 200
|
||||
assert resp.content_type == 'text/html'
|
||||
assert 'overriden search page: {"some": "value"}' in resp.text
|
||||
assert 'overriden search page: ' in resp.text
|
||||
assert '"some": "value"' in resp.text
|
||||
|
||||
resp = self.testapp.get('/test/20140103030321/http://example.com?example=1')
|
||||
assert resp.status_int == 200
|
||||
@ -328,7 +332,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
|
||||
# Add collection template
|
||||
main(['template', 'foo', '--add', 'query_html'])
|
||||
assert os.path.isfile(os.path.join(self.root_dir, 'collections', 'foo', 'templates', 'query.html'))
|
||||
assert os.path.isfile(os.path.join(self.root_dir, COLLECTIONS, 'foo', 'templates', 'query.html'))
|
||||
|
||||
# overwrite -- force
|
||||
main(['template', 'foo', '--add', 'query_html', '-f'])
|
||||
@ -389,7 +393,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
def test_no_templates(self):
|
||||
""" Test removing templates dir, using default template again
|
||||
"""
|
||||
shutil.rmtree(os.path.join(self.root_dir, 'collections', 'foo', 'templates'))
|
||||
shutil.rmtree(os.path.join(self.root_dir, COLLECTIONS, 'foo', 'templates'))
|
||||
|
||||
self._create_app()
|
||||
|
||||
@ -462,7 +466,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
|
||||
def test_auto_index(self):
|
||||
main(['init', 'auto'])
|
||||
auto_dir = os.path.join(self.root_dir, 'collections', 'auto')
|
||||
auto_dir = os.path.join(self.root_dir, COLLECTIONS, 'auto')
|
||||
archive_dir = os.path.join(auto_dir, ARCHIVE_DIR)
|
||||
|
||||
archive_sub_dir = os.path.join(archive_dir, 'sub')
|
||||
@ -545,7 +549,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
|
||||
def test_err_wrong_warcs(self):
|
||||
warc1 = self._get_sample_warc('example.warc.gz')
|
||||
invalid_warc = os.path.join(self.root_dir, 'collections', 'test', ARCHIVE_DIR, 'invalid.warc.gz')
|
||||
invalid_warc = os.path.join(self.root_dir, COLLECTIONS, 'test', ARCHIVE_DIR, 'invalid.warc.gz')
|
||||
|
||||
# Empty warc list, argparse calls exit
|
||||
with raises(SystemExit):
|
||||
@ -572,7 +576,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
||||
""" Test various errors with missing warcs dir,
|
||||
missing cdx dir, non dir cdx file, and missing collections root
|
||||
"""
|
||||
colls = os.path.join(self.root_dir, 'collections')
|
||||
colls = os.path.join(self.root_dir, COLLECTIONS)
|
||||
|
||||
# No Statics -- ignorable
|
||||
shutil.rmtree(os.path.join(colls, 'foo', 'static'))
|
||||
|
@ -254,11 +254,25 @@ class TestWbIntegration(BaseConfigTest):
|
||||
assert resp.content_length == 0
|
||||
assert resp.content_type == 'application/x-javascript'
|
||||
|
||||
#def test_redirect_exact(self):
|
||||
# resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/')
|
||||
# assert resp.status_int == 302
|
||||
def test_replay_js_obj_proxy(self, fmod):
    """Replay jquery.js from the 'with-js-proxy' collection.

    Checks the plain response first, then that a request with a Chrome
    user agent contains the JS object proxy assignment prologue.
    """
    jquery_url = '/with-js-proxy/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js'
    chrome_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'

    # request without any user agent header
    plain_resp = self.get(jquery_url, fmod)

    assert plain_resp.status_int == 200
    assert plain_resp.content_length != 0
    assert plain_resp.content_type == 'application/x-javascript'

    # request with a Chrome user agent
    chrome_resp = self.get(jquery_url, fmod, headers={'User-Agent': chrome_agent})

    assert 'let window = _____WB$wombat$assign$function_____(' in chrome_resp.text
|
||||
|
||||
def test_replay_js_ie11_no_obj_proxy(self, fmod):
    """Replay jquery.js with an IE11 user agent.

    The response must not contain the JS object proxy assignment
    prologue.
    """
    jquery_url = '/with-js-proxy/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js'
    ie11_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'

    resp = self.get(jquery_url, fmod, headers={'User-Agent': ie11_agent})

    assert 'let window = _____WB$wombat$assign$function_____(' not in resp.text
|
||||
|
||||
def test_replay_non_exact(self, fmod):
|
||||
# non-exact mode, don't redirect to exact capture
|
||||
@ -448,7 +462,7 @@ class TestWbIntegration(BaseConfigTest):
|
||||
resp = self.testapp.get('/collinfo.json')
|
||||
assert resp.content_type == 'application/json'
|
||||
value = resp.json
|
||||
assert len(value['fixed']) == 4
|
||||
assert len(value['fixed']) == 5
|
||||
assert len(value['dynamic']) == 0
|
||||
|
||||
#def test_invalid_config(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user