mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
JS Object Proxy Override System (#224)
* Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for CORS requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the initial implementation for CORS requests, webrecorder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter the default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriterMixin and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve any Window/Document object to the proxy, e.g. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. 
altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recursion - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_proxy' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings (for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy 
obj: RewriterWithJSProxy uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
This commit is contained in:
parent
d8b6ad3a31
commit
a6ab167dd3
@ -3,6 +3,7 @@
|
|||||||
#
|
#
|
||||||
# Settings for each collection
|
# Settings for each collection
|
||||||
|
|
||||||
|
|
||||||
collections:
|
collections:
|
||||||
# <name>: <cdx_path>
|
# <name>: <cdx_path>
|
||||||
# collection will be accessed via /<name>
|
# collection will be accessed via /<name>
|
||||||
|
@ -57,7 +57,9 @@ class BaseCli(object):
|
|||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
if self.r.live:
|
if self.r.live:
|
||||||
self.extra_config = {'collections': {'live': '$live'}}
|
self.extra_config = {'collections':
|
||||||
|
{'live': {'index': '$live',
|
||||||
|
'use_js_obj_proxy': True}}}
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.run_gevent()
|
self.run_gevent()
|
||||||
@ -80,6 +82,7 @@ class ReplayCli(BaseCli):
|
|||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
super(ReplayCli, self).load()
|
super(ReplayCli, self).load()
|
||||||
|
import os
|
||||||
if self.r.directory: #pragma: no cover
|
if self.r.directory: #pragma: no cover
|
||||||
os.chdir(self.r.directory)
|
os.chdir(self.r.directory)
|
||||||
|
|
||||||
|
@ -88,13 +88,25 @@ class FrontEndApp(object):
|
|||||||
except:
|
except:
|
||||||
self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath))
|
self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath))
|
||||||
|
|
||||||
|
def get_metadata(self, coll):
|
||||||
|
metadata = {'coll': coll}
|
||||||
|
|
||||||
|
if coll in self.warcserver.list_fixed_routes():
|
||||||
|
metadata.update(self.warcserver.get_coll_config(coll))
|
||||||
|
metadata['type'] = 'replay-fixed'
|
||||||
|
else:
|
||||||
|
metadata.update(self.metadata_cache.load(coll))
|
||||||
|
metadata['type'] = 'replay-dyn'
|
||||||
|
|
||||||
|
return metadata
|
||||||
|
|
||||||
def serve_coll_page(self, environ, coll):
|
def serve_coll_page(self, environ, coll):
|
||||||
if not self.is_valid_coll(coll):
|
if not self.is_valid_coll(coll):
|
||||||
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
|
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
|
||||||
|
|
||||||
self.setup_paths(environ, coll)
|
self.setup_paths(environ, coll)
|
||||||
|
|
||||||
metadata = self.metadata_cache.load(coll)
|
metadata = self.get_metadata(coll)
|
||||||
|
|
||||||
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
|
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
|
||||||
|
|
||||||
@ -115,15 +127,10 @@ class FrontEndApp(object):
|
|||||||
if environ.get('QUERY_STRING'):
|
if environ.get('QUERY_STRING'):
|
||||||
wb_url_str += '?' + environ.get('QUERY_STRING')
|
wb_url_str += '?' + environ.get('QUERY_STRING')
|
||||||
|
|
||||||
kwargs = {'coll': coll}
|
metadata = self.get_metadata(coll)
|
||||||
|
|
||||||
if coll in self.warcserver.list_fixed_routes():
|
|
||||||
kwargs['type'] = 'replay-fixed'
|
|
||||||
else:
|
|
||||||
kwargs['type'] = 'replay-dyn'
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self.rewriterapp.render_content(wb_url_str, kwargs, environ)
|
response = self.rewriterapp.render_content(wb_url_str, metadata, environ)
|
||||||
except UpstreamException as ue:
|
except UpstreamException as ue:
|
||||||
response = self.rewriterapp.handle_error(environ, ue)
|
response = self.rewriterapp.handle_error(environ, ue)
|
||||||
raise HTTPException(response=response)
|
raise HTTPException(response=response)
|
||||||
|
@ -3,10 +3,7 @@ import requests
|
|||||||
from werkzeug.http import HTTP_STATUS_CODES
|
from werkzeug.http import HTTP_STATUS_CODES
|
||||||
from six.moves.urllib.parse import urlencode, urlsplit, urlunsplit
|
from six.moves.urllib.parse import urlencode, urlsplit, urlunsplit
|
||||||
|
|
||||||
#from pywb.rewrite.rewrite_amf import RewriteAMFMixin
|
from pywb.rewrite.default_rewriter import DefaultRewriter, RewriterWithJSProxy
|
||||||
#from pywb.rewrite.rewrite_dash import RewriteDASHMixin
|
|
||||||
#from pywb.rewrite.rewrite_content import RewriteContent
|
|
||||||
from pywb.rewrite.default_rewriter import DefaultRewriter
|
|
||||||
|
|
||||||
from pywb.rewrite.wburl import WbUrl
|
from pywb.rewrite.wburl import WbUrl
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter, SchemeOnlyUrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter, SchemeOnlyUrlRewriter
|
||||||
@ -66,10 +63,8 @@ class RewriterApp(object):
|
|||||||
self.frame_mod = None
|
self.frame_mod = None
|
||||||
self.replay_mod = ''
|
self.replay_mod = ''
|
||||||
|
|
||||||
#frame_type = 'inverse' if framed_replay else False
|
self.default_rw = DefaultRewriter(replay_mod=self.replay_mod)
|
||||||
|
self.js_proxy_rw = RewriterWithJSProxy(replay_mod=self.replay_mod)
|
||||||
#self.content_rewriter = Rewriter(is_framed_replay=frame_type)
|
|
||||||
self.content_rw = DefaultRewriter(replay_mod=self.replay_mod)
|
|
||||||
|
|
||||||
if not jinja_env:
|
if not jinja_env:
|
||||||
jinja_env = JinjaEnv(globals={'static_path': 'static'})
|
jinja_env = JinjaEnv(globals={'static_path': 'static'})
|
||||||
@ -148,8 +143,12 @@ class RewriterApp(object):
|
|||||||
|
|
||||||
urlkey = canonicalize(wb_url.url)
|
urlkey = canonicalize(wb_url.url)
|
||||||
|
|
||||||
inputreq = RewriteInputRequest(environ, urlkey, wb_url.url,
|
if kwargs.get('use_js_obj_proxy'):
|
||||||
self.content_rw)
|
content_rw = self.js_proxy_rw
|
||||||
|
else:
|
||||||
|
content_rw = self.default_rw
|
||||||
|
|
||||||
|
inputreq = RewriteInputRequest(environ, urlkey, wb_url.url, content_rw)
|
||||||
|
|
||||||
inputreq.include_post_query(wb_url.url)
|
inputreq.include_post_query(wb_url.url)
|
||||||
|
|
||||||
@ -267,15 +266,8 @@ class RewriterApp(object):
|
|||||||
cookie_rewriter = self.cookie_tracker.get_rewriter(urlrewriter,
|
cookie_rewriter = self.cookie_tracker.get_rewriter(urlrewriter,
|
||||||
cookie_key)
|
cookie_key)
|
||||||
|
|
||||||
#result = self.content_rewriter.rewrite_content(urlrewriter,
|
urlrewriter.rewrite_opts['ua_string'] = environ.get('HTTP_USER_AGENT')
|
||||||
# record.http_headers,
|
result = content_rw(record, urlrewriter, cookie_rewriter, head_insert_func, cdx)
|
||||||
# record.raw_stream,
|
|
||||||
# head_insert_func,
|
|
||||||
# urlkey,
|
|
||||||
# cdx,
|
|
||||||
# cookie_rewriter,
|
|
||||||
# environ)
|
|
||||||
result = self.content_rw(record, urlrewriter, cookie_rewriter, head_insert_func, cdx)
|
|
||||||
|
|
||||||
status_headers, gen, is_rw = result
|
status_headers, gen, is_rw = result
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@ import os
|
|||||||
from pywb.utils.loaders import LocalFileLoader
|
from pywb.utils.loaders import LocalFileLoader
|
||||||
|
|
||||||
from pywb.apps.wbrequestresponse import WbResponse
|
from pywb.apps.wbrequestresponse import WbResponse
|
||||||
|
from pywb.utils.wbexception import NotFoundException
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -37,7 +37,10 @@ directory structure expected by pywb
|
|||||||
|
|
||||||
COLL_RX = re.compile('^[\w][-\w]*$')
|
COLL_RX = re.compile('^[\w][-\w]*$')
|
||||||
|
|
||||||
def __init__(self, coll_name, colls_dir='collections', must_exist=True):
|
COLLS_DIR = 'collections'
|
||||||
|
|
||||||
|
def __init__(self, coll_name, colls_dir=None, must_exist=True):
|
||||||
|
colls_dir = colls_dir or self.COLLS_DIR
|
||||||
self.default_config = load_yaml_config(DEFAULT_CONFIG)
|
self.default_config = load_yaml_config(DEFAULT_CONFIG)
|
||||||
|
|
||||||
if coll_name and not self.COLL_RX.match(coll_name):
|
if coll_name and not self.COLL_RX.match(coll_name):
|
||||||
|
@ -20,14 +20,16 @@ class BaseContentRewriter(object):
|
|||||||
|
|
||||||
def __init__(self, rules_file, replay_mod=''):
|
def __init__(self, rules_file, replay_mod=''):
|
||||||
self.rules = []
|
self.rules = []
|
||||||
|
self.all_rewriters = []
|
||||||
self.load_rules(rules_file)
|
self.load_rules(rules_file)
|
||||||
self.replay_mod = replay_mod
|
self.replay_mod = replay_mod
|
||||||
#for rw in self.known_rewriters:
|
|
||||||
# self.all_rewriters[rw.name] = rw
|
|
||||||
|
|
||||||
def add_rewriter(self, rw):
|
def add_rewriter(self, rw):
|
||||||
self.all_rewriters[rw.name] = rw
|
self.all_rewriters[rw.name] = rw
|
||||||
|
|
||||||
|
def get_rewriter(self, rw_type, rwinfo=None):
|
||||||
|
return self.all_rewriters.get(rw_type)
|
||||||
|
|
||||||
def load_rules(self, filename):
|
def load_rules(self, filename):
|
||||||
config = load_yaml_config(filename)
|
config = load_yaml_config(filename)
|
||||||
for rule in config.get('rules'):
|
for rule in config.get('rules'):
|
||||||
@ -68,7 +70,7 @@ class BaseContentRewriter(object):
|
|||||||
text_type = 'js-proxy'
|
text_type = 'js-proxy'
|
||||||
|
|
||||||
rw_type = rule.get(text_type, text_type)
|
rw_type = rule.get(text_type, text_type)
|
||||||
rw_class = self.all_rewriters.get(rw_type)
|
rw_class = self.get_rewriter(rw_type, rwinfo)
|
||||||
|
|
||||||
return rw_type, rw_class
|
return rw_type, rw_class
|
||||||
|
|
||||||
@ -98,7 +100,7 @@ class BaseContentRewriter(object):
|
|||||||
|
|
||||||
# if no js rewriter, then do banner insert only
|
# if no js rewriter, then do banner insert only
|
||||||
if not js_rewriter:
|
if not js_rewriter:
|
||||||
rw_class = self.all_rewriters.get('html-banner-only')
|
rw_class = self.get_rewriter('html-banner-only', rwinfo)
|
||||||
|
|
||||||
rw = rw_class(rwinfo.url_rewriter,
|
rw = rw_class(rwinfo.url_rewriter,
|
||||||
js_rewriter=js_rewriter,
|
js_rewriter=js_rewriter,
|
||||||
@ -146,7 +148,7 @@ class BaseContentRewriter(object):
|
|||||||
return charset
|
return charset
|
||||||
|
|
||||||
def rewrite_headers(self, rwinfo):
|
def rewrite_headers(self, rwinfo):
|
||||||
header_rw_class = self.all_rewriters.get('header')
|
header_rw_class = self.get_rewriter('header', rwinfo)
|
||||||
return header_rw_class(rwinfo)()
|
return header_rw_class(rwinfo)()
|
||||||
|
|
||||||
def __call__(self, record, url_rewriter, cookie_rewriter,
|
def __call__(self, record, url_rewriter, cookie_rewriter,
|
||||||
@ -268,7 +270,7 @@ class RewriteInfo(object):
|
|||||||
self.url_rewriter = url_rewriter
|
self.url_rewriter = url_rewriter
|
||||||
|
|
||||||
if not cookie_rewriter:
|
if not cookie_rewriter:
|
||||||
cookie_rw_class = content_rewriter.all_rewriters.get('cookie')
|
cookie_rw_class = content_rewriter.get_rewriter('cookie', self)
|
||||||
if cookie_rw_class:
|
if cookie_rw_class:
|
||||||
cookie_rewriter = cookie_rw_class(url_rewriter)
|
cookie_rewriter = cookie_rw_class(url_rewriter)
|
||||||
|
|
||||||
@ -328,20 +330,20 @@ class RewriteInfo(object):
|
|||||||
return buff
|
return buff
|
||||||
|
|
||||||
def should_rw_content(self):
|
def should_rw_content(self):
|
||||||
|
if not self.text_type:
|
||||||
|
return False
|
||||||
|
|
||||||
if self.url_rewriter.wburl.mod == 'id_':
|
if self.url_rewriter.wburl.mod == 'id_':
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if self.text_type == 'html':
|
if self.url_rewriter.rewrite_opts.get('is_ajax'):
|
||||||
if self.url_rewriter.rewrite_opts.get('is_ajax'):
|
if self.text_type in ('html', 'js'):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
elif self.text_type == 'plain':
|
if self.text_type == 'plain':
|
||||||
if self.url_rewriter.wburl.mod not in ('js_', 'cs_'):
|
if self.url_rewriter.wburl.mod not in ('js_', 'cs_'):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
elif not self.text_type:
|
|
||||||
return False
|
|
||||||
|
|
||||||
elif self.text_type == 'css' or self.text_type == 'xml':
|
elif self.text_type == 'css' or self.text_type == 'xml':
|
||||||
if self.url_rewriter.wburl.mod == 'bn_':
|
if self.url_rewriter.wburl.mod == 'bn_':
|
||||||
return False
|
return False
|
||||||
|
@ -4,8 +4,7 @@ from pywb.rewrite.html_rewriter import HTMLRewriter
|
|||||||
from pywb.rewrite.html_insert_rewriter import HTMLInsertOnlyRewriter
|
from pywb.rewrite.html_insert_rewriter import HTMLInsertOnlyRewriter
|
||||||
|
|
||||||
from pywb.rewrite.regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
from pywb.rewrite.regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
||||||
from pywb.rewrite.regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter, JSWombatProxyRewriter
|
||||||
from pywb.rewrite.regex_rewriters import JSLocationOnlyRewriter, JSNoneRewriter
|
|
||||||
|
|
||||||
from pywb.rewrite.header_rewriter import PrefixHeaderRewriter
|
from pywb.rewrite.header_rewriter import PrefixHeaderRewriter
|
||||||
from pywb.rewrite.cookie_rewriter import HostScopeCookieRewriter
|
from pywb.rewrite.cookie_rewriter import HostScopeCookieRewriter
|
||||||
@ -16,10 +15,13 @@ from pywb.rewrite.rewrite_dash import RewriteDASH
|
|||||||
from pywb.rewrite.rewrite_hls import RewriteHLS
|
from pywb.rewrite.rewrite_hls import RewriteHLS
|
||||||
from pywb.rewrite.rewrite_amf import RewriteAMF
|
from pywb.rewrite.rewrite_amf import RewriteAMF
|
||||||
|
|
||||||
|
import copy
|
||||||
|
from werkzeug.useragents import UserAgent
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class DefaultRewriter(BaseContentRewriter):
|
class DefaultRewriter(BaseContentRewriter):
|
||||||
all_rewriters = {
|
DEFAULT_REWRITERS = {
|
||||||
'header': PrefixHeaderRewriter,
|
'header': PrefixHeaderRewriter,
|
||||||
'cookie': HostScopeCookieRewriter,
|
'cookie': HostScopeCookieRewriter,
|
||||||
|
|
||||||
@ -68,10 +70,10 @@ class DefaultRewriter(BaseContentRewriter):
|
|||||||
# AMF
|
# AMF
|
||||||
'application/x-amf': 'amf',
|
'application/x-amf': 'amf',
|
||||||
|
|
||||||
# XML
|
# XML -- don't rewrite xml
|
||||||
'text/xml': 'xml',
|
#'text/xml': 'xml',
|
||||||
'application/xml': 'xml',
|
#'application/xml': 'xml',
|
||||||
'application/rss+xml': 'xml',
|
#'application/rss+xml': 'xml',
|
||||||
|
|
||||||
# PLAIN
|
# PLAIN
|
||||||
'text/plain': 'plain',
|
'text/plain': 'plain',
|
||||||
@ -80,9 +82,48 @@ class DefaultRewriter(BaseContentRewriter):
|
|||||||
def __init__(self, rules_file=None, replay_mod=''):
|
def __init__(self, rules_file=None, replay_mod=''):
|
||||||
rules_file = rules_file or 'pkg://pywb/rules.yaml'
|
rules_file = rules_file or 'pkg://pywb/rules.yaml'
|
||||||
super(DefaultRewriter, self).__init__(rules_file, replay_mod)
|
super(DefaultRewriter, self).__init__(rules_file, replay_mod)
|
||||||
|
self.all_rewriters = copy.copy(self.DEFAULT_REWRITERS)
|
||||||
|
|
||||||
def init_js_regex(self, regexs):
|
def init_js_regex(self, regexs):
|
||||||
return RegexRewriter.parse_rules_from_config(regexs)
|
return RegexRewriter.parse_rules_from_config(regexs)
|
||||||
|
|
||||||
def get_rewrite_types(self):
|
def get_rewrite_types(self):
|
||||||
return self.rewrite_types
|
return self.rewrite_types
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class RewriterWithJSProxy(DefaultRewriter):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(RewriterWithJSProxy, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def get_rewriter(self, rw_type, rwinfo=None):
|
||||||
|
if rw_type == 'js' and rwinfo:
|
||||||
|
# check if UA allows this
|
||||||
|
if self.ua_allows_obj_proxy(rwinfo.url_rewriter.rewrite_opts):
|
||||||
|
return JSWombatProxyRewriter
|
||||||
|
|
||||||
|
# otherwise, return default rewriter
|
||||||
|
return super(RewriterWithJSProxy, self).get_rewriter(rw_type, rwinfo)
|
||||||
|
|
||||||
|
def ua_allows_obj_proxy(self, opts):
|
||||||
|
ua = opts.get('ua')
|
||||||
|
if not ua:
|
||||||
|
ua_string = opts.get('ua_string')
|
||||||
|
if ua_string:
|
||||||
|
ua = UserAgent(ua_string)
|
||||||
|
|
||||||
|
if ua is None:
|
||||||
|
return True
|
||||||
|
|
||||||
|
supported = {
|
||||||
|
'chrome': '49.0',
|
||||||
|
'firefox': '44.0',
|
||||||
|
'safari': '10.0',
|
||||||
|
'opera': '36.0',
|
||||||
|
'edge': '12.0',
|
||||||
|
'msie': None,
|
||||||
|
}
|
||||||
|
|
||||||
|
min_vers = supported.get(ua.browser)
|
||||||
|
|
||||||
|
return (min_vers and ua.version >= min_vers)
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from pywb.rewrite.content_rewriter import StreamingRewriter
|
from pywb.rewrite.content_rewriter import StreamingRewriter
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
def load_function(string):
|
def load_function(string):
|
||||||
import importlib
|
import importlib
|
||||||
|
|
||||||
@ -12,10 +11,10 @@ def load_function(string):
|
|||||||
return getattr(mod, string[1])
|
return getattr(mod, string[1])
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
class RegexRewriter(StreamingRewriter):
|
class RegexRewriter(StreamingRewriter):
|
||||||
#@staticmethod
|
# @staticmethod
|
||||||
#def comment_out(string):
|
# def comment_out(string):
|
||||||
# return '/*' + string + '*/'
|
# return '/*' + string + '*/'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -34,17 +33,17 @@ class RegexRewriter(StreamingRewriter):
|
|||||||
def archival_rewrite(rewriter):
|
def archival_rewrite(rewriter):
|
||||||
return lambda string: rewriter.rewrite(string)
|
return lambda string: rewriter.rewrite(string)
|
||||||
|
|
||||||
#@staticmethod
|
# @staticmethod
|
||||||
#def replacer(other):
|
# def replacer(other):
|
||||||
# return lambda m, string: other
|
# return lambda m, string: other
|
||||||
|
|
||||||
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
|
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
|
||||||
|
|
||||||
#DEFAULT_OP = add_prefix
|
# DEFAULT_OP = add_prefix
|
||||||
|
|
||||||
def __init__(self, rewriter, rules):
|
def __init__(self, rewriter, rules):
|
||||||
super(RegexRewriter, self).__init__(rewriter)
|
super(RegexRewriter, self).__init__(rewriter)
|
||||||
#rules = self.create_rules(http_prefix)
|
# rules = self.create_rules(http_prefix)
|
||||||
|
|
||||||
# Build regexstr, concatenating regex list
|
# Build regexstr, concatenating regex list
|
||||||
regex_str = '|'.join(['(' + rx + ')' for rx, op, count in rules])
|
regex_str = '|'.join(['(' + rx + ')' for rx, op, count in rules])
|
||||||
@ -79,7 +78,7 @@ class RegexRewriter(StreamingRewriter):
|
|||||||
return m.group(0)
|
return m.group(0)
|
||||||
|
|
||||||
# Custom func
|
# Custom func
|
||||||
#if not hasattr(op, '__call__'):
|
# if not hasattr(op, '__call__'):
|
||||||
# op = RegexRewriter.DEFAULT_OP(op)
|
# op = RegexRewriter.DEFAULT_OP(op)
|
||||||
|
|
||||||
result = op(m.group(i))
|
result = op(m.group(i))
|
||||||
@ -109,19 +108,20 @@ class RegexRewriter(StreamingRewriter):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
return list(map(parse_rule, config))
|
return list(map(parse_rule, config))
|
||||||
|
|
||||||
return run_parse_rules
|
return run_parse_rules
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
class JSLinkRewriterMixin(object):
|
class JSLinkRewriterMixin(object):
|
||||||
"""
|
"""
|
||||||
JS Rewriter which rewrites absolute http://, https:// and // urls
|
JS Rewriter which rewrites absolute http://, https:// and // urls
|
||||||
at the beginning of a string
|
at the beginning of a string
|
||||||
"""
|
"""
|
||||||
#JS_HTTPX = r'(?:(?:(?<=["\';])https?:)|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+.*(?=["\s\';&\\])'
|
# JS_HTTPX = r'(?:(?:(?<=["\';])https?:)|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+.*(?=["\s\';&\\])'
|
||||||
#JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.\-/\\?&#]+(?=["\';&\\])'
|
# JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.\-/\\?&#]+(?=["\';&\\])'
|
||||||
|
|
||||||
#JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-][^"\s\';&\\]*(?=["\';&\\])'
|
# JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-][^"\s\';&\\]*(?=["\';&\\])'
|
||||||
JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@%.\\-]+/'
|
JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@%.\\-]+/'
|
||||||
|
|
||||||
def __init__(self, rewriter, rules=[]):
|
def __init__(self, rewriter, rules=[]):
|
||||||
@ -131,7 +131,7 @@ class JSLinkRewriterMixin(object):
|
|||||||
super(JSLinkRewriterMixin, self).__init__(rewriter, rules)
|
super(JSLinkRewriterMixin, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
class JSLocationRewriterMixin(object):
|
class JSLocationRewriterMixin(object):
|
||||||
"""
|
"""
|
||||||
JS Rewriter mixin which rewrites location and domain to the
|
JS Rewriter mixin which rewrites location and domain to the
|
||||||
@ -140,46 +140,96 @@ class JSLocationRewriterMixin(object):
|
|||||||
|
|
||||||
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
|
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
|
||||||
rules = rules + [
|
rules = rules + [
|
||||||
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', RegexRewriter.add_prefix(prefix), 0),
|
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', RegexRewriter.add_prefix(prefix), 0),
|
||||||
|
|
||||||
(r'(?<=[?])\s*(?:\w+[.])?(location)\s*(?=[:])', RegexRewriter.add_prefix(prefix), 1),
|
(r'(?<=[?])\s*(?:\w+[.])?(location)\s*(?=[:])', RegexRewriter.add_prefix(prefix), 1),
|
||||||
|
|
||||||
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(self.window).'), 0),
|
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(self.window).'), 0),
|
||||||
|
|
||||||
(r'(?<=\.)frameElement\b', RegexRewriter.add_prefix(prefix), 0),
|
(r'(?<=\.)frameElement\b', RegexRewriter.add_prefix(prefix), 0),
|
||||||
]
|
]
|
||||||
super(JSLocationRewriterMixin, self).__init__(rewriter, rules)
|
super(JSLocationRewriterMixin, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
|
class JSWombatProxyRewriterMixin(object):
|
||||||
|
"""
|
||||||
|
JS Rewriter mixin which wraps the contents of the
|
||||||
|
script in an anonymous block scope and inserts
|
||||||
|
Wombat js-proxy setup
|
||||||
|
"""
|
||||||
|
|
||||||
|
local_init_func = '\nvar {0} = function(name) {{\
|
||||||
|
return (self._wb_wombat && self._wb_wombat.local_init &&\
|
||||||
|
self._wb_wombat.local_init(name)) || self[name]; }}\n\
|
||||||
|
if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
|
||||||
|
{{\n'
|
||||||
|
|
||||||
|
local_init_func_name = '_____WB$wombat$assign$function_____'
|
||||||
|
|
||||||
|
local_var_line = 'let {0} = {1}("{0}");'
|
||||||
|
|
||||||
|
local_objs = ['window',
|
||||||
|
'self',
|
||||||
|
'document',
|
||||||
|
'location',
|
||||||
|
'top',
|
||||||
|
'parent',
|
||||||
|
'frames',
|
||||||
|
'opener']
|
||||||
|
|
||||||
|
def __init__(self, rewriter, rules=[]):
|
||||||
|
rules = rules + [
|
||||||
|
(r'Function\(["\']return this["\']\)', RegexRewriter.format('Function("return this._WB_wombat_obj_proxy || this")'), 0),
|
||||||
|
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(self).'), 0),
|
||||||
|
]
|
||||||
|
|
||||||
|
super(JSWombatProxyRewriterMixin, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
|
local_declares = '\n'.join([self.local_var_line.format(obj, self.local_init_func_name) for obj in self.local_objs])
|
||||||
|
|
||||||
|
self.first_buff = self.local_init_func.format(self.local_init_func_name) + local_declares
|
||||||
|
|
||||||
|
self.close_string = '\n\n}'
|
||||||
|
|
||||||
|
def final_read(self):
|
||||||
|
return self.close_string
|
||||||
|
|
||||||
|
|
||||||
|
# =================================================================
|
||||||
class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
|
class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter):
|
class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
class JSLinkAndLocationRewriter(JSLocationRewriterMixin,
|
class JSLinkAndLocationRewriter(JSLocationRewriterMixin,
|
||||||
JSLinkRewriterMixin,
|
JSLinkRewriterMixin,
|
||||||
RegexRewriter):
|
RegexRewriter):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
class JSNoneRewriter(RegexRewriter):
|
class JSNoneRewriter(RegexRewriter):
|
||||||
def __init__(self, rewriter, rules=[]):
|
def __init__(self, rewriter, rules=[]):
|
||||||
super(JSNoneRewriter, self).__init__(rewriter, rules)
|
super(JSNoneRewriter, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
|
class JSWombatProxyRewriter(JSWombatProxyRewriterMixin, RegexRewriter):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# =================================================================
|
||||||
# Set 'default' JSRewriter
|
# Set 'default' JSRewriter
|
||||||
JSRewriter = JSLinkAndLocationRewriter
|
JSRewriter = JSLinkAndLocationRewriter
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
class XMLRewriter(RegexRewriter):
|
class XMLRewriter(RegexRewriter):
|
||||||
def __init__(self, rewriter, extra=[]):
|
def __init__(self, rewriter, extra=[]):
|
||||||
rules = self._create_rules(rewriter)
|
rules = self._create_rules(rewriter)
|
||||||
@ -202,9 +252,8 @@ class XMLRewriter(RegexRewriter):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =================================================================
|
||||||
class CSSRewriter(RegexRewriter):
|
class CSSRewriter(RegexRewriter):
|
||||||
|
|
||||||
CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"
|
CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"
|
||||||
|
|
||||||
CSS_IMPORT_NO_URL_REGEX = ("@import\\s+(?!url)\\(?\\s*['\"]?" +
|
CSS_IMPORT_NO_URL_REGEX = ("@import\\s+(?!url)\\(?\\s*['\"]?" +
|
||||||
|
@ -18,7 +18,7 @@ This file is part of pywb, https://github.com/ikreymer/pywb
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
// Wombat JS-Rewriting Library v2.31
|
// Wombat JS-Rewriting Library v2.40
|
||||||
//============================================
|
//============================================
|
||||||
|
|
||||||
|
|
||||||
@ -891,6 +891,22 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//============================================
|
||||||
|
// Replace the getter for `prop` on `proto` with one that returns the
// value's `_WB_wombat_obj_proxy` when the value carries one, and the
// unmodified value otherwise. No setter is installed. Does nothing when
// get_orig_getter() finds no original getter.
function override_prop_to_proxy(proto, prop) {
    var native_getter = get_orig_getter(proto, prop);

    if (!native_getter) {
        return;
    }

    function proxied_getter() {
        var value = native_getter.call(this);
        var proxy = value && value._WB_wombat_obj_proxy;

        // fall back to the raw value when there is no proxy attached
        return proxy || value;
    }

    def_prop(proto, prop, undefined, proxied_getter);
}
|
||||||
|
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function override_attr_props() {
|
function override_attr_props() {
|
||||||
function is_rw_attr(attr) {
|
function is_rw_attr(attr) {
|
||||||
@ -922,9 +938,13 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
$wbwindow.Element.prototype._orig_setAttribute = orig_setAttribute;
|
$wbwindow.Element.prototype._orig_setAttribute = orig_setAttribute;
|
||||||
|
|
||||||
$wbwindow.Element.prototype.setAttribute = function(name, value) {
|
$wbwindow.Element.prototype.setAttribute = function(name, value) {
|
||||||
if (name) {
|
if (name && typeof(value) === "string") {
|
||||||
var lowername = name.toLowerCase();
|
var lowername = name.toLowerCase();
|
||||||
if (typeof(value) == "string" && should_rewrite_attr(this.tagName, lowername)) {
|
|
||||||
|
if (this.tagName == "LINK" && lowername == "href" && value.indexOf("data:text/css") == 0) {
|
||||||
|
value = rewrite_inline_style(value);
|
||||||
|
|
||||||
|
} else if (should_rewrite_attr(this.tagName, lowername)) {
|
||||||
if (!this._no_rewrite) {
|
if (!this._no_rewrite) {
|
||||||
var old_value = value;
|
var old_value = value;
|
||||||
|
|
||||||
@ -934,7 +954,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
value = rewrite_url(value, false, mod);
|
value = rewrite_url(value, false, mod);
|
||||||
}
|
}
|
||||||
} else if (lowername == "style" && typeof(value) == "string") {
|
} else if (lowername == "style") {
|
||||||
value = rewrite_style(value);
|
value = rewrite_style(value);
|
||||||
} else if (lowername == "srcset") {
|
} else if (lowername == "srcset") {
|
||||||
value = rewrite_srcset(value);
|
value = rewrite_srcset(value);
|
||||||
@ -1156,96 +1176,6 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//============================================
|
|
||||||
/* function init_mutation_obs($wbwindow) {
|
|
||||||
if (!$wbwindow.MutationObserver) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var m = new MutationObserver(function(records, observer)
|
|
||||||
{
|
|
||||||
for (var i = 0; i < records.length; i++) {
|
|
||||||
var r = records[i];
|
|
||||||
if (r.type == "attributes" && r.attributeName == "style") {
|
|
||||||
var style = r.target.style.cssText;
|
|
||||||
if (style.indexOf("url(") > 0) {
|
|
||||||
var new_style = rewrite_style(style);
|
|
||||||
if (new_style != style) {
|
|
||||||
r.target.style.cssText = new_style;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
m.observe($wbwindow.document.documentElement, {
|
|
||||||
childList: false,
|
|
||||||
attributes: true,
|
|
||||||
subtree: true,
|
|
||||||
//attributeOldValue: true,
|
|
||||||
attributeFilter: ["style"]});
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
//============================================
|
|
||||||
/* function init_href_src_obs($wbwindow)
|
|
||||||
{
|
|
||||||
if (!$wbwindow.MutationObserver) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var m = new MutationObserver(function(records, observer)
|
|
||||||
{
|
|
||||||
for (var i = 0; i < records.length; i++) {
|
|
||||||
var r = records[i];
|
|
||||||
if (r.type == "attributes") {
|
|
||||||
//var curr = wb_getAttribute(r.target, r.attributeName);
|
|
||||||
var curr = r.target.getAttribute(r.attributeName);
|
|
||||||
var new_url = rewrite_url(curr);
|
|
||||||
if (curr != new_url) {
|
|
||||||
wb_setAttribute.call(r.target, r.attributeName, new_url);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
m.observe($wbwindow.document.documentElement, {
|
|
||||||
childList: false,
|
|
||||||
attributes: true,
|
|
||||||
subtree: true,
|
|
||||||
//attributeOldValue: true,
|
|
||||||
attributeFilter: ["src", "href"]});
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//============================================
|
|
||||||
function init_iframe_insert_obs(root)
|
|
||||||
{
|
|
||||||
if (!$wbwindow.MutationObserver) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var m = new MutationObserver(function(records, observer)
|
|
||||||
{
|
|
||||||
for (var i = 0; i < records.length; i++) {
|
|
||||||
var r = records[i];
|
|
||||||
if (r.type == "childList") {
|
|
||||||
for (var j = 0; j < r.addedNodes.length; j++) {
|
|
||||||
if (r.addedNodes[j].tagName == "IFRAME") {
|
|
||||||
init_iframe_wombat(r.addedNodes[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
m.observe(root, {
|
|
||||||
childList: true,
|
|
||||||
subtree: true,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
//============================================
|
//============================================
|
||||||
function rewrite_attr(elem, name, abs_url_only) {
|
function rewrite_attr(elem, name, abs_url_only) {
|
||||||
if (!elem || !elem.getAttribute) {
|
if (!elem || !elem.getAttribute) {
|
||||||
@ -1515,6 +1445,27 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
return orig_setter;
|
return orig_setter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//============================================
|
||||||
|
// Rewrite the style content of a `data:text/css` URI.
//
// orig: the data URI string, possibly percent-encoded.
// Returns the URI with its CSS passed through rewrite_style(). When the
// input was percent-encoded, everything after the first comma is
// re-encoded with encodeURIComponent() so the result stays a valid URI.
function rewrite_inline_style(orig) {
    var decoded;

    try {
        decoded = decodeURIComponent(orig);
    } catch (e) {
        // malformed percent-escapes: treat the input as not encoded
        decoded = orig;
    }

    if (decoded == orig) {
        // nothing was encoded, so nothing needs re-encoding
        return rewrite_style(orig);
    }

    // `val` is declared locally; assigning it undeclared would create
    // an implicit global (and throw in strict mode)
    var val = rewrite_style(decoded);

    // Split on the FIRST comma only. String.prototype.split(",", 2)
    // drops everything after the second comma, which truncates any
    // stylesheet containing a comma (font lists, rgba(), selectors...).
    var sep = val.indexOf(",");

    if (sep < 0) {
        // no header/payload separator: nothing to re-encode
        return val;
    }

    return val.substring(0, sep) + "," + encodeURIComponent(val.substring(sep + 1));
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function override_attr(obj, attr, mod, default_to_setget) {
|
function override_attr(obj, attr, mod, default_to_setget) {
|
||||||
var orig_getter = get_orig_getter(obj, attr);
|
var orig_getter = get_orig_getter(obj, attr);
|
||||||
@ -1524,21 +1475,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
var val;
|
var val;
|
||||||
|
|
||||||
if (mod == "cs_" && orig.indexOf("data:text/css") == 0) {
|
if (mod == "cs_" && orig.indexOf("data:text/css") == 0) {
|
||||||
var decoded;
|
val = rewrite_inline_style(orig);
|
||||||
|
|
||||||
try {
|
|
||||||
decoded = decodeURIComponent(orig);
|
|
||||||
} catch (e) {
|
|
||||||
decoded = orig;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (decoded != orig) {
|
|
||||||
val = rewrite_style(decoded);
|
|
||||||
var parts = val.split(",", 2);
|
|
||||||
val = parts[0] + "," + encodeURIComponent(parts[1]);
|
|
||||||
} else {
|
|
||||||
val = rewrite_style(orig);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
val = rewrite_url(orig, false, mod);
|
val = rewrite_url(orig, false, mod);
|
||||||
}
|
}
|
||||||
@ -1598,7 +1535,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
|
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_attr_overrides($wbwindow) {
|
function init_attr_overrides() {
|
||||||
override_attr($wbwindow.HTMLLinkElement.prototype, "href", "cs_");
|
override_attr($wbwindow.HTMLLinkElement.prototype, "href", "cs_");
|
||||||
override_attr($wbwindow.CSSStyleSheet.prototype, "href", "cs_");
|
override_attr($wbwindow.CSSStyleSheet.prototype, "href", "cs_");
|
||||||
override_attr($wbwindow.HTMLImageElement.prototype, "src", "im_");
|
override_attr($wbwindow.HTMLImageElement.prototype, "src", "im_");
|
||||||
@ -1631,6 +1568,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
override_style_attr(style_proto, "background", "background");
|
override_style_attr(style_proto, "background", "background");
|
||||||
override_style_attr(style_proto, "backgroundImage", "background-image");
|
override_style_attr(style_proto, "backgroundImage", "background-image");
|
||||||
|
|
||||||
|
override_style_attr(style_proto, "cursor", "cursor");
|
||||||
|
|
||||||
override_style_attr(style_proto, "listStyle", "list-style");
|
override_style_attr(style_proto, "listStyle", "list-style");
|
||||||
override_style_attr(style_proto, "listStyleImage", "list-style-image");
|
override_style_attr(style_proto, "listStyleImage", "list-style-image");
|
||||||
|
|
||||||
@ -1730,7 +1669,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
|
|
||||||
var getter = function() {
|
var getter = function() {
|
||||||
init_iframe_wombat(this);
|
init_iframe_wombat(this);
|
||||||
return orig_getter.call(this);
|
var res = orig_getter.call(this);
|
||||||
|
res = (res && res._WB_wombat_obj_proxy) || res;
|
||||||
|
return res;
|
||||||
};
|
};
|
||||||
|
|
||||||
def_prop(obj, prop, orig_setter, getter);
|
def_prop(obj, prop, orig_setter, getter);
|
||||||
@ -1882,6 +1823,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
replace_dom_func("appendChild");
|
replace_dom_func("appendChild");
|
||||||
replace_dom_func("insertBefore");
|
replace_dom_func("insertBefore");
|
||||||
replace_dom_func("replaceChild");
|
replace_dom_func("replaceChild");
|
||||||
|
|
||||||
|
override_prop_to_proxy($wbwindow.Node.prototype, "ownerDocument");
|
||||||
|
override_prop_to_proxy($wbwindow.HTMLHtmlElement.prototype, "parentNode");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1911,7 +1855,9 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
|
|
||||||
function receive_hash_change(event)
|
function receive_hash_change(event)
|
||||||
{
|
{
|
||||||
if (!event.data || event.source != $wbwindow.__WB_top_frame) {
|
var source = event.source.__WBProxyRealObj__ || event.source;
|
||||||
|
|
||||||
|
if (!event.data || source != $wbwindow.__WB_top_frame) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2026,6 +1972,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
source = win.__WB_win_id[event.data.src_id];
|
source = win.__WB_win_id[event.data.src_id];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
source = source.__WBProxyRealObj__ || source;
|
||||||
|
|
||||||
ne = new MessageEvent("message",
|
ne = new MessageEvent("message",
|
||||||
{"bubbles": event.bubbles,
|
{"bubbles": event.bubbles,
|
||||||
"cancelable": event.cancelable,
|
"cancelable": event.cancelable,
|
||||||
@ -2073,7 +2021,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
return _orig_addEventListener.call(this, type, listener, useCapture);
|
return _orig_addEventListener.call(this, type, listener, useCapture);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$wbwindow.addEventListener = addEventListener_rewritten;
|
$wbwindow.addEventListener = addEventListener_rewritten;
|
||||||
|
|
||||||
// REMOVE
|
// REMOVE
|
||||||
@ -2123,9 +2071,23 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
addMEOverride("eventPhase");
|
addMEOverride("eventPhase");
|
||||||
addMEOverride("path");
|
addMEOverride("path");
|
||||||
|
|
||||||
|
override_prop_to_proxy($wbwindow.MessageEvent.prototype, "source");
|
||||||
|
|
||||||
$wbwindow.MessageEvent.prototype.__extended = true;
|
$wbwindow.MessageEvent.prototype.__extended = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//============================================
|
||||||
|
// Patch MutationObserver.prototype.observe so that a proxied target
// (one exposing `__WBProxyRealObj__`) is swapped for the real object
// before the native observe() is called.
function init_mo_from_proxy() {
    // Nothing to patch when MutationObserver is unavailable; without
    // this guard the `.prototype` access below throws.
    if (!$wbwindow.MutationObserver) {
        return;
    }

    var orig_observe = $wbwindow.MutationObserver.prototype.observe;

    function observe_deproxy(target, options) {
        // unwrap the proxy, if any; otherwise pass the target through
        target = target && target.__WBProxyRealObj__ || target;
        return orig_observe.call(this, target, options);
    }

    $wbwindow.MutationObserver.prototype.observe = observe_deproxy;
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_open_override()
|
function init_open_override()
|
||||||
{
|
{
|
||||||
@ -2140,7 +2102,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
var res = orig.call(this, strUrl, strWindowName, strWindowFeatures);
|
var res = orig.call(this, strUrl, strWindowName, strWindowFeatures);
|
||||||
init_new_window_wombat(res, strUrl);
|
init_new_window_wombat(res, strUrl);
|
||||||
return res;
|
return res;
|
||||||
}
|
};
|
||||||
|
|
||||||
$wbwindow.open = open_rewritten;
|
$wbwindow.open = open_rewritten;
|
||||||
|
|
||||||
@ -2158,7 +2120,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_cookies_override($wbwindow)
|
function init_cookies_override()
|
||||||
{
|
{
|
||||||
var cookie_path_regex = /\bPath=\'?\"?([^;'"\s]+)/i;
|
var cookie_path_regex = /\bPath=\'?\"?([^;'"\s]+)/i;
|
||||||
var cookie_domain_regex = /\bDomain=([^;'"\s]+)/i;
|
var cookie_domain_regex = /\bDomain=([^;'"\s]+)/i;
|
||||||
@ -2342,6 +2304,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
init_new_window_wombat(win, src);
|
init_new_window_wombat(win, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//============================================
|
||||||
function init_new_window_wombat(win, src) {
|
function init_new_window_wombat(win, src) {
|
||||||
if (!win || win._wb_wombat) {
|
if (!win || win._wb_wombat) {
|
||||||
return;
|
return;
|
||||||
@ -2366,76 +2329,46 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
|
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_doc_overrides($wbwindow) {
|
function init_doc_overrides($document) {
|
||||||
if (!Object.defineProperty) {
|
if (!Object.defineProperty) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($wbwindow.document._wb_override) {
|
// referrer
|
||||||
return;
|
override_prop_extract($document, "referrer");
|
||||||
}
|
|
||||||
|
|
||||||
var orig_referrer = extract_orig($wbwindow.document.referrer);
|
// origin
|
||||||
|
def_prop($document, "origin", undefined, function() { return this._WB_wombat_location.origin; });
|
||||||
var domain_info;
|
|
||||||
|
|
||||||
if ($wbwindow.wbinfo) {
|
|
||||||
domain_info = $wbwindow.wbinfo;
|
|
||||||
} else {
|
|
||||||
domain_info = wbinfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
domain_info.domain = domain_info.wombat_host;
|
|
||||||
|
|
||||||
|
// domain
|
||||||
var domain_setter = function(val) {
|
var domain_setter = function(val) {
|
||||||
if (ends_with(domain_info.wombat_host, val)) {
|
if (ends_with(this._WB_wombat_location.hostname, val)) {
|
||||||
domain_info.domain = val;
|
this.__wb_domain = val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var domain_getter = function() {
|
var domain_getter = function() {
|
||||||
return domain_info.domain;
|
return this.__wb_domain || this._WB_wombat_location.hostname;
|
||||||
}
|
}
|
||||||
|
|
||||||
// changing domain disallowed, but set as no-op to avoid errors
|
def_prop($document, "domain", domain_setter, domain_getter);
|
||||||
def_prop($wbwindow.document, "domain", domain_setter, domain_getter);
|
|
||||||
|
|
||||||
def_prop($wbwindow.document, "referrer", undefined, function() { return orig_referrer; });
|
// override form action
|
||||||
|
init_form_overrides($document);
|
||||||
|
|
||||||
// Cookies
|
|
||||||
init_cookies_override($wbwindow);
|
|
||||||
|
|
||||||
// Init mutation observer (for style only)
|
|
||||||
//init_mutation_obs($wbwindow);
|
|
||||||
|
|
||||||
// override href and src attrs
|
|
||||||
init_attr_overrides($wbwindow);
|
|
||||||
|
|
||||||
|
|
||||||
init_form_overrides($wbwindow);
|
|
||||||
|
|
||||||
|
|
||||||
// Attr observers
|
|
||||||
//if (!wb_opts.skip_attr_observers) {
|
|
||||||
// init_href_src_obs($wbwindow);
|
|
||||||
//}
|
|
||||||
|
|
||||||
$wbwindow.document._wb_override = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
// Necessary since HTMLFormElement.prototype.action is not consistently
|
// Necessary since HTMLFormElement.prototype.action is not consistently
|
||||||
// overridable
|
// overridable
|
||||||
function init_form_overrides($wbwindow) {
|
function init_form_overrides($document) {
|
||||||
var do_init_forms = function() {
|
var do_init_forms = function() {
|
||||||
for (var i = 0; i < $wbwindow.document.forms.length; i++) {
|
for (var i = 0; i < $document.forms.length; i++) {
|
||||||
var new_action = rewrite_url($wbwindow.document.forms[i].action);
|
var new_action = rewrite_url($document.forms[i].action);
|
||||||
if (new_action != $wbwindow.document.forms[i].action) {
|
if (new_action != $document.forms[i].action) {
|
||||||
$wbwindow.document.forms[i].action = new_action;
|
$document.forms[i].action = new_action;
|
||||||
}
|
}
|
||||||
override_attr($wbwindow.document.forms[i], "action", "", true);
|
override_attr($document.forms[i], "action", "", true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2546,6 +2479,185 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
init_bad_prefixes(wb_replay_prefix);
|
init_bad_prefixes(wb_replay_prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//============================================
|
||||||
|
// New Proxy Obj Override Functions
|
||||||
|
// Original Concept by John Berlin (https://github.com/N0taN3rd)
|
||||||
|
//============================================
|
||||||
|
// Build the list of property names for `obj`:
//  - from obj itself: only names whose value is truthy and has no
//    `prototype` property;
//  - from every object on its prototype chain: all own property names,
//    unfiltered (a name may therefore appear more than once).
// default_proxy_get() consults this list to decide which functions to bind.
function getAllOwnProps(obj) {
    var collected = [];

    Object.getOwnPropertyNames(obj).forEach(function(name) {
        try {
            if (obj[name] && !obj[name].prototype) {
                collected.push(name);
            }
        } catch (e) {
            // reading the property threw: leave this name out
        }
    });

    for (var proto = Object.getPrototypeOf(obj); proto; proto = Object.getPrototypeOf(proto)) {
        collected = collected.concat(Object.getOwnPropertyNames(proto));
    }

    return collected;
}
|
||||||
|
|
||||||
|
//============================================
|
||||||
|
// Shared `get` logic for the object proxies.
//
// obj:      the real object being read
// prop:     the requested property
// ownProps: names for which a function value is returned bound to `obj`
//
// Special names: '__WBProxyRealObj__' yields obj itself, 'location' yields
// obj._WB_wombat_location, '_WB_wombat_obj_proxy' yields that property.
// Otherwise: functions listed in ownProps are returned bound to obj,
// objects carrying a `_WB_wombat_obj_proxy` are replaced by it, and
// anything else is returned unchanged.
function default_proxy_get(obj, prop, ownProps) {
    if (prop == '__WBProxyRealObj__') {
        return obj;
    }

    if (prop == 'location') {
        return obj._WB_wombat_location;
    }

    if (prop == "_WB_wombat_obj_proxy") {
        return obj._WB_wombat_obj_proxy;
    }

    var value = obj[prop];

    switch (typeof value) {
    case "function":
        if (ownProps.indexOf(prop) != -1) {
            return value.bind(obj);
        }
        break;

    case "object":
        // typeof null is "object", hence the truthiness check
        if (value && value._WB_wombat_obj_proxy) {
            return value._WB_wombat_obj_proxy;
        }
        break;
    }

    return value;
}
|
||||||
|
|
||||||
|
//============================================
|
||||||
|
// Create the window object proxy and store it on
// $wbwindow._WB_wombat_obj_proxy.
//
// The Proxy wraps an empty dummy object; every trap redirects to the
// real $wbwindow. Returns the proxy, or undefined when Proxy is not
// available.
function init_window_obj_proxy($wbwindow) {
    if (!$wbwindow.Proxy) {
        return undefined;
    }

    var ownProps = getAllOwnProps($wbwindow);

    $wbwindow._WB_wombat_obj_proxy = new $wbwindow.Proxy({}, {
        get: function(target, prop) {
            if (prop == 'top') {
                return $wbwindow.WB_wombat_top._WB_wombat_obj_proxy;
            }

            return default_proxy_get($wbwindow, prop, ownProps);
        },

        set: function(target, prop, value) {
            if (prop === 'location') {
                $wbwindow.WB_wombat_location = value;
                return true;
            } else if (prop === 'postMessage' || prop === 'document') {
                // assignments to these are silently ignored
                return true;
            } else {
                // mirror the value onto the dummy target first; a false
                // result there is reported as a failed set
                try {
                    if (!Reflect.set(target, prop, value)) {
                        return false;
                    }
                } catch(e) {}

                return Reflect.set($wbwindow, prop, value);
            }
        },

        has: function(target, prop) {
            return prop in $wbwindow;
        },

        ownKeys: function(target) {
            return Object.getOwnPropertyNames($wbwindow).concat(Object.getOwnPropertySymbols($wbwindow));
        },

        getOwnPropertyDescriptor: function(target, key) {
            // hack for some JS libraries that do a for in
            // since we are proxying an empty object need to add configurable = true
            // Proxies know we are an empty object and if window says not configurable
            // throws an error
            var descriptor = Object.getOwnPropertyDescriptor($wbwindow, key);
            if (descriptor && !descriptor.configurable) {
                descriptor.configurable = true;
            }
            return descriptor;
        },

        getPrototypeOf: function(target) {
            return Object.getPrototypeOf($wbwindow);
        },

        setPrototypeOf: function(target, newProto) {
            return false;
        },

        isExtensible: function(target) {
            return Object.isExtensible($wbwindow);
        },

        preventExtensions: function(target) {
            Object.preventExtensions($wbwindow);
            return true;
        },

        deleteProperty: function(target, prop) {
            var propDescriptor = Object.getOwnPropertyDescriptor($wbwindow, prop);

            if (propDescriptor === undefined) {
                return true;
            }

            if (propDescriptor.configurable === false) {
                return false;
            }

            delete $wbwindow[prop];
            return true;
        },

        defineProperty: function(target, prop, desc) {
            desc = desc || {};

            // Fill in the current window value only when the descriptor
            // specifies neither a value nor an accessor. Testing for key
            // presence (not truthiness) keeps falsy values such as 0,
            // false or "" intact, and avoids adding `value` to a
            // setter-only accessor, which would be an invalid descriptor.
            if (!("value" in desc) && !("get" in desc) && !("set" in desc)) {
                desc.value = $wbwindow[prop];
            }

            // define on the real window ...
            Reflect.defineProperty($wbwindow, prop, desc);

            // ... and on the dummy target; this second result is what the
            // trap reports
            return Reflect.defineProperty(target, prop, desc);
        }
    });

    return $wbwindow._WB_wombat_obj_proxy;
}
|
||||||
|
|
||||||
|
//============================================
|
||||||
|
// Apply the document overrides, then (when Proxy is available) wrap
// $document in a Proxy stored on $document._WB_wombat_obj_proxy.
// Returns that proxy, or undefined when Proxy is not available.
function init_document_obj_proxy($document) {
    // the overrides are applied whether or not Proxy exists
    init_doc_overrides($document);

    if (!$wbwindow.Proxy) {
        return undefined;
    }

    var ownProps = getAllOwnProps($document);

    var handler = {
        get: function(target, prop) {
            return default_proxy_get($document, prop, ownProps);
        },

        set: function(target, prop, value) {
            if (prop === 'location') {
                // redirect location writes to the wombat location
                $document.WB_wombat_location = value;
            } else {
                target[prop] = value;
            }

            return true;
        }
    };

    $document._WB_wombat_obj_proxy = new $wbwindow.Proxy($document, handler);

    return $document._WB_wombat_obj_proxy;
}
|
||||||
|
|
||||||
|
// End Proxy Obj Override System
|
||||||
|
|
||||||
|
|
||||||
|
//============================================
|
||||||
function wombat_init(wbinfo) {
|
function wombat_init(wbinfo) {
|
||||||
init_paths(wbinfo);
|
init_paths(wbinfo);
|
||||||
|
|
||||||
@ -2572,8 +2684,6 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
//$wbwindow.document.WB_wombat_domain = wbinfo.wombat_host;
|
//$wbwindow.document.WB_wombat_domain = wbinfo.wombat_host;
|
||||||
//$wbwindow.document.WB_wombat_referrer = extract_orig($wbwindow.document.referrer);
|
//$wbwindow.document.WB_wombat_referrer = extract_orig($wbwindow.document.referrer);
|
||||||
|
|
||||||
init_doc_overrides($wbwindow, wb_opts);
|
|
||||||
|
|
||||||
// History
|
// History
|
||||||
override_history_func("pushState");
|
override_history_func("pushState");
|
||||||
override_history_func("replaceState");
|
override_history_func("replaceState");
|
||||||
@ -2638,6 +2748,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
init_getAttribute_override();
|
init_getAttribute_override();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// override href and src attrs
|
||||||
|
init_attr_overrides();
|
||||||
|
|
||||||
|
// Cookies
|
||||||
|
init_cookies_override();
|
||||||
|
|
||||||
// createElement attr override
|
// createElement attr override
|
||||||
if (!wb_opts.skip_createElement) {
|
if (!wb_opts.skip_createElement) {
|
||||||
init_createElement_override();
|
init_createElement_override();
|
||||||
@ -2680,6 +2796,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
// disable notifications
|
// disable notifications
|
||||||
init_disable_notifications();
|
init_disable_notifications();
|
||||||
|
|
||||||
|
// add window and document obj proxies, if available
|
||||||
|
init_window_obj_proxy($wbwindow);
|
||||||
|
init_document_obj_proxy($wbwindow.document);
|
||||||
|
|
||||||
|
init_mo_from_proxy();
|
||||||
|
|
||||||
// expose functions
|
// expose functions
|
||||||
var obj = {}
|
var obj = {}
|
||||||
obj.extract_orig = extract_orig;
|
obj.extract_orig = extract_orig;
|
||||||
@ -2687,6 +2809,14 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
obj.watch_elem = watch_elem;
|
obj.watch_elem = watch_elem;
|
||||||
obj.init_new_window_wombat = init_new_window_wombat;
|
obj.init_new_window_wombat = init_new_window_wombat;
|
||||||
obj.init_paths = init_paths;
|
obj.init_paths = init_paths;
|
||||||
|
obj.local_init = function(name) {
|
||||||
|
var res = $wbwindow._WB_wombat_obj_proxy[name];
|
||||||
|
if (name === "document" && res && !res._WB_wombat_obj_proxy) {
|
||||||
|
return init_document_obj_proxy(res) || res;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
return obj;
|
return obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2842,3 +2972,4 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
window._WBWombat = _WBWombat;
|
window._WBWombat = _WBWombat;
|
||||||
|
|
||||||
|
@ -108,6 +108,16 @@ class WarcServer(BaseWarcServer):
|
|||||||
def list_fixed_routes(self):
|
def list_fixed_routes(self):
|
||||||
return list(self.fixed_routes.keys())
|
return list(self.fixed_routes.keys())
|
||||||
|
|
||||||
|
def get_coll_config(self, name):
    """Return the config dict for collection *name*.

    Looks up ``name`` under the ``'collections'`` key of ``self.config``.
    A non-dict entry is wrapped as ``{'index': entry}``. Returns ``{}``
    when there are no collections or *name* is not among them.
    """
    all_colls = self.config.get('collections', None)
    if all_colls:
        entry = all_colls.get(name, {})
        return entry if isinstance(entry, dict) else {'index': entry}

    return {}
|
||||||
|
|
||||||
def list_dynamic_routes(self):
|
def list_dynamic_routes(self):
|
||||||
if not self.root_dir:
|
if not self.root_dir:
|
||||||
return []
|
return []
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
six
|
six
|
||||||
warcio>=1.3.4
|
warcio>=1.4.0
|
||||||
chardet
|
chardet
|
||||||
requests
|
requests
|
||||||
redis
|
redis
|
||||||
|
@ -2,9 +2,15 @@
|
|||||||
|
|
||||||
debug: true
|
debug: true
|
||||||
|
|
||||||
|
collections_root: _test_colls
|
||||||
|
|
||||||
collections:
|
collections:
|
||||||
pywb: ./sample_archive/cdx/
|
pywb: ./sample_archive/cdx/
|
||||||
|
|
||||||
|
with-js-proxy:
|
||||||
|
index: ./sample_archive/cdx/
|
||||||
|
use_js_obj_proxy: true
|
||||||
|
|
||||||
# live collection
|
# live collection
|
||||||
live: $live
|
live: $live
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ from mock import patch
|
|||||||
from pywb import get_test_dir
|
from pywb import get_test_dir
|
||||||
from pywb.warcserver.test.testutils import TempDirTests, BaseTestClass
|
from pywb.warcserver.test.testutils import TempDirTests, BaseTestClass
|
||||||
|
|
||||||
from pywb.manager.manager import main
|
from pywb.manager.manager import main, CollectionsManager
|
||||||
|
|
||||||
import pywb.manager.autoindex
|
import pywb.manager.autoindex
|
||||||
|
|
||||||
@ -32,6 +32,9 @@ from pywb.apps.frontendapp import FrontEndApp
|
|||||||
#=============================================================================
|
#=============================================================================
|
||||||
ARCHIVE_DIR = 'archive'
|
ARCHIVE_DIR = 'archive'
|
||||||
INDEX_DIR = 'indexes'
|
INDEX_DIR = 'indexes'
|
||||||
|
COLLECTIONS = '_test_colls'
|
||||||
|
|
||||||
|
CollectionsManager.COLLS_DIR = COLLECTIONS
|
||||||
|
|
||||||
INDEX_FILE = 'index.cdxj'
|
INDEX_FILE = 'index.cdxj'
|
||||||
AUTOINDEX_FILE = 'autoindex.cdxj'
|
AUTOINDEX_FILE = 'autoindex.cdxj'
|
||||||
@ -76,7 +79,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
with raises(SystemExit):
|
with raises(SystemExit):
|
||||||
wayback(['-a', '-p', '0'])
|
wayback(['-a', '-p', '0'])
|
||||||
|
|
||||||
colls = os.path.join(self.root_dir, 'collections')
|
colls = os.path.join(self.root_dir, COLLECTIONS)
|
||||||
os.mkdir(colls)
|
os.mkdir(colls)
|
||||||
|
|
||||||
pywb.manager.autoindex.keep_running = False
|
pywb.manager.autoindex.keep_running = False
|
||||||
@ -87,7 +90,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
"""
|
"""
|
||||||
main(['init', 'test'])
|
main(['init', 'test'])
|
||||||
|
|
||||||
colls = os.path.join(self.root_dir, 'collections')
|
colls = os.path.join(self.root_dir, COLLECTIONS)
|
||||||
assert os.path.isdir(colls)
|
assert os.path.isdir(colls)
|
||||||
|
|
||||||
test = os.path.join(colls, 'test')
|
test = os.path.join(colls, 'test')
|
||||||
@ -128,7 +131,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
main(['add', 'test', warc1, warc2])
|
main(['add', 'test', warc1, warc2])
|
||||||
|
|
||||||
# Spurious file in collections
|
# Spurious file in collections
|
||||||
with open(os.path.join(self.root_dir, 'collections', 'blah'), 'w+b') as fh:
|
with open(os.path.join(self.root_dir, COLLECTIONS, 'blah'), 'w+b') as fh:
|
||||||
fh.write(b'foo\n')
|
fh.write(b'foo\n')
|
||||||
|
|
||||||
with raises(IOError):
|
with raises(IOError):
|
||||||
@ -147,7 +150,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
|
|
||||||
main(['init', 'nested'])
|
main(['init', 'nested'])
|
||||||
|
|
||||||
nested_root = os.path.join(self.root_dir, 'collections', 'nested', ARCHIVE_DIR)
|
nested_root = os.path.join(self.root_dir, COLLECTIONS, 'nested', ARCHIVE_DIR)
|
||||||
nested_a = os.path.join(nested_root, 'A')
|
nested_a = os.path.join(nested_root, 'A')
|
||||||
nested_b = os.path.join(nested_root, 'B', 'sub')
|
nested_b = os.path.join(nested_root, 'B', 'sub')
|
||||||
|
|
||||||
@ -166,7 +169,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
os.path.join(nested_b, 'example.warc.gz')
|
os.path.join(nested_b, 'example.warc.gz')
|
||||||
])
|
])
|
||||||
|
|
||||||
nested_cdx = os.path.join(self.root_dir, 'collections', 'nested', INDEX_DIR, INDEX_FILE)
|
nested_cdx = os.path.join(self.root_dir, COLLECTIONS, 'nested', INDEX_DIR, INDEX_FILE)
|
||||||
with open(nested_cdx) as fh:
|
with open(nested_cdx) as fh:
|
||||||
nested_cdx_index = fh.read()
|
nested_cdx_index = fh.read()
|
||||||
|
|
||||||
@ -190,7 +193,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
to ensure equality of indexes
|
to ensure equality of indexes
|
||||||
"""
|
"""
|
||||||
# ensure merged index is same as full reindex
|
# ensure merged index is same as full reindex
|
||||||
coll_dir = os.path.join(self.root_dir, 'collections', 'test', INDEX_DIR)
|
coll_dir = os.path.join(self.root_dir, COLLECTIONS, 'test', INDEX_DIR)
|
||||||
orig = os.path.join(coll_dir, INDEX_FILE)
|
orig = os.path.join(coll_dir, INDEX_FILE)
|
||||||
bak = os.path.join(coll_dir, 'index.bak')
|
bak = os.path.join(coll_dir, 'index.bak')
|
||||||
|
|
||||||
@ -210,7 +213,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
def test_add_static(self):
|
def test_add_static(self):
|
||||||
""" Test adding static file to collection, check access
|
""" Test adding static file to collection, check access
|
||||||
"""
|
"""
|
||||||
a_static = os.path.join(self.root_dir, 'collections', 'test', 'static', 'abc.js')
|
a_static = os.path.join(self.root_dir, COLLECTIONS, 'test', 'static', 'abc.js')
|
||||||
|
|
||||||
with open(a_static, 'w+b') as fh:
|
with open(a_static, 'w+b') as fh:
|
||||||
fh.write(b'/* Some JS File */')
|
fh.write(b'/* Some JS File */')
|
||||||
@ -281,7 +284,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
def test_custom_template_search(self):
|
def test_custom_template_search(self):
|
||||||
""" Test manually added custom search template search.html
|
""" Test manually added custom search template search.html
|
||||||
"""
|
"""
|
||||||
a_static = os.path.join(self.root_dir, 'collections', 'test', 'templates', 'search.html')
|
a_static = os.path.join(self.root_dir, COLLECTIONS, 'test', 'templates', 'search.html')
|
||||||
|
|
||||||
with open(a_static, 'w+b') as fh:
|
with open(a_static, 'w+b') as fh:
|
||||||
fh.write(b'pywb custom search page')
|
fh.write(b'pywb custom search page')
|
||||||
@ -299,7 +302,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
Template is relative to collection-specific dir
|
Template is relative to collection-specific dir
|
||||||
Add custom metadata and test its presence in custom search page
|
Add custom metadata and test its presence in custom search page
|
||||||
"""
|
"""
|
||||||
custom_search = os.path.join(self.root_dir, 'collections', 'test',
|
custom_search = os.path.join(self.root_dir, COLLECTIONS, 'test',
|
||||||
'templates', 'search.html')
|
'templates', 'search.html')
|
||||||
|
|
||||||
# add metadata
|
# add metadata
|
||||||
@ -314,7 +317,8 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
resp.charset = 'utf-8'
|
resp.charset = 'utf-8'
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.content_type == 'text/html'
|
assert resp.content_type == 'text/html'
|
||||||
assert 'overriden search page: {"some": "value"}' in resp.text
|
assert 'overriden search page: ' in resp.text
|
||||||
|
assert '"some": "value"' in resp.text
|
||||||
|
|
||||||
resp = self.testapp.get('/test/20140103030321/http://example.com?example=1')
|
resp = self.testapp.get('/test/20140103030321/http://example.com?example=1')
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
@ -328,7 +332,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
|
|
||||||
# Add collection template
|
# Add collection template
|
||||||
main(['template', 'foo', '--add', 'query_html'])
|
main(['template', 'foo', '--add', 'query_html'])
|
||||||
assert os.path.isfile(os.path.join(self.root_dir, 'collections', 'foo', 'templates', 'query.html'))
|
assert os.path.isfile(os.path.join(self.root_dir, COLLECTIONS, 'foo', 'templates', 'query.html'))
|
||||||
|
|
||||||
# overwrite -- force
|
# overwrite -- force
|
||||||
main(['template', 'foo', '--add', 'query_html', '-f'])
|
main(['template', 'foo', '--add', 'query_html', '-f'])
|
||||||
@ -389,7 +393,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
def test_no_templates(self):
|
def test_no_templates(self):
|
||||||
""" Test removing templates dir, using default template again
|
""" Test removing templates dir, using default template again
|
||||||
"""
|
"""
|
||||||
shutil.rmtree(os.path.join(self.root_dir, 'collections', 'foo', 'templates'))
|
shutil.rmtree(os.path.join(self.root_dir, COLLECTIONS, 'foo', 'templates'))
|
||||||
|
|
||||||
self._create_app()
|
self._create_app()
|
||||||
|
|
||||||
@ -462,7 +466,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
|
|
||||||
def test_auto_index(self):
|
def test_auto_index(self):
|
||||||
main(['init', 'auto'])
|
main(['init', 'auto'])
|
||||||
auto_dir = os.path.join(self.root_dir, 'collections', 'auto')
|
auto_dir = os.path.join(self.root_dir, COLLECTIONS, 'auto')
|
||||||
archive_dir = os.path.join(auto_dir, ARCHIVE_DIR)
|
archive_dir = os.path.join(auto_dir, ARCHIVE_DIR)
|
||||||
|
|
||||||
archive_sub_dir = os.path.join(archive_dir, 'sub')
|
archive_sub_dir = os.path.join(archive_dir, 'sub')
|
||||||
@ -545,7 +549,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
|
|
||||||
def test_err_wrong_warcs(self):
|
def test_err_wrong_warcs(self):
|
||||||
warc1 = self._get_sample_warc('example.warc.gz')
|
warc1 = self._get_sample_warc('example.warc.gz')
|
||||||
invalid_warc = os.path.join(self.root_dir, 'collections', 'test', ARCHIVE_DIR, 'invalid.warc.gz')
|
invalid_warc = os.path.join(self.root_dir, COLLECTIONS, 'test', ARCHIVE_DIR, 'invalid.warc.gz')
|
||||||
|
|
||||||
# Empty warc list, argparse calls exit
|
# Empty warc list, argparse calls exit
|
||||||
with raises(SystemExit):
|
with raises(SystemExit):
|
||||||
@ -572,7 +576,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
|
|||||||
""" Test various errors with missing warcs dir,
|
""" Test various errors with missing warcs dir,
|
||||||
missing cdx dir, non dir cdx file, and missing collections root
|
missing cdx dir, non dir cdx file, and missing collections root
|
||||||
"""
|
"""
|
||||||
colls = os.path.join(self.root_dir, 'collections')
|
colls = os.path.join(self.root_dir, COLLECTIONS)
|
||||||
|
|
||||||
# No Statics -- ignorable
|
# No Statics -- ignorable
|
||||||
shutil.rmtree(os.path.join(colls, 'foo', 'static'))
|
shutil.rmtree(os.path.join(colls, 'foo', 'static'))
|
||||||
|
@ -254,11 +254,25 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
assert resp.content_length == 0
|
assert resp.content_length == 0
|
||||||
assert resp.content_type == 'application/x-javascript'
|
assert resp.content_type == 'application/x-javascript'
|
||||||
|
|
||||||
#def test_redirect_exact(self):
|
def test_replay_js_obj_proxy(self, fmod):
|
||||||
# resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/')
|
# test js proxy obj with jquery -- no user agent
|
||||||
# assert resp.status_int == 302
|
resp = self.get('/with-js-proxy/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js', fmod)
|
||||||
|
|
||||||
# assert resp.headers['Location'].endswith('/pywb/20140127171238/http://iana.org')
|
assert resp.status_int == 200
|
||||||
|
assert resp.content_length != 0
|
||||||
|
assert resp.content_type == 'application/x-javascript'
|
||||||
|
|
||||||
|
# test with Chrome user agent
|
||||||
|
resp = self.get('/with-js-proxy/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js', fmod,
|
||||||
|
headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'})
|
||||||
|
assert 'let window = _____WB$wombat$assign$function_____(' in resp.text
|
||||||
|
|
||||||
|
def test_replay_js_ie11_no_obj_proxy(self, fmod):
|
||||||
|
# IE11 user-agent, no proxy
|
||||||
|
resp = self.get('/with-js-proxy/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js', fmod,
|
||||||
|
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'})
|
||||||
|
|
||||||
|
assert 'let window = _____WB$wombat$assign$function_____(' not in resp.text
|
||||||
|
|
||||||
def test_replay_non_exact(self, fmod):
|
def test_replay_non_exact(self, fmod):
|
||||||
# non-exact mode, don't redirect to exact capture
|
# non-exact mode, don't redirect to exact capture
|
||||||
@ -448,7 +462,7 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
resp = self.testapp.get('/collinfo.json')
|
resp = self.testapp.get('/collinfo.json')
|
||||||
assert resp.content_type == 'application/json'
|
assert resp.content_type == 'application/json'
|
||||||
value = resp.json
|
value = resp.json
|
||||||
assert len(value['fixed']) == 4
|
assert len(value['fixed']) == 5
|
||||||
assert len(value['dynamic']) == 0
|
assert len(value['dynamic']) == 0
|
||||||
|
|
||||||
#def test_invalid_config(self):
|
#def test_invalid_config(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user