1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-31 03:04:12 +02:00
pywb/pywb/rewrite/regex_rewriters.py

372 lines
13 KiB
Python
Raw Normal View History

import re
from pywb.rewrite.content_rewriter import StreamingRewriter
from pywb.utils.loaders import load_py_name
from six.moves.urllib.parse import unquote
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# =================================================================
class RxRules(object):
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
@staticmethod
def remove_https(string, _):
return string.replace("https", "http")
@staticmethod
def replace_str(replacer):
return lambda x, _: x.replace('this', replacer)
@staticmethod
def format(template):
return lambda string, _: template.format(string)
@staticmethod
def fixed(string):
return lambda _, _2: string
@staticmethod
def archival_rewrite():
return lambda string, rewriter: rewriter.rewrite(string)
@staticmethod
def add_prefix(prefix):
return lambda string, _: prefix + string
@staticmethod
def add_suffix(suffix):
return lambda string, _: string + suffix
@staticmethod
def compile_rules(rules):
# Build regexstr, concatenating regex list
regex_str = '|'.join(['(' + rx + ')' for rx, op, count in rules])
# ensure it's not middle of a word, wrap in non-capture group
regex_str = '(?<!\w)(?:' + regex_str + ')'
return re.compile(regex_str, re.M)
def __init__(self, rules=None):
self.rules = rules or []
self.regex = self.compile_rules(self.rules)
def __call__(self, extra_rules=None):
if not extra_rules:
return self.rules, self.regex
all_rules = extra_rules + self.rules
regex = self.compile_rules(all_rules)
return all_rules, regex
# =================================================================
class JSWombatProxyRules(RxRules):
def __init__(self):
local_init_func = '\nvar {0} = function(name) {{\
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
return (self._wb_wombat && self._wb_wombat.local_init && \
self._wb_wombat.local_init(name)) || self[name]; }};\n\
server side rewriting: (#475) - fixed edge case in jsonP rewriting where no callback name is supplied only ? but body has normal jsonP callback (url = https://geolocation.onetrust.com/cookieconsentpub/v1/geo/countries/EU?callback=?) - made the `!self.__WB_pmw` server side inject match the client side one done via wombat - added regex's for eval override to JSWombatProxyRules wombat: - added eval override in order to ensure AD network JS does not lockup the browser (cnn.com) - added returning a proxied value for window.parent from the window proxy object in order to ensure AD network JS does not lock up browser (cnn.com, boston.com, et. al.) - ensured that the read only storageArea property of 'StorageEvents' fired by the custom storage class is present (spec) - ensured that userland cannot create new instances of Storage and that localStorage instanceof Storage works with our override (spec) - ensured that assignments to SomeElement.[innerHTML|outerHTML], iframe.srcdoc, or style.textContent are more correctly handled in particular doing script.[innerHTML|outerHTML] = <JS> - ensured that the test used in the isArgumentsObj function does not throw errors IFF the pages JS has made it so when toString is called (linkedin.com) - ensured that Web Worker construction when using the optional options object, the options get supplied to the create worker (spec) - ensured that the expected TypeError exception thrown from DomConstructors of overridden interfaces are thrown when their pre-conditions are not met (spec) - ensured that wombat worker rewriting skipes data urls which should not be rewritten - ensured the bound function returned by the window function is the same on subsequent retrievals fixes #474 tests: - added direct testing of wombat's parts
2019-06-20 22:06:41 -04:00
if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ this.__WB_source = obj; return this; }} }}\n\
{{\n'
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
local_check_this_fn = 'var {0} = function (thisObj) {{ \
if (thisObj && thisObj._WB_wombat_obj_proxy) return thisObj._WB_wombat_obj_proxy; return thisObj; }};'
local_init_func_name = '_____WB$wombat$assign$function_____'
local_var_line = 'let {0} = {1}("{0}");'
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
local_check_this_func_name = '_____WB$wombat$check$this$function_____'
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
# we must use a function to perform the this check because most minfiers reduce the number of statements
# by turning everything into one or more expressions. Our previous rewrite was an logical expression,
# (this && this._WB_wombat_obj_proxy || this), that would cause the outer expression to be invalid when
# it was used as the LHS of certain expressions.
# e.g. assignment expressions containing non parenthesized logical expression.
# By using a function the expression injected is an call expression that plays nice in those cases
this_rw = '_____WB$wombat$check$this$function_____(this)'
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
check_loc = '((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = '
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
self.local_objs = [
'window',
'self',
'document',
'location',
'top',
'parent',
'frames',
'opener'
]
local_declares = '\n'.join([local_var_line.format(obj, local_init_func_name) for obj in self.local_objs])
prop_str = '|'.join(self.local_objs)
rules = [
# rewriting 'eval(....)' - invocation
server side rewriting: (#475) - fixed edge case in jsonP rewriting where no callback name is supplied only ? but body has normal jsonP callback (url = https://geolocation.onetrust.com/cookieconsentpub/v1/geo/countries/EU?callback=?) - made the `!self.__WB_pmw` server side inject match the client side one done via wombat - added regex's for eval override to JSWombatProxyRules wombat: - added eval override in order to ensure AD network JS does not lockup the browser (cnn.com) - added returning a proxied value for window.parent from the window proxy object in order to ensure AD network JS does not lock up browser (cnn.com, boston.com, et. al.) - ensured that the read only storageArea property of 'StorageEvents' fired by the custom storage class is present (spec) - ensured that userland cannot create new instances of Storage and that localStorage instanceof Storage works with our override (spec) - ensured that assignments to SomeElement.[innerHTML|outerHTML], iframe.srcdoc, or style.textContent are more correctly handled in particular doing script.[innerHTML|outerHTML] = <JS> - ensured that the test used in the isArgumentsObj function does not throw errors IFF the pages JS has made it so when toString is called (linkedin.com) - ensured that Web Worker construction when using the optional options object, the options get supplied to the create worker (spec) - ensured that the expected TypeError exception thrown from DomConstructors of overridden interfaces are thrown when their pre-conditions are not met (spec) - ensured that wombat worker rewriting skipes data urls which should not be rewritten - ensured the bound function returned by the window function is the same on subsequent retrievals fixes #474 tests: - added direct testing of wombat's parts
2019-06-20 22:06:41 -04:00
(r'\beval\s*\(', self.add_prefix('WB_wombat_runEval(function _____evalIsEvil(_______eval_arg$$) { return eval(_______eval_arg$$); }.bind(this)).'), 0),
# rewriting 'x = eval' - no invocation
server side rewriting: (#475) - fixed edge case in jsonP rewriting where no callback name is supplied only ? but body has normal jsonP callback (url = https://geolocation.onetrust.com/cookieconsentpub/v1/geo/countries/EU?callback=?) - made the `!self.__WB_pmw` server side inject match the client side one done via wombat - added regex's for eval override to JSWombatProxyRules wombat: - added eval override in order to ensure AD network JS does not lockup the browser (cnn.com) - added returning a proxied value for window.parent from the window proxy object in order to ensure AD network JS does not lock up browser (cnn.com, boston.com, et. al.) - ensured that the read only storageArea property of 'StorageEvents' fired by the custom storage class is present (spec) - ensured that userland cannot create new instances of Storage and that localStorage instanceof Storage works with our override (spec) - ensured that assignments to SomeElement.[innerHTML|outerHTML], iframe.srcdoc, or style.textContent are more correctly handled in particular doing script.[innerHTML|outerHTML] = <JS> - ensured that the test used in the isArgumentsObj function does not throw errors IFF the pages JS has made it so when toString is called (linkedin.com) - ensured that Web Worker construction when using the optional options object, the options get supplied to the create worker (spec) - ensured that the expected TypeError exception thrown from DomConstructors of overridden interfaces are thrown when their pre-conditions are not met (spec) - ensured that wombat worker rewriting skipes data urls which should not be rewritten - ensured the bound function returned by the window function is the same on subsequent retrievals fixes #474 tests: - added direct testing of wombat's parts
2019-06-20 22:06:41 -04:00
(r'\beval\b', self.add_prefix('WB_wombat_'), 0),
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
(r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
(r'(?<![$.])\s*location\b\s*[=]\s*(?![=])', self.add_suffix(check_loc), 0),
# rewriting 'return this'
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
(r'\breturn\s+this\b\s*(?![.$])', self.replace_str(this_rw), 0),
# rewriting 'this.' special properties access on new line, with ; prepended
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
(r'(?<=[\n])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(';' + this_rw), 0),
# rewriting 'this.' special properties access, not on new line (no ;)
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
(r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(this_rw), 0),
# rewrite '= this' or ', this'
(r'(?<=[=,])\s*this\b\s*(?![.$])', self.replace_str(this_rw), 0),
# rewrite ')(this)'
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
('\}(?:\s*\))?\s*\(this\)', self.replace_str(this_rw), 0),
# rewrite this in && or || expr?
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
(r'(?<=[^|&][|&]{2})\s*this\b\s*(?![|&.$]([^|&]|$))', self.replace_str(this_rw), 0),
]
super(JSWombatProxyRules, self).__init__(rules)
wombat overhaul! fixes #449 (#451) wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the areaelement - A: ShadowRootoverrides to: innerHTML even though inherites from DocumentFragement and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. - A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEven constructor and init{even-name} overrides in order to ensure that wombats JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombats JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creatation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewritting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy moded, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
2019-05-15 14:42:51 -04:00
self.first_buff = local_check_this_fn.format(local_check_this_func_name) + local_init_func.format(
local_init_func_name) + local_declares + '\n\n'
self.last_buff = '\n\n}'
# =================================================================
class RegexRewriter(StreamingRewriter):
rules_factory = RxRules()
def __init__(self, rewriter, extra_rules=None, first_buff=''):
super(RegexRewriter, self).__init__(rewriter, first_buff=first_buff)
# rules = self.create_rules(http_prefix)
self.rules, self.regex = self.rules_factory(extra_rules)
def filter(self, m):
return True
def rewrite(self, string):
return self.regex.sub(lambda x: self.replace(x), string)
def replace(self, m):
i = 0
for _, op, count in self.rules:
i += 1
full_m = i
while count > 0:
i += 1
count -= 1
if not m.group(i):
continue
# Optional filter to skip matches
if not self.filter(m):
return m.group(0)
# Custom func
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# if not hasattr(op, '__call__'):
# op = RegexRewriter.DEFAULT_OP(op)
result = op(m.group(i), self.url_rewriter)
final_str = result
# if extracting partial match
if i != full_m:
final_str = m.string[m.start(full_m):m.start(i)]
final_str += result
final_str += m.string[m.end(i):m.end(full_m)]
return final_str
@staticmethod
def parse_rules_from_config(config):
def run_parse_rules(rewriter):
def parse_rule(obj):
match = obj.get('match')
if 'rewrite' in obj:
replace = RxRules.archival_rewrite()
elif 'function' in obj:
replace = load_py_name(obj['function'])
else:
replace = RxRules.format(obj.get('replace', '{0}'))
group = obj.get('group', 0)
result = (match, replace, group)
return result
return list(map(parse_rule, config))
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
return run_parse_rules
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# =================================================================
class JSLocationRewriterRules(RxRules):
"""
JS Rewriter mixin which rewrites location and domain to the
specified prefix (default: ``WB_wombat_``)
"""
def __init__(self, prefix='WB_wombat_'):
super(JSLocationRewriterRules, self).__init__(self.get_rules(prefix))
def get_rules(self, prefix):
rules = [
(r'(?<![$\'"])\b(?:location|top)\b(?![$\'":])', self.add_prefix(prefix), 0),
(r'(?<=[?])\s*(?:\w+[.])?(location)\s*(?=[:])', self.add_prefix(prefix), 1),
(r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self.window).'), 0),
(r'(?<=\.)frameElement\b', self.add_prefix(prefix), 0),
]
return rules
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# =================================================================
class JSLinkAndLocationRewriterRules(JSLocationRewriterRules):
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
"""
JS Rewriter rules which also rewrite absolute http://, https:// and // urls
at the beginning of a string
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
"""
# JS_HTTPX = r'(?:(?:(?<=["\';])https?:)|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+.*(?=["\s\';&\\])'
# JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.\-/\\?&#]+(?=["\';&\\])'
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-][^"\s\';&\\]*(?=["\';&\\])'
JS_HTTPX = r'(?:(?<=["\';])https?:|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@%.\\-]+/'
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
def get_rules(self, prefix):
rules = super(JSLinkAndLocationRewriterRules, self).get_rules(prefix)
rules.append((self.JS_HTTPX, RxRules.archival_rewrite(), 0))
return rules
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# =================================================================
class JSLocationOnlyRewriter(RegexRewriter):
rules_factory = JSLocationRewriterRules()
# =================================================================
class JSLinkAndLocationRewriter(RegexRewriter):
rules_factory = JSLinkAndLocationRewriterRules()
JSRewriter = JSLinkAndLocationRewriter
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# =================================================================
class JSWombatProxyRewriter(RegexRewriter):
"""
JS Rewriter mixin which wraps the contents of the
script in an anonymous block scope and inserts
Wombat js-proxy setup
"""
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
rules_factory = JSWombatProxyRules()
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
def __init__(self, rewriter, extra_rules=None):
super(JSWombatProxyRewriter, self).__init__(rewriter, extra_rules=extra_rules)
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
self.first_buff = self.rules_factory.first_buff
self.last_buff = self.rules_factory.last_buff
self.local_objs = self.rules_factory.local_objs
def rewrite_complete(self, string, **kwargs):
if not kwargs.get('inline_attr'):
return super(JSWombatProxyRewriter, self).rewrite_complete(string)
# check if any of the wrapped objects are used in the script
# if not, don't rewrite
if not any(obj in string for obj in self.local_objs):
return string
if string.startswith('javascript:'):
string = 'javascript:' + self.first_buff + self.rewrite(string[len('javascript:'):])
else:
string = self.first_buff + self.rewrite(string)
string += self.last_buff
string = string.replace('\n', '')
return string
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
def final_read(self):
return self.last_buff
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# =================================================================
class JSNoneRewriter(RegexRewriter):
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
pass
# =================================================================
class JSReplaceFuzzy(object):
rx_obj = None
def __init__(self, *args, **kwargs):
super(JSReplaceFuzzy, self).__init__(*args, **kwargs)
if not self.rx_obj:
self.rx_obj = re.compile(self.rx)
def rewrite(self, string):
string = super(JSReplaceFuzzy, self).rewrite(string)
cdx = self.url_rewriter.rewrite_opts['cdx']
if cdx.get('is_fuzzy'):
expected = unquote(cdx['url'])
actual = unquote(self.url_rewriter.wburl.url)
exp_m = self.rx_obj.search(expected)
act_m = self.rx_obj.search(actual)
if exp_m and act_m:
result = string.replace(exp_m.group(1), act_m.group(1))
if result != string:
string = result
return string
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# =================================================================
class CSSRules(RxRules):
CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"
CSS_IMPORT_NO_URL_REGEX = ("@import\\s+(?!url)\\(?\\s*['\"]?" +
"(?!url[\\s\\(])([\w.:/\\\\-]+)")
def __init__(self):
rules = [
(self.CSS_URL_REGEX, self.archival_rewrite(), 1),
(self.CSS_IMPORT_NO_URL_REGEX, self.archival_rewrite(), 1),
]
super(CSSRules, self).__init__(rules)
# =================================================================
class CSSRewriter(RegexRewriter):
rules_factory = CSSRules()
# =================================================================
class XMLRules(RxRules):
def __init__(self):
rules = [
('([A-Za-z:]+[\s=]+)?["\'\s]*(' +
self.HTTPX_MATCH_STR + ')',
self.archival_rewrite(), 2),
]
super(XMLRules, self).__init__(rules)
JS Object Proxy Override System (#224) * Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop Changes - cli.py: add import os for os.chdir(self.r.directory) - frontendapp.py: added initial support for cors requests. - static_handler.py: add import for NotFoundException - wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording! - default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing - html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing - regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy - wombat.js: added JS Proxy support - remove print * wombat proxy: simplify mixin using 'first_buff' * js local scope rewrite/proxy work: - add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default) - new proxy toggleable with 'js_local_scope_rewrite: true' - work on integrating john's proxy work - getAllOwnProps() to generate list of functions that need to be rebound - remove non-proxy related changes for now, remove angular special cases (for now) * local scope proxy work: - add back __WB_pmw() prefix for postMessage - don't override postMessage() in proxy obj - MessageEvent resolve proxy to original window obj * js obj proxy: use local_init() to load local vars from proxy obj * wombat: js object proxy improvements: - use same object '_WB_wombat_obj_proxy' on window and document objects - reuse default_proxy_get() for get operation from window or document - resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that - override MessageEvent.source to return window proxy object * obj proxy work: - window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception - window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing) - add override_prop_to_proxy() to add override to return proxy obj for attribute - add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy server side rewrite: generalize local proxy insert, add list for local let overrides * js obj proxy work: - add default '__WB_pmw' to self if undefined (for service workers) - document.origin override - proxy obj: improved defineProperty override to work with safari - proxy obj: catch any exception in dummy obj setter * client-side rewriting: - proxy obj: catch exception (such as cross-domain access) in own props init - proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse - rewrite style: add 'cursor' attr for css url rewriting * content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped) * client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link * client-side document override improvements: - fix document.domain, document.referrer, forms add document.origin overrides to use only the document object - init_doc_overrides() called as part of proxy init - move non-document overrides to main init rewrite: add rewrite for "Function('return this')" pattern to use proxy obj * js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False) live-rewrite-server: defaults to enabled js obj proxy metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections), or collection config for static collections warcserver: get_coll_config() returns config for static collection tests: use custom test dir instead of default 'collections' dir tests: add basic test for js obj proxy update to warcio>=1.4.0 * karma tests: update to safari >10 * client-side rewrite: - ensure wombat.js is ES5 compatible (don't use let) - check if Proxy obj exists before attempting to init * js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported content_rewriter: add overridable get_rewriter() content_rewriter: fix elif -> if in should_rw_content() tests: update js proxy obj test with different user agents (supported and unsupported) karma: reset test to safari 9 * compatibility: remove shorthand notation from wombat.js * js obj proxy: override MutationObserver.observe() to retrieve original object from proxy wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
# =================================================================
class XMLRewriter(RegexRewriter):
rules_factory = XMLRules()
# custom filter to reject 'xmlns' attr
def filter(self, m):
attr = m.group(1)
if attr and attr.startswith('xmlns'):
return False
return True