diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index d94e9985..a9831f3f 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -21,6 +21,10 @@ class RegexRewriter(StreamingRewriter): def format(template): return lambda string: template.format(string) + @staticmethod + def fixed(string): + return lambda _: string + @staticmethod def remove_https(string): return string.replace("https", "http") @@ -33,9 +37,6 @@ class RegexRewriter(StreamingRewriter): def archival_rewrite(rewriter): return lambda string: rewriter.rewrite(string) - # @staticmethod - # def replacer(other): - # return lambda m, string: other HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+' @@ -178,12 +179,17 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\ 'frames', 'opener'] + THIS_RW = '(this && this._WB_wombat_obj_proxy || this)' + def __init__(self, rewriter, rules=[]): + func_rw = 'Function("return {0}")'.format(self.THIS_RW) + rules = rules + [ - (r'Function\(["\']return this["\']\)', RegexRewriter.format('Function("return this._WB_wombat_obj_proxy || this")'), 0), - (r'(?>> _test_js('"a=b&http:\/\/example.com/;c=d"') '"a=b&/web/20131010/http:\\/\\/example.com/;c=d"' +#================================================================= +# JS Obj Proxy Rewriter +#================================================================= + +>>> _test_js_obj_proxy('var foo = this; location = bar') +'var foo =(this && this._WB_wombat_obj_proxy || this); location = bar' + +>>> _test_js_obj_proxy('var foo = this.location') +'var foo = (this && this._WB_wombat_obj_proxy || this).location' + +>>> _test_js_obj_proxy('var foo = this.location2') +'var foo = this.location2' + +>>> _test_js_obj_proxy('func(Function("return this"));') +'func(Function("return (this && this._WB_wombat_obj_proxy || this)"));' + +>>> _test_js_obj_proxy('this.document.location = foo') +'(this && this._WB_wombat_obj_proxy || this).document.location = foo' + +# not rewritten +>>> _test_js_obj_proxy('var window = this$') +'var window = this$' + +>>> _test_js_obj_proxy('var window = $this') +'var window = $this' + #================================================================= # XML Rewriting #================================================================= @@ -206,6 +232,7 @@ r""" #================================================================= from pywb.rewrite.url_rewriter import UrlRewriter from pywb.rewrite.regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter +from pywb.rewrite.regex_rewriters import JSWombatProxyRewriter urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/', 'https://localhost/web/') @@ -214,6 +241,12 @@ urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/', 'https://loca def _test_js(string, extra = []): return JSRewriter(urlrewriter, extra).rewrite(string) +def _test_js_obj_proxy(string): + rw = JSWombatProxyRewriter(urlrewriter) + rw.first_buff = '' + rw.close_string = '' + return rw.rewrite(string) + def _test_xml(string): return XMLRewriter(urlrewriter).rewrite(string)