mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite: js obj proxy rewrite improvements:
- add general ' = this' rewriting to check for proxy obj
- add tests for js obj proxy regex rewriting (without first or last wrapper)
This commit is contained in:
parent
bbe3cebd2f
commit
aaad583276
@ -21,6 +21,10 @@ class RegexRewriter(StreamingRewriter):
|
|||||||
def format(template):
|
def format(template):
|
||||||
return lambda string: template.format(string)
|
return lambda string: template.format(string)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def fixed(string):
|
||||||
|
return lambda _: string
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def remove_https(string):
|
def remove_https(string):
|
||||||
return string.replace("https", "http")
|
return string.replace("https", "http")
|
||||||
@ -33,9 +37,6 @@ class RegexRewriter(StreamingRewriter):
|
|||||||
def archival_rewrite(rewriter):
|
def archival_rewrite(rewriter):
|
||||||
return lambda string: rewriter.rewrite(string)
|
return lambda string: rewriter.rewrite(string)
|
||||||
|
|
||||||
# @staticmethod
|
|
||||||
# def replacer(other):
|
|
||||||
# return lambda m, string: other
|
|
||||||
|
|
||||||
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
|
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
|
||||||
|
|
||||||
@ -178,12 +179,17 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
|
|||||||
'frames',
|
'frames',
|
||||||
'opener']
|
'opener']
|
||||||
|
|
||||||
|
THIS_RW = '(this && this._WB_wombat_obj_proxy || this)'
|
||||||
|
|
||||||
def __init__(self, rewriter, rules=[]):
|
def __init__(self, rewriter, rules=[]):
|
||||||
|
func_rw = 'Function("return {0}")'.format(self.THIS_RW)
|
||||||
|
|
||||||
rules = rules + [
|
rules = rules + [
|
||||||
(r'Function\(["\']return this["\']\)', RegexRewriter.format('Function("return this._WB_wombat_obj_proxy || this")'), 0),
|
(r'Function\(["\']return this["\']\)', self.fixed(func_rw), 0),
|
||||||
(r'(?<![$.])\bthis\b(?=(?:\.(?:{0})))'.format('|'.join(self.local_objs)),
|
(r'(?<![$.])\bthis\b(?=(?:\.(?:{0})\b))'.format('|'.join(self.local_objs)),
|
||||||
RegexRewriter.format('(this && this._WB_wombat_obj_proxy || this)'), 0),
|
self.fixed(self.THIS_RW), 0),
|
||||||
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(self).'), 0),
|
(r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
|
||||||
|
(r'(?<=[=])\s*this\b\s*(?![.$])', self.fixed(self.THIS_RW), 0),
|
||||||
]
|
]
|
||||||
|
|
||||||
super(JSWombatProxyRewriterMixin, self).__init__(rewriter, rules)
|
super(JSWombatProxyRewriterMixin, self).__init__(rewriter, rules)
|
||||||
|
@ -126,6 +126,32 @@ r"""
|
|||||||
>>> _test_js('"a=b&http:\/\/example.com/;c=d"')
|
>>> _test_js('"a=b&http:\/\/example.com/;c=d"')
|
||||||
'"a=b&/web/20131010/http:\\/\\/example.com/;c=d"'
|
'"a=b&/web/20131010/http:\\/\\/example.com/;c=d"'
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
# JS Obj Proxy Rewriter
|
||||||
|
#=================================================================
|
||||||
|
|
||||||
|
>>> _test_js_obj_proxy('var foo = this; location = bar')
|
||||||
|
'var foo =(this && this._WB_wombat_obj_proxy || this); location = bar'
|
||||||
|
|
||||||
|
>>> _test_js_obj_proxy('var foo = this.location')
|
||||||
|
'var foo = (this && this._WB_wombat_obj_proxy || this).location'
|
||||||
|
|
||||||
|
>>> _test_js_obj_proxy('var foo = this.location2')
|
||||||
|
'var foo = this.location2'
|
||||||
|
|
||||||
|
>>> _test_js_obj_proxy('func(Function("return this"));')
|
||||||
|
'func(Function("return (this && this._WB_wombat_obj_proxy || this)"));'
|
||||||
|
|
||||||
|
>>> _test_js_obj_proxy('this.document.location = foo')
|
||||||
|
'(this && this._WB_wombat_obj_proxy || this).document.location = foo'
|
||||||
|
|
||||||
|
# not rewritten
|
||||||
|
>>> _test_js_obj_proxy('var window = this$')
|
||||||
|
'var window = this$'
|
||||||
|
|
||||||
|
>>> _test_js_obj_proxy('var window = $this')
|
||||||
|
'var window = $this'
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# XML Rewriting
|
# XML Rewriting
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -206,6 +232,7 @@ r"""
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
from pywb.rewrite.regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter
|
from pywb.rewrite.regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter
|
||||||
|
from pywb.rewrite.regex_rewriters import JSWombatProxyRewriter
|
||||||
|
|
||||||
|
|
||||||
urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/', 'https://localhost/web/')
|
urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/', 'https://localhost/web/')
|
||||||
@ -214,6 +241,12 @@ urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/', 'https://loca
|
|||||||
def _test_js(string, extra = []):
|
def _test_js(string, extra = []):
|
||||||
return JSRewriter(urlrewriter, extra).rewrite(string)
|
return JSRewriter(urlrewriter, extra).rewrite(string)
|
||||||
|
|
||||||
|
def _test_js_obj_proxy(string):
|
||||||
|
rw = JSWombatProxyRewriter(urlrewriter)
|
||||||
|
rw.first_buff = ''
|
||||||
|
rw.close_string = ''
|
||||||
|
return rw.rewrite(string)
|
||||||
|
|
||||||
def _test_xml(string):
|
def _test_xml(string):
|
||||||
return XMLRewriter(urlrewriter).rewrite(string)
|
return XMLRewriter(urlrewriter).rewrite(string)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user