1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

rewrite: js obj proxy rewrite improvements:

- add general ' = this' rewriting to check for proxy obj
- add tests for js obj proxy regex rewriting (without first or last wrapper)
This commit is contained in:
Ilya Kreymer 2017-08-16 22:44:48 -07:00 committed by Ilya Kreymer
parent bbe3cebd2f
commit aaad583276
2 changed files with 46 additions and 7 deletions

View File

@ -21,6 +21,10 @@ class RegexRewriter(StreamingRewriter):
def format(template):
    """Return a replacer that inserts its argument into *template*.

    The returned callable takes one string and returns
    ``template.format(string)``, so *template* should contain a single
    positional placeholder such as ``{0}``.
    """
    return lambda string: template.format(string)
@staticmethod
def fixed(string):
    """Return a replacer that ignores its argument and always yields *string*.

    Intended for rules whose replacement text does not depend on the
    matched text.
    """
    return lambda _: string
@staticmethod
def remove_https(string):
    """Return *string* with every occurrence of ``"https"`` replaced by ``"http"``.

    Note that the replacement is not limited to a leading scheme: any
    ``"https"`` substring anywhere in *string* is rewritten as well.
    """
    return string.replace("https", "http")
@ -33,9 +37,6 @@ class RegexRewriter(StreamingRewriter):
def archival_rewrite(rewriter):
    """Return a replacer that delegates to ``rewriter.rewrite``.

    The returned callable takes one string and returns
    ``rewriter.rewrite(string)``; *rewriter* only needs to expose a
    ``rewrite`` method.
    """
    return lambda string: rewriter.rewrite(string)
# @staticmethod
# def replacer(other):
# return lambda m, string: other
# Regex source matching an "http://" or "https://" prefix followed by host
# characters (letters, digits, ':', '_', '@', '.', '-'). Each slash may be
# preceded by an optional backslash, so escaped forms such as "http:\/\/host"
# also match.
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
@ -178,12 +179,17 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
'frames',
'opener']
# JavaScript expression substituted for `this` by the rules built in __init__:
# it evaluates to this._WB_wombat_obj_proxy when `this` is truthy and that
# property is truthy, and to plain `this` otherwise.
THIS_RW = '(this && this._WB_wombat_obj_proxy || this)'
# Build the JS object-proxy rewrite rules, append them to any caller-supplied
# `rules`, and hand the combined list to the parent initializer.
# `rules + [...]` creates a new list, so the `rules=[]` default object is
# never modified in place.
def __init__(self, rewriter, rules=[]):
# Replacement text for Function("return this"): same call, returning THIS_RW.
func_rw = 'Function("return {0}")'.format(self.THIS_RW)
rules = rules + [
# NOTE(review): this text comes from a rendered diff without +/- markers.
# The next three rules look like the pre-change versions of the three rules
# that follow them -- confirm against the repository before treating both
# sets as active.
(r'Function\(["\']return this["\']\)', RegexRewriter.format('Function("return this._WB_wombat_obj_proxy || this")'), 0),
(r'(?<![$.])\bthis\b(?=(?:\.(?:{0})))'.format('|'.join(self.local_objs)),
RegexRewriter.format('(this && this._WB_wombat_obj_proxy || this)'), 0),
(r'(?<=\.)postMessage\b\(', RegexRewriter.add_prefix('__WB_pmw(self).'), 0),
# Function("return this") / Function('return this') -> fixed func_rw text.
(r'Function\(["\']return this["\']\)', self.fixed(func_rw), 0),
# A standalone `this` (not preceded by '$' or '.') that is immediately
# followed by `.<name>` for a whole-word <name> in local_objs -> THIS_RW.
(r'(?<![$.])\bthis\b(?=(?:\.(?:{0})\b))'.format('|'.join(self.local_objs)),
self.fixed(self.THIS_RW), 0),
# `postMessage(` preceded by a dot; add_prefix is defined elsewhere and
# presumably prepends '__WB_pmw(self).' to the match -- TODO confirm.
(r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
# `this` directly after '=' and not followed by '.' or '$' -> THIS_RW.
# The whitespace around `this` is part of the match, so it is replaced too.
(r'(?<=[=])\s*this\b\s*(?![.$])', self.fixed(self.THIS_RW), 0),
]
super(JSWombatProxyRewriterMixin, self).__init__(rewriter, rules)

View File

@ -126,6 +126,32 @@ r"""
>>> _test_js('"a=b&amp;http:\/\/example.com/;c=d"')
'"a=b&amp;/web/20131010/http:\\/\\/example.com/;c=d"'
#=================================================================
# JS Obj Proxy Rewriter
#=================================================================
>>> _test_js_obj_proxy('var foo = this; location = bar')
'var foo =(this && this._WB_wombat_obj_proxy || this); location = bar'
>>> _test_js_obj_proxy('var foo = this.location')
'var foo = (this && this._WB_wombat_obj_proxy || this).location'
>>> _test_js_obj_proxy('var foo = this.location2')
'var foo = this.location2'
>>> _test_js_obj_proxy('func(Function("return this"));')
'func(Function("return (this && this._WB_wombat_obj_proxy || this)"));'
>>> _test_js_obj_proxy('this.document.location = foo')
'(this && this._WB_wombat_obj_proxy || this).document.location = foo'
# not rewritten
>>> _test_js_obj_proxy('var window = this$')
'var window = this$'
>>> _test_js_obj_proxy('var window = $this')
'var window = $this'
#=================================================================
# XML Rewriting
#=================================================================
@ -206,6 +232,7 @@ r"""
#=================================================================
from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.rewrite.regex_rewriters import RegexRewriter, JSRewriter, CSSRewriter, XMLRewriter
from pywb.rewrite.regex_rewriters import JSWombatProxyRewriter
# Shared UrlRewriter instance used by the _test_* helper functions below.
urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/', 'https://localhost/web/')
@ -214,6 +241,12 @@ urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/', 'https://loca
def _test_js(string, extra=None):
    """Rewrite *string* with a JSRewriter built on the shared ``urlrewriter``.

    *extra* is passed to ``JSRewriter`` as its second argument. When it is
    omitted, a fresh empty list is passed, so no list object is shared
    between calls.
    """
    if extra is None:
        extra = []
    return JSRewriter(urlrewriter, extra).rewrite(string)
def _test_js_obj_proxy(string):
    """Rewrite *string* with a JSWombatProxyRewriter built on ``urlrewriter``.

    ``first_buff`` and ``close_string`` are set to empty strings before
    rewriting; presumably these hold the wrapper text emitted before and
    after the rewritten content, so the result shows only the regex rewrites.
    """
    proxy_rewriter = JSWombatProxyRewriter(urlrewriter)
    proxy_rewriter.first_buff = proxy_rewriter.close_string = ''
    return proxy_rewriter.rewrite(string)
def _test_xml(string):
    """Rewrite *string* with a fresh XMLRewriter built on the shared ``urlrewriter``."""
    xml_rewriter = XMLRewriter(urlrewriter)
    return xml_rewriter.rewrite(string)