1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Rewrite improvement: replace the fixed 'WB_wombat_' prefix for 'location =' assignments with a custom rewrite that routes the assignment through '__WB_check_loc(location).href', to check at runtime whether the location is actually being changed.

This commit is contained in:
Ilya Kreymer 2017-11-06 22:52:19 -08:00
parent f34970c5ec
commit 7ed9275446
4 changed files with 24 additions and 6 deletions

View File

@ -26,6 +26,10 @@ class RegexRewriter(StreamingRewriter):
def add_prefix(prefix):
    """Build a callable that prepends ``prefix`` to the string it receives."""
    def _prepend(string):
        return prefix + string

    return _prepend
@staticmethod
def add_suffix(suffix):
    """Build a callable that appends ``suffix`` to the string it receives."""
    def _append(string):
        return string + suffix

    return _append
@staticmethod
def archival_rewrite(rewriter):
    """Build a callable that passes its string through ``rewriter.rewrite``."""
    def _rewrite(string):
        # Look up ``rewrite`` at call time, not when the callable is built.
        return rewriter.rewrite(string)

    return _rewrite
@ -174,6 +178,8 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
# JS expression substituted for 'this' by the rewrite rules: evaluates to
# this._WB_wombat_obj_proxy when 'this' and that property are truthy,
# otherwise to 'this' itself.
THIS_RW = '(this && this._WB_wombat_obj_proxy || this)'
# JS snippet appended after a matched 'location =' assignment, so the value is
# assigned to .href of whatever self.__WB_check_loc(location) returns, or of a
# throwaway {} when that helper is missing or returns a falsy value.
CHECK_LOC = '(self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = '
@classmethod
def replace_str(cls, replacer):
    """Build a callable that swaps every 'this' in its input for ``replacer``."""
    def _swap_this(x):
        # Plain substring replacement; word boundaries are not checked here.
        return x.replace('this', replacer)

    return _swap_this
@ -184,7 +190,7 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
rules = rules + [
(r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
(r'(?<!\.)\blocation\b[=]\s*(?![=])', self.add_prefix('WB_wombat_'), 0),
(r'(?<!\.)\blocation\b\s*[=]\s*(?![=])', self.add_suffix(self.CHECK_LOC), 0),
(r'\breturn\s+this\b\s*(?![.$])', self.replace_str(self.THIS_RW), 0),
(r'(?<=[\n])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(';' + self.THIS_RW), 0),
(r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(self.THIS_RW), 0),

View File

@ -305,11 +305,11 @@ r"""
>>> parse('<HTML><A Href="">Text</a></hTmL>')
<html><a href="">Text</a></html>
# parse attr with js proxy, wrap script, prepend WB_wombat_ for location assignment
# parse attr with js proxy, rewrite location assignment
>>> parse('<html><a href="javascript:location=\'foo.html\'"></a></html>', js_proxy=True)
<html><a href="javascript:{ window.WB_wombat_location='foo.html' }"></a></html>
<html><a href="javascript:{ location=(self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = 'foo.html' }"></a></html>
# parse attr with js proxy, wrap script, no WB_wombat_ needed
# parse attr with js proxy, assigning to location.href, no location assignment rewrite needed
>>> parse('<html><a href="javascript:location.href=\'foo.html\'"></a></html>', js_proxy=True)
<html><a href="javascript:{ location.href='foo.html' }"></a></html>

View File

@ -131,10 +131,13 @@ r"""
#=================================================================
>>> _test_js_obj_proxy('var foo = this; location = bar')
'var foo = (this && this._WB_wombat_obj_proxy || this); location = bar'
'var foo = (this && this._WB_wombat_obj_proxy || this); location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = bar'
>>> _test_js_obj_proxy('var that = this\n location = bar')
'var that = (this && this._WB_wombat_obj_proxy || this)\n location = bar'
'var that = (this && this._WB_wombat_obj_proxy || this)\n location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = bar'
>>> _test_js_obj_proxy('location = "xyz"')
'location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = "xyz"'
>>> _test_js_obj_proxy('var foo = this.location')
'var foo = (this && this._WB_wombat_obj_proxy || this).location'

View File

@ -2068,6 +2068,15 @@ var _WBWombat = function($wbwindow, wbinfo) {
} catch(e) {
}
// Helper invoked by rewritten `location = ...` assignments: returns the object
// whose `.href` should receive the assigned value.
win.__WB_check_loc = function(loc) {
    var isLocationObj = (loc instanceof Location) || (loc instanceof WombatLocation);

    // Anything else gets a throwaway object, so setting `.href` on it has no effect.
    if (!isLocationObj) {
        return {};
    }

    return this.WB_wombat_location;
};
}
//============================================