From 7ed92754460f22b5c1db21f8a5663865c0feeefb Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 6 Nov 2017 22:52:19 -0800 Subject: [PATCH] rewrite improvement: add custom rewrite for 'location =' with '__WB_check_loc(location).href' to check if actually changing location at runtime, replacing fixed 'WB_wombat_' prefix --- pywb/rewrite/regex_rewriters.py | 8 +++++++- pywb/rewrite/test/test_html_rewriter.py | 6 +++--- pywb/rewrite/test/test_regex_rewriters.py | 7 +++++-- pywb/static/wombat.js | 9 +++++++++ 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index 086a3ad7..ca99196f 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -26,6 +26,10 @@ class RegexRewriter(StreamingRewriter): def add_prefix(prefix): return lambda string: prefix + string + @staticmethod + def add_suffix(suffix): + return lambda string: string + suffix + @staticmethod def archival_rewrite(rewriter): return lambda string: rewriter.rewrite(string) @@ -174,6 +178,8 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\ THIS_RW = '(this && this._WB_wombat_obj_proxy || this)' + CHECK_LOC = '(self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = ' + @classmethod def replace_str(cls, replacer): return lambda x: x.replace('this', replacer) @@ -184,7 +190,7 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\ rules = rules + [ (r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0), - (r'(?>> parse('Text') Text -# parse attr with js proxy, wrap script, prepend WB_wombat_ for location assignment +# parse attr with js proxy, rewrite location assignment >>> parse('', js_proxy=True) - + -# parse attr with js proxy, wrap script, no WB_wombat_ needed +# parse attr with js proxy, assigning to location.href, no location assignment rewrite needed >>> parse('', js_proxy=True) diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py index 2b586dbe..4d7c833d 100644 --- a/pywb/rewrite/test/test_regex_rewriters.py +++ b/pywb/rewrite/test/test_regex_rewriters.py @@ -131,10 +131,13 @@ r""" #================================================================= >>> _test_js_obj_proxy('var foo = this; location = bar') -'var foo = (this && this._WB_wombat_obj_proxy || this); location = bar' +'var foo = (this && this._WB_wombat_obj_proxy || this); location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = bar' >>> _test_js_obj_proxy('var that = this\n location = bar') -'var that = (this && this._WB_wombat_obj_proxy || this)\n location = bar' +'var that = (this && this._WB_wombat_obj_proxy || this)\n location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = bar' + +>>> _test_js_obj_proxy('location = "xyz"') +'location = (self.__WB_check_loc && self.__WB_check_loc(location) || {}).href = "xyz"' >>> _test_js_obj_proxy('var foo = this.location') 'var foo = (this && this._WB_wombat_obj_proxy || this).location' diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index de2505fe..576511cf 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -2068,6 +2068,15 @@ var _WBWombat = function($wbwindow, wbinfo) { } catch(e) { } + + + win.__WB_check_loc = function(loc) { + if ((loc instanceof Location) || (loc instanceof WombatLocation)) { + return this.WB_wombat_location; + } else { + return {} + } + } } //============================================