mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
js insert rewrite improvements:
- client-side script: only rewrite if overridden objects are found in script text
- server-side inline js rewrite: only rewrite if overridden objects are found, don't insert before 'javascript:' marker
- tests: add improved tests for html js in attribute rewriting
This commit is contained in:
parent
1dbabef410
commit
70a09e2804
@ -233,7 +233,7 @@ class StreamingRewriter(object):
|
||||
def rewrite(self, string):
|
||||
return string
|
||||
|
||||
def rewrite_complete(self, string):
|
||||
def rewrite_complete(self, string, **kwargs):
|
||||
return self.first_buff + self.rewrite(string) + self.final_read()
|
||||
|
||||
def final_read(self):
|
||||
|
@ -248,12 +248,13 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
else:
|
||||
return ''
|
||||
|
||||
def _rewrite_script(self, script_content, ensure_window=False):
|
||||
def _rewrite_script(self, script_content, inline_attr=False):
|
||||
if not script_content:
|
||||
return ''
|
||||
|
||||
content = self.js_rewriter.rewrite_complete(script_content)
|
||||
if ensure_window:
|
||||
content = self.js_rewriter.rewrite_complete(script_content,
|
||||
inline_attr=inline_attr)
|
||||
if inline_attr:
|
||||
content = self.ADD_WINDOW.sub('window.\\1', content)
|
||||
|
||||
return content
|
||||
|
@ -162,7 +162,7 @@ class JSWombatProxyRewriterMixin(object):
|
||||
|
||||
local_init_func = '\nvar {0} = function(name) {{\
|
||||
return (self._wb_wombat && self._wb_wombat.local_init &&\
|
||||
self._wb_wombat.local_init(name)) || self[name]; }}\n\
|
||||
self._wb_wombat.local_init(name)) || self[name]; }};\n\
|
||||
if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
|
||||
{{\n'
|
||||
|
||||
@ -191,7 +191,7 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
|
||||
|
||||
rules = rules + [
|
||||
(r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
|
||||
#(r'Function\(["\']return this["\']\)', self.fixed(func_rw), 0),
|
||||
(r'(?<!\.)\blocation\b[=]\s*(?![=])', self.add_prefix('WB_wombat_'), 0),
|
||||
(r'\breturn\s+this\b\s*(?![.$])', self.replace_str(self.THIS_RW), 0),
|
||||
(r'(?<=[\n])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(';' + self.THIS_RW), 0),
|
||||
(r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(self.THIS_RW), 0),
|
||||
@ -205,10 +205,30 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ return obj; }} }}\n\
|
||||
|
||||
self.first_buff = self.local_init_func.format(self.local_init_func_name) + local_declares
|
||||
|
||||
self.close_string = '\n\n}'
|
||||
self.last_buff = '\n\n}'
|
||||
|
||||
def rewrite_complete(self, string, **kwargs):
    """Rewrite a complete script, optionally as an inline HTML attribute.

    :param string: the script text to rewrite
    :param kwargs: if ``inline_attr`` is truthy, the script is treated as
        an inline attribute value; otherwise the call is delegated to the
        parent class ``rewrite_complete``.
    :return: the rewritten script. For inline attributes the script is
        returned unchanged when it mentions none of ``self.local_objs``;
        otherwise it is wrapped in ``self.first_buff`` / ``self.last_buff``,
        with the wrapper placed after a leading ``javascript:`` marker.
    """
    if not kwargs.get('inline_attr'):
        return super(JSWombatProxyRewriterMixin, self).rewrite_complete(string)

    # check if any of the wrapped objects are used in the script
    # if not, don't rewrite
    if not any(obj in string for obj in self.local_objs):
        return string

    # keep the 'javascript:' marker in front of the injected wrapper
    marker = 'javascript:'
    if string.startswith(marker):
        script = string[len(marker):]
    else:
        marker = ''
        script = string

    # Collapse newlines only in the injected wrapper buffers. Deleting
    # newlines from the page's own script would glue adjacent tokens together
    # and let a '//' line comment swallow the rest of the handler, including
    # the closing brace.
    # NOTE(review): assumes self.rewrite() itself does not inject newlines
    # that must be removed -- confirm against the rule set.
    opening = self.first_buff.replace('\n', '')
    closing = self.last_buff.replace('\n', '')

    return marker + opening + self.rewrite(script) + closing
|
||||
|
||||
def final_read(self):
|
||||
return self.close_string
|
||||
return self.last_buff
|
||||
|
||||
|
||||
# =================================================================
|
||||
|
@ -305,6 +305,17 @@ r"""
|
||||
>>> parse('<HTML><A Href="">Text</a></hTmL>')
|
||||
<html><a href="">Text</a></html>
|
||||
|
||||
# parse attr with js proxy, wrap script, prepend WB_wombat_ for location assignment
|
||||
>>> parse('<html><a href="javascript:location=\'foo.html\'"></a></html>', js_proxy=True)
|
||||
<html><a href="javascript:{ window.WB_wombat_location='foo.html' }"></a></html>
|
||||
|
||||
# parse attr with js proxy, wrap script, no WB_wombat_ needed
|
||||
>>> parse('<html><a href="javascript:location.href=\'foo.html\'"></a></html>', js_proxy=True)
|
||||
<html><a href="javascript:{ location.href='foo.html' }"></a></html>
|
||||
|
||||
# parse attr with js proxy, no rewrite needed
|
||||
>>> parse('<html><a href="javascript:alert()"></a></html>', js_proxy=True)
|
||||
<html><a href="javascript:alert()"></a></html>
|
||||
|
||||
|
||||
|
||||
@ -321,6 +332,7 @@ r"""
|
||||
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
from pywb.rewrite.html_rewriter import HTMLRewriter
|
||||
from pywb.rewrite.regex_rewriters import JSWombatProxyRewriter
|
||||
|
||||
import pprint
|
||||
import six
|
||||
@ -341,11 +353,23 @@ urlrewriter_pencode = new_rewriter(rewrite_opts=dict(punycode_links=True))
|
||||
no_base_canon_rewriter = new_rewriter(rewrite_opts=dict(rewrite_rel_canon=False,
|
||||
rewrite_base=False))
|
||||
|
||||
def parse(data, head_insert=None, urlrewriter=urlrewriter, parse_comments=False):
|
||||
def parse(data, head_insert=None, urlrewriter=urlrewriter, parse_comments=False,
|
||||
js_proxy=False):
|
||||
|
||||
if js_proxy:
|
||||
js_rewriter_class = JSWombatProxyRewriter
|
||||
else:
|
||||
js_rewriter_class = None
|
||||
|
||||
parser = HTMLRewriter(urlrewriter, head_insert=head_insert,
|
||||
url=ORIGINAL_URL,
|
||||
js_rewriter_class=js_rewriter_class,
|
||||
parse_comments=parse_comments)
|
||||
|
||||
if js_proxy:
|
||||
parser.js_rewriter.first_buff = '{ '
|
||||
parser.js_rewriter.last_buff = ' }'
|
||||
|
||||
if six.PY2 and isinstance(data, six.text_type):
|
||||
data = data.encode('utf-8')
|
||||
|
||||
|
@ -1308,6 +1308,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
return false;
|
||||
}
|
||||
|
||||
var text = elem.textContent.trim();
|
||||
|
||||
if (!text || text.indexOf("<") == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
var override_props = ["window",
|
||||
"self",
|
||||
"document",
|
||||
@ -1317,6 +1323,19 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
"frames",
|
||||
"opener"];
|
||||
|
||||
var contains_props = false;
|
||||
|
||||
for (var i = 0; i < override_props.length; i++) {
|
||||
if (text.indexOf(override_props[i]) >= 0) {
|
||||
contains_props = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!contains_props) {
|
||||
return false;
|
||||
}
|
||||
|
||||
var insert_str =
|
||||
'var _____WB$wombat$assign$function_____ = function(name) {return (self._wb_wombat && self._wb_wombat.local_init && self._wb_wombat.local_init(name)) || self[name]; }\n' +
|
||||
'if (!self.__WB_pmw) { self.__WB_pmw = function(obj) { return obj; } }\n' +
|
||||
@ -1324,7 +1343,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
var prop;
|
||||
|
||||
for (var i = 0; i < override_props.length; i++) {
|
||||
for (i = 0; i < override_props.length; i++) {
|
||||
prop = override_props[i];
|
||||
insert_str += 'let ' + prop + ' = _____WB$wombat$assign$function_____("' + prop + '");\n';
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user