1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

rewrite fixes: don't rewrite window.parent, since it can be overridden directly

html rewriter: ensure the style attribute is rewritten for all elements, and add a test
wombat: clean up assign() and setAttribute() and add additional checks to both
This commit is contained in:
Ilya Kreymer 2015-01-29 20:08:00 -08:00
parent 043ad5c860
commit 7e017fd85e
5 changed files with 22 additions and 24 deletions

View File

@ -168,10 +168,7 @@ class HTMLRewriterMixin(object):
# attr rewriting # attr rewriting
handler = self.rewrite_tags.get(tag) handler = self.rewrite_tags.get(tag)
if not handler: if not handler:
handler = self.rewrite_tags.get('') handler = {}
if not handler:
return False
self.out.write('<' + tag) self.out.write('<' + tag)

View File

@ -136,22 +136,14 @@ class JSLocationRewriterMixin(object):
(r'(?<=document\.)cookie', RegexRewriter.add_prefix(prefix), 0), (r'(?<=document\.)cookie', RegexRewriter.add_prefix(prefix), 0),
#todo: move to mixin? #todo: move to mixin?
(r'(?<=[\s=(){])(parent|top)\s*(?:[!}()]|==|$)', (r'(?<=[\s=(){])(top)\s*(?:[!}()]|==|$)',
RegexRewriter.add_prefix(prefix), 1), RegexRewriter.add_prefix(prefix), 1),
(r'^(parent|top)\s*(?:[!}()]|==|$)', (r'^(top)\s*(?:[!}()]|==|$)',
RegexRewriter.add_prefix(prefix), 1), RegexRewriter.add_prefix(prefix), 1),
(r'(?<=window\.)(parent|top)', (r'(?<=window\.)(top)',
RegexRewriter.add_prefix(prefix), 1), RegexRewriter.add_prefix(prefix), 1),
# (r'\b(top)\b[!=\W]+(?:self|window)',
# RegexRewriter.add_prefix(prefix), 1),
#(r'\b(?:self|window)\b[!=\W]+\b(top)\b',
#RegexRewriter.add_prefix(prefix), 1),
] ]
super(JSLocationRewriterMixin, self).__init__(rewriter, rules) super(JSLocationRewriterMixin, self).__init__(rewriter, rules)

View File

@ -8,7 +8,7 @@ ur"""
#================================================================= #=================================================================
>>> parse('<HTML><A Href="page.html">Text</a></hTmL>') >>> parse('<HTML><A Href="page.html">Text</a></hTmL>')
<HTML><a href="/web/20131226101010/http://example.com/some/path/page.html">Text</a></html> <html><a href="/web/20131226101010/http://example.com/some/path/page.html">Text</a></html>
>>> parse('<body x="y"><img src="../img.gif"/><br/></body>') >>> parse('<body x="y"><img src="../img.gif"/><br/></body>')
<body x="y"><img src="/web/20131226101010im_/http://example.com/some/img.gif"/><br/></body> <body x="y"><img src="/web/20131226101010im_/http://example.com/some/img.gif"/><br/></body>
@ -45,7 +45,7 @@ ur"""
# Don't rewrite anchors # Don't rewrite anchors
>>> parse('<HTML><A Href="#abc">Text</a></hTmL>') >>> parse('<HTML><A Href="#abc">Text</a></hTmL>')
<HTML><a href="#abc">Text</a></html> <html><a href="#abc">Text</a></html>
# Ensure attr values are not unescaped # Ensure attr values are not unescaped
>>> parse('<input value="&quot;X&quot;">X</input>') >>> parse('<input value="&quot;X&quot;">X</input>')
@ -101,6 +101,9 @@ ur"""
>>> parse('<div style="background: url(\'abc.html\')" onblah onclick="location = \'redirect.html\'"></div>') >>> parse('<div style="background: url(\'abc.html\')" onblah onclick="location = \'redirect.html\'"></div>')
<div style="background: url('/web/20131226101010/http://example.com/some/path/abc.html')" onblah="" onclick="WB_wombat_location = 'redirect.html'"></div> <div style="background: url('/web/20131226101010/http://example.com/some/path/abc.html')" onblah="" onclick="WB_wombat_location = 'redirect.html'"></div>
>>> parse('<i style="background-image: url(http://foo-.bar_.example.com/)"></i>')
<i style="background-image: url(/web/20131226101010/http://foo-.bar_.example.com/)"></i>
# Style # Style
>>> parse('<style>@import "styles.css" .a { font-face: url(\'myfont.ttf\') }</style>') >>> parse('<style>@import "styles.css" .a { font-face: url(\'myfont.ttf\') }</style>')
<style>@import "/web/20131226101010/http://example.com/some/path/styles.css" .a { font-face: url('/web/20131226101010/http://example.com/some/path/myfont.ttf') }</style> <style>@import "/web/20131226101010/http://example.com/some/path/styles.css" .a { font-face: url('/web/20131226101010/http://example.com/some/path/myfont.ttf') }</style>

View File

@ -50,7 +50,7 @@ r"""
'window.WB_wombat_top != window' 'window.WB_wombat_top != window'
>>> _test_js('if(parent!=top)') >>> _test_js('if(parent!=top)')
'if(WB_wombat_parent!=WB_wombat_top)' 'if(parent!=WB_wombat_top)'
>>> _test_js('top != window') >>> _test_js('top != window')
'WB_wombat_top != window' 'WB_wombat_top != window'

View File

@ -295,7 +295,10 @@ _WBWombat = (function() {
return this._orig_loc.replace(rewrite_url(url)); return this._orig_loc.replace(rewrite_url(url));
} }
this.assign = function(url) { this.assign = function(url) {
return this._orig_loc.assign(rewrite_url(url)); var new_url = rewrite_url(url);
if (new_url != url) {
return this._orig_loc.assign(new_url);
}
} }
this.reload = loc.reload; this.reload = loc.reload;
@ -523,13 +526,16 @@ _WBWombat = (function() {
Element.prototype.setAttribute = function(name, value) { Element.prototype.setAttribute = function(name, value) {
if (name) { if (name) {
var lowername = name.toLowerCase(); var lowername = name.toLowerCase();
if (equals_any(lowername, REWRITE_ATTRS)) { if (equals_any(lowername, REWRITE_ATTRS) && typeof(value) == "string") {
if (!this._no_rewrite) { if (!this._no_rewrite) {
var old_value = value;
value = rewrite_url(value); value = rewrite_url(value);
if (value != old_value) {
this._no_rewrite = true;
}
} }
} }
} }
orig_setAttribute.call(this, name, value); orig_setAttribute.call(this, name, value);
}; };
} }