1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

rewrite fixes: don't rewrite window.parent, as it can be overridden directly (top is still rewritten)

html rewriter: ensure the style attribute is rewritten for all elements, including tags with no tag-specific rewrite rules; add test!
wombat: cleanup and additional checks: assign() only navigates when the URL was actually rewritten, setAttribute() only rewrites string values
This commit is contained in:
Ilya Kreymer 2015-01-29 20:08:00 -08:00
parent 043ad5c860
commit 7e017fd85e
5 changed files with 22 additions and 24 deletions

View File

@ -168,10 +168,7 @@ class HTMLRewriterMixin(object):
# attr rewriting
handler = self.rewrite_tags.get(tag)
if not handler:
handler = self.rewrite_tags.get('')
if not handler:
return False
handler = {}
self.out.write('<' + tag)

View File

@ -136,22 +136,14 @@ class JSLocationRewriterMixin(object):
(r'(?<=document\.)cookie', RegexRewriter.add_prefix(prefix), 0),
#todo: move to mixin?
(r'(?<=[\s=(){])(parent|top)\s*(?:[!}()]|==|$)',
RegexRewriter.add_prefix(prefix), 1),
(r'(?<=[\s=(){])(top)\s*(?:[!}()]|==|$)',
RegexRewriter.add_prefix(prefix), 1),
(r'^(parent|top)\s*(?:[!}()]|==|$)',
RegexRewriter.add_prefix(prefix), 1),
(r'^(top)\s*(?:[!}()]|==|$)',
RegexRewriter.add_prefix(prefix), 1),
(r'(?<=window\.)(parent|top)',
RegexRewriter.add_prefix(prefix), 1),
# (r'\b(top)\b[!=\W]+(?:self|window)',
# RegexRewriter.add_prefix(prefix), 1),
#(r'\b(?:self|window)\b[!=\W]+\b(top)\b',
#RegexRewriter.add_prefix(prefix), 1),
(r'(?<=window\.)(top)',
RegexRewriter.add_prefix(prefix), 1),
]
super(JSLocationRewriterMixin, self).__init__(rewriter, rules)

View File

@ -8,7 +8,7 @@ ur"""
#=================================================================
>>> parse('<HTML><A Href="page.html">Text</a></hTmL>')
<HTML><a href="/web/20131226101010/http://example.com/some/path/page.html">Text</a></html>
<html><a href="/web/20131226101010/http://example.com/some/path/page.html">Text</a></html>
>>> parse('<body x="y"><img src="../img.gif"/><br/></body>')
<body x="y"><img src="/web/20131226101010im_/http://example.com/some/img.gif"/><br/></body>
@ -45,7 +45,7 @@ ur"""
# Don't rewrite anchors
>>> parse('<HTML><A Href="#abc">Text</a></hTmL>')
<HTML><a href="#abc">Text</a></html>
<html><a href="#abc">Text</a></html>
# Ensure attr values are not unescaped
>>> parse('<input value="&quot;X&quot;">X</input>')
@ -101,6 +101,9 @@ ur"""
>>> parse('<div style="background: url(\'abc.html\')" onblah onclick="location = \'redirect.html\'"></div>')
<div style="background: url('/web/20131226101010/http://example.com/some/path/abc.html')" onblah="" onclick="WB_wombat_location = 'redirect.html'"></div>
>>> parse('<i style="background-image: url(http://foo-.bar_.example.com/)"></i>')
<i style="background-image: url(/web/20131226101010/http://foo-.bar_.example.com/)"></i>
# Style
>>> parse('<style>@import "styles.css" .a { font-face: url(\'myfont.ttf\') }</style>')
<style>@import "/web/20131226101010/http://example.com/some/path/styles.css" .a { font-face: url('/web/20131226101010/http://example.com/some/path/myfont.ttf') }</style>

View File

@ -50,7 +50,7 @@ r"""
'window.WB_wombat_top != window'
>>> _test_js('if(parent!=top)')
'if(WB_wombat_parent!=WB_wombat_top)'
'if(parent!=WB_wombat_top)'
>>> _test_js('top != window')
'WB_wombat_top != window'

View File

@ -295,7 +295,10 @@ _WBWombat = (function() {
return this._orig_loc.replace(rewrite_url(url));
}
this.assign = function(url) {
return this._orig_loc.assign(rewrite_url(url));
var new_url = rewrite_url(url);
if (new_url != url) {
return this._orig_loc.assign(new_url);
}
}
this.reload = loc.reload;
@ -523,13 +526,16 @@ _WBWombat = (function() {
Element.prototype.setAttribute = function(name, value) {
if (name) {
var lowername = name.toLowerCase();
if (equals_any(lowername, REWRITE_ATTRS)) {
if (equals_any(lowername, REWRITE_ATTRS) && typeof(value) == "string") {
if (!this._no_rewrite) {
var old_value = value;
value = rewrite_url(value);
if (value != old_value) {
this._no_rewrite = true;
}
}
}
}
orig_setAttribute.call(this, name, value);
};
}