1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewrite fixes: html_rewriter do not unescape attrs!

rules: don't rewrite past end of block or line
This commit is contained in:
Ilya Kreymer 2014-03-06 02:29:52 -08:00
parent 673ff35d15
commit 3718e1d21b
3 changed files with 9 additions and 1 deletions

View File

@ -208,6 +208,10 @@ class HTMLRewriter(HTMLParser):
return result
# Called to unescape attribute values -- deliberately a no-op.
# Returning the value unchanged keeps entities such as &quot; exactly as
# they appeared in the input, so attribute escaping is not altered when
# the tag is written back out.
# NOTE(review): presumably overrides HTMLParser.unescape -- confirm
# against the base class for the Python version in use.
def unescape(self, s):
return s
def handle_starttag(self, tag, attrs):
if not self.rewrite_tag_attrs(tag, attrs, False):
self.out.write(self.get_starttag_text())

View File

@ -31,6 +31,10 @@ r"""
>>> parse('<HTML><A Href="#abc">Text</a></hTmL>')
<HTML><a href="#abc">Text</a></html>
# Ensure attr values are not unescaped
>>> parse('<input value="&quot;X&quot;">X</input>')
<input value="&quot;X&quot;">X</input>
# Unicode
>>> parse('<a href="http://испытание.испытание/">испытание</a>')
<a href="/web/20131226101010/http://испытание.испытание/">испытание</a>

View File

@ -23,7 +23,7 @@ rules:
- url_prefix: 'com,facebook)/'
rewrite:
js_regexs:
- match: 'Bootloader\.configurePage.*'
# Comment out the Bootloader.configurePage call, stopping at the end of the
# block ('}') or the end of the line. Inside a character class '$' is a
# literal dollar sign rather than an end-of-line anchor, and a negated class
# also matches newlines, so '\n' is excluded explicitly.
- match: 'Bootloader\.configurePage[^}\n]+'
  replace: '/* {0} */'