mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite fixes: html_rewriter do not unescape attrs!
rules: don't rewrite past end of block or line
This commit is contained in:
parent
673ff35d15
commit
3718e1d21b
@@ -208,6 +208,10 @@ class HTMLRewriter(HTMLParser):
|
||||
|
||||
return result
|
||||
|
||||
# called to unescape attrs -- do not unescape!
|
||||
def unescape(self, s):
|
||||
return s
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if not self.rewrite_tag_attrs(tag, attrs, False):
|
||||
self.out.write(self.get_starttag_text())
|
||||
|
@@ -31,6 +31,10 @@ r"""
|
||||
>>> parse('<HTML><A Href="#abc">Text</a></hTmL>')
|
||||
<HTML><a href="#abc">Text</a></html>
|
||||
|
||||
# Ensure attr values are not unescaped
|
||||
>>> parse('<input value="&quot;X&quot;">X</input>')
|
||||
<input value="&quot;X&quot;">X</input>
|
||||
|
||||
# Unicode
|
||||
>>> parse('<a href="http://испытание.испытание/">испытание</a>')
|
||||
<a href="/web/20131226101010/http://испытание.испытание/">испытание</a>
|
||||
|
@@ -23,7 +23,7 @@ rules:
|
||||
- url_prefix: 'com,facebook)/'
|
||||
rewrite:
|
||||
js_regexs:
|
||||
- match: 'Bootloader\.configurePage.*'
|
||||
- match: 'Bootloader\.configurePage[^}$]+'
|
||||
replace: '/* {0} */'
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user