mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite fixes: html_rewriter do not unescape attrs!
rules: don't rewrite past end of block or line
This commit is contained in:
parent
673ff35d15
commit
3718e1d21b
@ -208,6 +208,10 @@ class HTMLRewriter(HTMLParser):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
# called to unescape attrs -- do not unescape!
|
||||||
|
def unescape(self, s):
|
||||||
|
return s
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if not self.rewrite_tag_attrs(tag, attrs, False):
|
if not self.rewrite_tag_attrs(tag, attrs, False):
|
||||||
self.out.write(self.get_starttag_text())
|
self.out.write(self.get_starttag_text())
|
||||||
|
@ -31,6 +31,10 @@ r"""
|
|||||||
>>> parse('<HTML><A Href="#abc">Text</a></hTmL>')
|
>>> parse('<HTML><A Href="#abc">Text</a></hTmL>')
|
||||||
<HTML><a href="#abc">Text</a></html>
|
<HTML><a href="#abc">Text</a></html>
|
||||||
|
|
||||||
|
# Ensure attr values are not unescaped
|
||||||
|
>>> parse('<input value="&quot;X&quot;">X</input>')
|
||||||
|
<input value="&quot;X&quot;">X</input>
|
||||||
|
|
||||||
# Unicode
|
# Unicode
|
||||||
>>> parse('<a href="http://испытание.испытание/">испытание</a>')
|
>>> parse('<a href="http://испытание.испытание/">испытание</a>')
|
||||||
<a href="/web/20131226101010/http://испытание.испытание/">испытание</a>
|
<a href="/web/20131226101010/http://испытание.испытание/">испытание</a>
|
||||||
|
@ -23,7 +23,7 @@ rules:
|
|||||||
- url_prefix: 'com,facebook)/'
|
- url_prefix: 'com,facebook)/'
|
||||||
rewrite:
|
rewrite:
|
||||||
js_regexs:
|
js_regexs:
|
||||||
- match: 'Bootloader\.configurePage.*'
|
- match: 'Bootloader\.configurePage[^}$]+'
|
||||||
replace: '/* {0} */'
|
replace: '/* {0} */'
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user