From 3718e1d21b4ab9911874a59566912ffc7874591f Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 6 Mar 2014 02:29:52 -0800 Subject: [PATCH] rewrite fixes: html_rewriter do not unescape attrs! rules: don't rewrite past end of block or line --- pywb/rewrite/html_rewriter.py | 4 ++++ pywb/rewrite/test/test_rewrite.py | 4 ++++ pywb/rules.yaml | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index a6d9718d..93b89228 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -208,6 +208,10 @@ class HTMLRewriter(HTMLParser): return result + # called to unescape attrs -- do not unescape! + def unescape(self, s): + return s + def handle_starttag(self, tag, attrs): if not self.rewrite_tag_attrs(tag, attrs, False): self.out.write(self.get_starttag_text()) diff --git a/pywb/rewrite/test/test_rewrite.py b/pywb/rewrite/test/test_rewrite.py index 6915e26f..954bd035 100644 --- a/pywb/rewrite/test/test_rewrite.py +++ b/pywb/rewrite/test/test_rewrite.py @@ -31,6 +31,10 @@ r""" >>> parse('Text') Text +# Ensure attr values are not unescaped +>>> parse('X') +X + # Unicode >>> parse('испытание') испытание diff --git a/pywb/rules.yaml b/pywb/rules.yaml index ce9c7d81..d29c0b3b 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -23,7 +23,7 @@ rules: - url_prefix: 'com,facebook)/' rewrite: js_regexs: - - match: 'Bootloader\.configurePage.*' + - match: 'Bootloader\.configurePage[^}$]+' replace: '/* {0} */'