diff --git a/pywb/rewrite/content_rewriter.py b/pywb/rewrite/content_rewriter.py index 18805772..b6eb8f43 100644 --- a/pywb/rewrite/content_rewriter.py +++ b/pywb/rewrite/content_rewriter.py @@ -153,9 +153,9 @@ class BaseContentRewriter(object): except: pass + # no charset detected, encode banner as ascii html entities if not head_insert_str: - rwinfo.charset = 'utf-8' - head_insert_str = head_insert_orig.encode(rwinfo.charset) + head_insert_str = head_insert_orig.encode('ascii', 'xmlcharrefreplace') head_insert_str = head_insert_str.decode('iso-8859-1') diff --git a/pywb/rewrite/test/test_content_rewriter.py b/pywb/rewrite/test/test_content_rewriter.py index 6d100a26..b89c3959 100644 --- a/pywb/rewrite/test/test_content_rewriter.py +++ b/pywb/rewrite/test/test_content_rewriter.py @@ -74,10 +74,18 @@ class TestContentRewriter(object): cdx['is_fuzzy'] = '1' cdx['is_live'] = is_live + def insert_func(rule, cdx): + return '' + if use_js_proxy: - return self.js_proxy_content_rewriter(record, url_rewriter, None, cdx=cdx, environ=environ) + rewriter = self.js_proxy_content_rewriter else: - return self.content_rewriter(record, url_rewriter, None, cdx=cdx, environ=environ) + rewriter = self.content_rewriter + + return rewriter(record, url_rewriter, cookie_rewriter=None, + head_insert_func=insert_func, + cdx=cdx, + environ=environ) def test_rewrite_html(self, headers): content = '
' @@ -154,15 +162,15 @@ class TestContentRewriter(object): assert ('Content-Type', 'text/html; charset=latin-1') in headers.headers assert b''.join(gen).decode('latin-1') == exp - def test_rewrite_html_other_encoding_anchor(self): - headers = {'Content-Type': 'text/html; charset=latin-1'} + def test_rewrite_html_no_encoding_anchor(self): + headers = {'Content-Type': 'text/html'} content = b'' headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_') exp = u'' assert is_rw - assert ('Content-Type', 'text/html; charset=latin-1') in headers.headers + assert ('Content-Type', 'text/html') in headers.headers assert b''.join(gen).decode('latin-1') == exp def test_rewrite_html_js_mod(self, headers):