diff --git a/pywb/rewrite/lxml_html_rewriter.py b/pywb/rewrite/lxml_html_rewriter.py index 3a8e05dd..2c8a8b8a 100644 --- a/pywb/rewrite/lxml_html_rewriter.py +++ b/pywb/rewrite/lxml_html_rewriter.py @@ -37,13 +37,17 @@ class LXMLHTMLRewriter(HTMLRewriterMixin): recover=True, ) + self.started = False + def feed(self, string): + self.started = True string = self.END_HTML.sub(u'', string) #string = string.replace(u'', u'') self.parser.feed(string) def _internal_close(self): - self.parser.close() + if self.started: + self.parser.close() #================================================================= diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py index 9dbe55ed..6236ae1e 100644 --- a/pywb/rewrite/test/test_html_rewriter.py +++ b/pywb/rewrite/test/test_html_rewriter.py @@ -104,6 +104,16 @@ ur""" >>> parse('') + +# Test blank +>>> parse('') + + +# Test no parsing at all +>>> p = HTMLRewriter(urlrewriter) +>>> p.close() +'' + """ from pywb.rewrite.url_rewriter import UrlRewriter diff --git a/pywb/rewrite/test/test_lxml_html_rewriter.py b/pywb/rewrite/test/test_lxml_html_rewriter.py index 6beb7c7c..125977e7 100644 --- a/pywb/rewrite/test/test_lxml_html_rewriter.py +++ b/pywb/rewrite/test/test_lxml_html_rewriter.py @@ -110,6 +110,15 @@ ur""" >>> parse('') + +# Test blank +>>> parse('') + + +# Test no parsing at all +>>> p = LXMLHTMLRewriter(urlrewriter) +>>> p.close() +'' """ from pywb.rewrite.url_rewriter import UrlRewriter