1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewrite: ensure lxml parser closes gracefully on no input

This commit is contained in:
Ilya Kreymer 2014-04-03 13:00:22 -07:00
parent 5388a0b03b
commit 3aa4a4da7a
3 changed files with 24 additions and 1 deletions

View File

@ -37,13 +37,17 @@ class LXMLHTMLRewriter(HTMLRewriterMixin):
recover=True,
)
self.started = False
def feed(self, string):
    """Pass one chunk of markup to the underlying parser.

    Marks the rewriter as started, removes whatever ``END_HTML`` matches
    from the chunk, and forwards the remainder to ``self.parser``.

    NOTE(review): ``END_HTML`` is defined outside this view; it presumably
    matches the closing ``</html>`` tag -- confirm against the class body.

    :param string: markup chunk to parse
    """
    # Record that input has been seen; _internal_close() checks this flag
    # before closing the parser.
    self.started = True

    cleaned = self.END_HTML.sub(u'', string)
    self.parser.feed(cleaned)
def _internal_close(self):
self.parser.close()
if self.started:
self.parser.close()
#=================================================================

View File

@ -104,6 +104,16 @@ ur"""
>>> parse('<!-- <a href="http://example.com"></a> -->')
<!-- <a href="http://example.com"></a> -->
# Test blank
>>> parse('')
<BLANKLINE>
# Test no parsing at all
>>> p = HTMLRewriter(urlrewriter)
>>> p.close()
''
"""
from pywb.rewrite.url_rewriter import UrlRewriter

View File

@ -110,6 +110,15 @@ ur"""
>>> parse('<!-- <a href="http://example.com"></a> -->')
<!-- <a href="http://example.com"></a> -->
# Test blank
>>> parse('')
<BLANKLINE>
# Test no parsing at all
>>> p = LXMLHTMLRewriter(urlrewriter)
>>> p.close()
''
"""
from pywb.rewrite.url_rewriter import UrlRewriter