mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite: ensure lxml parser closes gracefully on no input
This commit is contained in:
parent
5388a0b03b
commit
3aa4a4da7a
@ -37,13 +37,17 @@ class LXMLHTMLRewriter(HTMLRewriterMixin):
|
||||
recover=True,
|
||||
)
|
||||
|
||||
self.started = False
|
||||
|
||||
def feed(self, string):
|
||||
self.started = True
|
||||
string = self.END_HTML.sub(u'', string)
|
||||
#string = string.replace(u'</html>', u'')
|
||||
self.parser.feed(string)
|
||||
|
||||
def _internal_close(self):
|
||||
- self.parser.close()
|
||||
+ if self.started:
|
||||
+     self.parser.close()
|
||||
|
||||
|
||||
#=================================================================
|
||||
|
@ -104,6 +104,16 @@ ur"""
|
||||
|
||||
>>> parse('<!-- <a href="http://example.com"></a> -->')
|
||||
<!-- <a href="http://example.com"></a> -->
|
||||
|
||||
# Test blank
|
||||
>>> parse('')
|
||||
<BLANKLINE>
|
||||
|
||||
# Test no parsing at all
|
||||
>>> p = HTMLRewriter(urlrewriter)
|
||||
>>> p.close()
|
||||
''
|
||||
|
||||
"""
|
||||
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
|
@ -110,6 +110,15 @@ ur"""
|
||||
|
||||
>>> parse('<!-- <a href="http://example.com"></a> -->')
|
||||
<!-- <a href="http://example.com"></a> -->
|
||||
|
||||
# Test blank
|
||||
>>> parse('')
|
||||
<BLANKLINE>
|
||||
|
||||
# Test no parsing at all
|
||||
>>> p = LXMLHTMLRewriter(urlrewriter)
|
||||
>>> p.close()
|
||||
''
|
||||
"""
|
||||
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
|
Loading…
x
Reference in New Issue
Block a user