mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite: ensure lxml parser closes gracefully on no input
This commit is contained in:
parent
5388a0b03b
commit
3aa4a4da7a
@ -37,13 +37,17 @@ class LXMLHTMLRewriter(HTMLRewriterMixin):
|
|||||||
recover=True,
|
recover=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.started = False
|
||||||
|
|
||||||
def feed(self, string):
|
def feed(self, string):
|
||||||
|
self.started = True
|
||||||
string = self.END_HTML.sub(u'', string)
|
string = self.END_HTML.sub(u'', string)
|
||||||
#string = string.replace(u'</html>', u'')
|
#string = string.replace(u'</html>', u'')
|
||||||
self.parser.feed(string)
|
self.parser.feed(string)
|
||||||
|
|
||||||
def _internal_close(self):
|
def _internal_close(self):
|
||||||
self.parser.close()
|
if self.started:
|
||||||
|
self.parser.close()
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -104,6 +104,16 @@ ur"""
|
|||||||
|
|
||||||
>>> parse('<!-- <a href="http://example.com"></a> -->')
|
>>> parse('<!-- <a href="http://example.com"></a> -->')
|
||||||
<!-- <a href="http://example.com"></a> -->
|
<!-- <a href="http://example.com"></a> -->
|
||||||
|
|
||||||
|
# Test blank
|
||||||
|
>>> parse('')
|
||||||
|
<BLANKLINE>
|
||||||
|
|
||||||
|
# Test no parsing at all
|
||||||
|
>>> p = HTMLRewriter(urlrewriter)
|
||||||
|
>>> p.close()
|
||||||
|
''
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
|
@ -110,6 +110,15 @@ ur"""
|
|||||||
|
|
||||||
>>> parse('<!-- <a href="http://example.com"></a> -->')
|
>>> parse('<!-- <a href="http://example.com"></a> -->')
|
||||||
<!-- <a href="http://example.com"></a> -->
|
<!-- <a href="http://example.com"></a> -->
|
||||||
|
|
||||||
|
# Test blank
|
||||||
|
>>> parse('')
|
||||||
|
<BLANKLINE>
|
||||||
|
|
||||||
|
# Test no parsing at all
|
||||||
|
>>> p = LXMLHTMLRewriter(urlrewriter)
|
||||||
|
>>> p.close()
|
||||||
|
''
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
|
Loading…
x
Reference in New Issue
Block a user