mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite: HTMLRewriter should insert head_insert at the end of the stream if it hasn't
been inserted by then (and only if some content was written -- don't insert for 0-length responses). Addresses the missing head insert when only head-level tags (e.g. <title>) are present but there is no <head> tag, as per hypothesis/via#9
This commit is contained in:
parent
48aa73df38
commit
33f247582f
@ -83,6 +83,7 @@ class HTMLRewriterMixin(object):
|
|||||||
def getvalue(self):
|
def getvalue(self):
|
||||||
return b''.join(self.ls)
|
return b''.join(self.ls)
|
||||||
|
|
||||||
|
|
||||||
# ===========================
|
# ===========================
|
||||||
def __init__(self, url_rewriter,
|
def __init__(self, url_rewriter,
|
||||||
head_insert=None,
|
head_insert=None,
|
||||||
@ -105,6 +106,8 @@ class HTMLRewriterMixin(object):
|
|||||||
# get opts from urlrewriter
|
# get opts from urlrewriter
|
||||||
self.opts = url_rewriter.rewrite_opts
|
self.opts = url_rewriter.rewrite_opts
|
||||||
|
|
||||||
|
self.parsed_any = False
|
||||||
|
|
||||||
# ===========================
|
# ===========================
|
||||||
META_REFRESH_REGEX = re.compile('^[\\d.]+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$',
|
META_REFRESH_REGEX = re.compile('^[\\d.]+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$',
|
||||||
re.IGNORECASE | re.MULTILINE)
|
re.IGNORECASE | re.MULTILINE)
|
||||||
@ -288,6 +291,9 @@ class HTMLRewriterMixin(object):
|
|||||||
|
|
||||||
result = self.out.getvalue()
|
result = self.out.getvalue()
|
||||||
|
|
||||||
|
# track that something was parsed
|
||||||
|
self.parsed_any = self.parsed_any or bool(string)
|
||||||
|
|
||||||
# Clear buffer to create new one for next rewrite()
|
# Clear buffer to create new one for next rewrite()
|
||||||
self.out = None
|
self.out = None
|
||||||
|
|
||||||
@ -338,6 +344,12 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
|||||||
self.feed(end_tag)
|
self.feed(end_tag)
|
||||||
self._wb_parse_context = None
|
self._wb_parse_context = None
|
||||||
|
|
||||||
|
# if head_insert hasn't been inserted yet, but some content was written
|
||||||
|
# out, then insert head_insert now
|
||||||
|
if self.head_insert and self.parsed_any:
|
||||||
|
self.out.write(self.head_insert)
|
||||||
|
self.head_insert = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
HTMLParser.close(self)
|
HTMLParser.close(self)
|
||||||
except HTMLParseError: # pragma: no cover
|
except HTMLParseError: # pragma: no cover
|
||||||
|
@ -98,6 +98,16 @@ def test_local_no_head():
|
|||||||
# link rewritten
|
# link rewritten
|
||||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
|
|
||||||
|
def test_local_no_head_only_title():
|
||||||
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head_2.html',
|
||||||
|
urlrewriter,
|
||||||
|
head_insert_func,
|
||||||
|
'com,example,test)/')
|
||||||
|
|
||||||
|
# wombat insert added
|
||||||
|
assert '<script src="/static/__pywb/wombat.js"> </script>' in buff
|
||||||
|
|
||||||
|
|
||||||
def test_local_no_head_banner_only():
|
def test_local_no_head_banner_only():
|
||||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
|
||||||
bn_urlrewriter,
|
bn_urlrewriter,
|
||||||
|
3
sample_archive/text_content/sample_no_head_2.html
Normal file
3
sample_archive/text_content/sample_no_head_2.html
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<title>A title</title>
|
||||||
|
Some Text
|
Loading…
x
Reference in New Issue
Block a user