mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite: HTMLRewriter should insert head_insert at the end of the stream if it hasn't
been inserted by then (and only if some content was written -- don't insert for 0-length responses). Addresses the missing head insert when only head-level tags (e.g. <title>) are present but there is no <head> tag, as per hypothesis/via#9
This commit is contained in:
parent
48aa73df38
commit
33f247582f
@ -83,6 +83,7 @@ class HTMLRewriterMixin(object):
|
||||
def getvalue(self):
|
||||
return b''.join(self.ls)
|
||||
|
||||
|
||||
# ===========================
|
||||
def __init__(self, url_rewriter,
|
||||
head_insert=None,
|
||||
@ -105,6 +106,8 @@ class HTMLRewriterMixin(object):
|
||||
# get opts from urlrewriter
|
||||
self.opts = url_rewriter.rewrite_opts
|
||||
|
||||
self.parsed_any = False
|
||||
|
||||
# ===========================
|
||||
META_REFRESH_REGEX = re.compile('^[\\d.]+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$',
|
||||
re.IGNORECASE | re.MULTILINE)
|
||||
@ -288,6 +291,9 @@ class HTMLRewriterMixin(object):
|
||||
|
||||
result = self.out.getvalue()
|
||||
|
||||
# track that something was parsed
|
||||
self.parsed_any = self.parsed_any or bool(string)
|
||||
|
||||
# Clear buffer to create new one for next rewrite()
|
||||
self.out = None
|
||||
|
||||
@ -338,6 +344,12 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
||||
self.feed(end_tag)
|
||||
self._wb_parse_context = None
|
||||
|
||||
# if head_insert hasn't been inserted yet, but some content was written
|
||||
# out, then insert head_insert now
|
||||
if self.head_insert and self.parsed_any:
|
||||
self.out.write(self.head_insert)
|
||||
self.head_insert = None
|
||||
|
||||
try:
|
||||
HTMLParser.close(self)
|
||||
except HTMLParseError: # pragma: no cover
|
||||
|
@ -98,6 +98,16 @@ def test_local_no_head():
|
||||
# link rewritten
|
||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||
|
||||
def test_local_no_head_only_title():
|
||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head_2.html',
|
||||
urlrewriter,
|
||||
head_insert_func,
|
||||
'com,example,test)/')
|
||||
|
||||
# wombat insert added
|
||||
assert '<script src="/static/__pywb/wombat.js"> </script>' in buff
|
||||
|
||||
|
||||
def test_local_no_head_banner_only():
|
||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
|
||||
bn_urlrewriter,
|
||||
|
3
sample_archive/text_content/sample_no_head_2.html
Normal file
3
sample_archive/text_content/sample_no_head_2.html
Normal file
@ -0,0 +1,3 @@
|
||||
<!DOCTYPE html>
|
||||
<title>A title</title>
|
||||
Some Text
|
Loading…
x
Reference in New Issue
Block a user