diff --git a/CHANGES.rst b/CHANGES.rst index bb9277fd..91b62f1c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +pywb 2.3.3 changelist +~~~~~~~~~~~~~~~~~~~~~ + +* Proxy Mode: Ensure head insert added even if no ``<head>`` tag, insert after first tag that is not ``<html>`` or ``<head>`` (#496) + + pywb 2.3.2 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pywb/rewrite/html_insert_rewriter.py b/pywb/rewrite/html_insert_rewriter.py index 053c1231..7a5dcf26 100644 --- a/pywb/rewrite/html_insert_rewriter.py +++ b/pywb/rewrite/html_insert_rewriter.py @@ -4,10 +4,10 @@ from pywb.rewrite.content_rewriter import StreamingRewriter # ============================================================================ class HTMLInsertOnlyRewriter(StreamingRewriter): - """ Insert custom string into HTML tag + """ Insert custom string into HTML into the head, before any tag not or no other rewriting performed """ - HEAD_REGEX = re.compile('<\s*head\\b[^>]*[>]+', re.I) + NOT_HEAD_REGEX = re.compile(r'(<\s*\b)(?!(html|head))', re.I) def __init__(self, url_rewriter, **kwargs): super(HTMLInsertOnlyRewriter, self).__init__(url_rewriter, False) @@ -19,16 +19,16 @@ class HTMLInsertOnlyRewriter(StreamingRewriter): if self.done: return string - # only try to find in first buffer - self.done = True - m = self.HEAD_REGEX.search(string) + m = self.NOT_HEAD_REGEX.search(string) if m: - inx = m.end() + inx = m.start() buff = string[:inx] buff += self.head_insert buff += string[inx:] + self.done = True return buff else: return string - + def final_read(self): + return '' if self.done else self.head_insert diff --git a/pywb/rewrite/test/test_html_insert_rewriter.py b/pywb/rewrite/test/test_html_insert_rewriter.py new file mode 100644 index 00000000..ed3607a4 --- /dev/null +++ b/pywb/rewrite/test/test_html_insert_rewriter.py @@ -0,0 +1,30 @@ + + + +r''' +>>> parse('