mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-25 07:27:47 +01:00
- rewrite headers after content to ensure content-length/content-encoding rewritten if content modified - header rewriter: remove proxyrewriter, set default rule to 'prefix' or 'keep' if url rewriting or not - set is_content_rw if record.content_stream(), assume content is modified - add BufferedRewriter as base for dash, hls, amf rewriting which processes the full stream - should_rw_content() determines if should attempt content rewriting - support banner-only insert mode: added HTMLInsertOnlyRewriter, enable if no custom JS rules - test: enable banner-only test mode
35 lines
959 B
Python
35 lines
959 B
Python
import re
|
|
from pywb.rewrite.content_rewriter import StreamingRewriter
|
|
|
|
|
|
# ============================================================================
|
|
class HTMLInsertOnlyRewriter(StreamingRewriter):
    """Insert a custom string into the HTML <head> tag.

    No other rewriting is performed. The insert text is placed immediately
    after the first opening ``<head ...>`` tag found in the first buffer
    passed to ``rewrite()``; all later buffers are returned unchanged.
    """

    # Opening <head> tag: case-insensitive, optional whitespace after '<',
    # any attributes, and one or more closing '>' characters.
    # A raw string is used so that '\s' and '\b' reach the regex engine
    # verbatim instead of relying on Python's handling of unknown escapes.
    HEAD_REGEX = re.compile(r'<\s*head\b[^>]*[>]+', re.I)

    def __init__(self, url_rewriter, **kwargs):
        """Create the rewriter.

        :param url_rewriter: passed through to ``StreamingRewriter``
        :param kwargs: must include ``head_insert``, the text to insert
        :raises KeyError: if ``head_insert`` is not supplied
        """
        super(HTMLInsertOnlyRewriter, self).__init__(url_rewriter, False)
        self.head_insert = kwargs['head_insert']

        # becomes True once the first buffer has been examined
        self.done = False

    def rewrite(self, string):
        """Return ``string`` with ``head_insert`` added after ``<head>``.

        Only the first call searches for the tag; subsequent calls return
        their input untouched. If no opening ``<head>`` tag is found in the
        first buffer, or there is nothing to insert, the buffer is returned
        unchanged.

        :param string: the next chunk of content
        :return: the chunk, possibly with the insert text spliced in
        """
        if self.done:
            return string

        # only try to find <head> in first buffer
        self.done = True

        # nothing to insert (None or empty): avoid a TypeError on
        # concatenation and simply pass the buffer through
        if not self.head_insert:
            return string

        m = self.HEAD_REGEX.search(string)
        if not m:
            return string

        # splice the insert text right after the end of the <head ...> tag
        inx = m.end()
        return string[:inx] + self.head_insert + string[inx:]
|
|
|
|
|