1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-25 07:27:47 +01:00
pywb/pywb/rewrite/html_insert_rewriter.py
Ilya Kreymer d8b67319e1 rewrite refactoring:
- rewrite headers after content to ensure content-length/content-encoding rewritten if content modified
- header rewriter: remove proxyrewriter, set default rule to 'prefix' or 'keep' if url rewriting or not
- set is_content_rw if record.content_stream(), assume content is modified
- add BufferedRewriter as base for dash, hls, amf rewriting which processes the full stream
- should_rw_content() determines if should attempt content rewriting
- support banner-only insert mode: added HTMLInsertOnlyRewriter, enable if no custom JS rules
- test: enable banner-only test mode
2017-05-22 18:52:17 -07:00

35 lines
959 B
Python

import re
from pywb.rewrite.content_rewriter import StreamingRewriter
# ============================================================================
class HTMLInsertOnlyRewriter(StreamingRewriter):
    """Insert a custom string right after the opening HTML <head> tag.

    No other rewriting is performed. Only the first buffer handed to
    ``rewrite()`` is searched for ``<head>``; every later buffer is
    returned unchanged.
    """

    # Raw string literal: ``\s`` and ``\b`` must reach the regex engine
    # verbatim, and ``\s`` is not a valid escape in a normal string literal.
    HEAD_REGEX = re.compile(r'<\s*head\b[^>]*[>]+', re.I)

    def __init__(self, url_rewriter, **kwargs):
        """Initialize the rewriter.

        :param url_rewriter: forwarded to the base class initializer
        :param kwargs: must contain ``head_insert``, the text to place
            after the opening ``<head>`` tag
        :raises KeyError: if ``head_insert`` is not supplied
        """
        super(HTMLInsertOnlyRewriter, self).__init__(url_rewriter, False)
        self.head_insert = kwargs['head_insert']
        # Flipped to True once the first buffer has been examined
        self.done = False

    def rewrite(self, string):
        """Return ``string`` with ``head_insert`` added after ``<head>``.

        :param string: the next buffer of content
        :return: the buffer with the insert applied if this is the first
            buffer and it contains an opening ``<head>`` tag, otherwise
            the buffer unchanged
        """
        if self.done:
            return string

        # only try to find <head> in first buffer
        self.done = True

        # Nothing to insert (None or empty): pass the buffer through
        # instead of failing on the concatenation below.
        if not self.head_insert:
            return string

        m = self.HEAD_REGEX.search(string)
        if not m:
            return string

        inx = m.end()
        return string[:inx] + self.head_insert + string[inx:]