mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
streaming rewriter improvements:
- add optional 'first_buff' defaulting to '' - rename close() -> final_read() - add rewrite_complete() for single-pass complete rewrite (including first_buff and final_read()) - rewrite_text_stream_to_gen() uses first_buff, uses member funcs directly - remove unused close() from other rewriters, only needed for HTMLParser interface
This commit is contained in:
parent
adab304f33
commit
35674c6de7
@ -201,49 +201,48 @@ class BufferedRewriter(object):
|
||||
|
||||
# ============================================================================
|
||||
class StreamingRewriter(object):
|
||||
def __init__(self, url_rewriter, align_to_line=True):
|
||||
def __init__(self, url_rewriter, align_to_line=True, first_buff=''):
|
||||
self.url_rewriter = url_rewriter
|
||||
self.align_to_line = align_to_line
|
||||
self.first_buff = first_buff
|
||||
|
||||
def __call__(self, rwinfo):
|
||||
gen = self.rewrite_text_stream_to_gen(rwinfo.content_stream,
|
||||
rewrite_func=self.rewrite,
|
||||
final_read_func=self.close,
|
||||
align_to_line=self.align_to_line)
|
||||
|
||||
return gen
|
||||
return self.rewrite_text_stream_to_gen(rwinfo.content_stream)
|
||||
|
||||
def rewrite(self, string):
|
||||
return string
|
||||
|
||||
def close(self):
|
||||
def rewrite_complete(self, string):
|
||||
return self.first_buff + self.rewrite(string) + self.final_read()
|
||||
|
||||
def final_read(self):
|
||||
return ''
|
||||
|
||||
def rewrite_text_stream_to_gen(cls, stream,
|
||||
rewrite_func,
|
||||
final_read_func,
|
||||
align_to_line):
|
||||
def rewrite_text_stream_to_gen(self, stream):
|
||||
"""
|
||||
Convert stream to generator using applying rewriting func
|
||||
to each portion of the stream.
|
||||
Align to line boundaries if needed.
|
||||
"""
|
||||
try:
|
||||
buff = ''
|
||||
buff = self.first_buff
|
||||
|
||||
if buff:
|
||||
yield buff.encode('iso-8859-1')
|
||||
|
||||
while True:
|
||||
buff = stream.read(BUFF_SIZE)
|
||||
if not buff:
|
||||
break
|
||||
|
||||
if align_to_line:
|
||||
if self.align_to_line:
|
||||
buff += stream.readline()
|
||||
|
||||
buff = rewrite_func(buff.decode('iso-8859-1'))
|
||||
buff = self.rewrite(buff.decode('iso-8859-1'))
|
||||
yield buff.encode('iso-8859-1')
|
||||
|
||||
# For adding a tail/handling final buffer
|
||||
buff = final_read_func()
|
||||
buff = self.final_read()
|
||||
if buff:
|
||||
yield buff.encode('iso-8859-1')
|
||||
|
||||
|
@ -237,7 +237,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
|
||||
def _rewrite_css(self, css_content):
|
||||
if css_content:
|
||||
return self.css_rewriter.rewrite(css_content)
|
||||
return self.css_rewriter.rewrite_complete(css_content)
|
||||
else:
|
||||
return ''
|
||||
|
||||
@ -245,7 +245,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
if not script_content:
|
||||
return ''
|
||||
|
||||
content = self.js_rewriter.rewrite(script_content)
|
||||
content = self.js_rewriter.rewrite_complete(script_content)
|
||||
if ensure_window:
|
||||
content = self.ADD_WINDOW.sub('window.\\1', content)
|
||||
|
||||
@ -456,7 +456,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
|
||||
return result
|
||||
|
||||
def close(self):
|
||||
def final_read(self):
|
||||
self.out = self.AccumBuff()
|
||||
|
||||
self._internal_close()
|
||||
@ -468,6 +468,9 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
|
||||
return result
|
||||
|
||||
def close(self):
|
||||
return self.final_read()
|
||||
|
||||
def _internal_close(self): # pragma: no cover
|
||||
raise NotImplementedError('Base method')
|
||||
|
||||
@ -562,8 +565,7 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
||||
comment_rewriter = HTMLRewriter(self.url_rewriter,
|
||||
defmod=self.defmod)
|
||||
|
||||
data = comment_rewriter.rewrite(data)
|
||||
data += comment_rewriter.close()
|
||||
data = comment_rewriter.rewrite_complete(data)
|
||||
self.out.write(data)
|
||||
else:
|
||||
self.parse_data(data)
|
||||
|
@ -21,8 +21,3 @@ class JSONPRewriter(StreamingRewriter):
|
||||
string = m_callback.group(1) + string[m_json.end(1):]
|
||||
return string
|
||||
|
||||
def close(self):
|
||||
return ''
|
||||
|
||||
|
||||
|
||||
|
@ -61,9 +61,6 @@ class RegexRewriter(StreamingRewriter):
|
||||
def rewrite(self, string):
|
||||
return self.regex.sub(lambda x: self.replace(x), string)
|
||||
|
||||
def close(self):
|
||||
return ''
|
||||
|
||||
def replace(self, m):
|
||||
i = 0
|
||||
for _, op, count in self.rules:
|
||||
|
Loading…
x
Reference in New Issue
Block a user