mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite: separate stream_to_gen and rewrite_text_stream_to_gen
The regular stream_to_gen() is much simpler and is specifically for binary/unrewritten content, while rewrite_text_stream_to_gen() performs rewriting. Use a fixed buffer of 16384 bytes for the read size, which allows for better streaming when using live rewrite
This commit is contained in:
parent
50bf7d2634
commit
aecc847ec1
@ -20,6 +20,8 @@ from pywb.utils.bufferedreaders import ChunkedDataReader
|
|||||||
class RewriteContent:
|
class RewriteContent:
|
||||||
HEAD_REGEX = re.compile(r'<\s*head\b[^>]*[>]+', re.I)
|
HEAD_REGEX = re.compile(r'<\s*head\b[^>]*[>]+', re.I)
|
||||||
|
|
||||||
|
BUFF_SIZE = 16384
|
||||||
|
|
||||||
def __init__(self, ds_rules_file=None, is_framed_replay=False):
|
def __init__(self, ds_rules_file=None, is_framed_replay=False):
|
||||||
self.ruleset = RuleSet(RewriteRules, 'rewrite',
|
self.ruleset = RuleSet(RewriteRules, 'rewrite',
|
||||||
default_rule_config={},
|
default_rule_config={},
|
||||||
@ -161,10 +163,10 @@ class RewriteContent:
|
|||||||
rewriter = rewriter_class(urlrewriter)
|
rewriter = rewriter_class(urlrewriter)
|
||||||
|
|
||||||
# Create rewriting generator
|
# Create rewriting generator
|
||||||
gen = self.stream_to_gen(stream,
|
gen = self.rewrite_text_stream_to_gen(stream,
|
||||||
rewrite_func=rewriter.rewrite,
|
rewrite_func=rewriter.rewrite,
|
||||||
final_read_func=rewriter.close,
|
final_read_func=rewriter.close,
|
||||||
first_buff=first_buff)
|
first_buff=first_buff)
|
||||||
|
|
||||||
return (status_headers, gen, True)
|
return (status_headers, gen, True)
|
||||||
|
|
||||||
@ -210,33 +212,48 @@ class RewriteContent:
|
|||||||
|
|
||||||
return buff
|
return buff
|
||||||
|
|
||||||
# Create a generator reading from a stream,
|
|
||||||
# with optional rewriting and final read call
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def stream_to_gen(stream, rewrite_func=None,
|
def stream_to_gen(stream):
|
||||||
final_read_func=None, first_buff=None):
|
"""
|
||||||
|
Convert stream to an iterator, reading BUFF_SIZE bytes
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
buff = stream.read(RewriteContent.BUFF_SIZE)
|
||||||
|
yield buff
|
||||||
|
if not buff:
|
||||||
|
break
|
||||||
|
|
||||||
|
finally:
|
||||||
|
stream.close()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def rewrite_text_stream_to_gen(stream, rewrite_func,
|
||||||
|
final_read_func, first_buff):
|
||||||
|
"""
|
||||||
|
Convert stream to generator using applying rewriting func
|
||||||
|
to each portion of the stream. Align to line boundaries
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
if first_buff:
|
if first_buff:
|
||||||
buff = first_buff
|
buff = first_buff
|
||||||
else:
|
else:
|
||||||
buff = stream.read()
|
buff = stream.read(RewriteContent.BUFF_SIZE)
|
||||||
if buff and (not hasattr(stream, 'closed') or
|
if buff and (not hasattr(stream, 'closed') or
|
||||||
not stream.closed):
|
not stream.closed):
|
||||||
buff += stream.readline()
|
buff += stream.readline()
|
||||||
|
|
||||||
while buff:
|
while buff:
|
||||||
if rewrite_func:
|
buff = rewrite_func(buff)
|
||||||
buff = rewrite_func(buff)
|
|
||||||
yield buff
|
yield buff
|
||||||
buff = stream.read()
|
buff = stream.read(RewriteContent.BUFF_SIZE)
|
||||||
if buff:
|
if buff:
|
||||||
buff += stream.readline()
|
buff += stream.readline()
|
||||||
|
|
||||||
# For adding a tail/handling final buffer
|
# For adding a tail/handling final buffer
|
||||||
if final_read_func:
|
buff = final_read_func()
|
||||||
buff = final_read_func()
|
if buff:
|
||||||
if buff:
|
yield buff
|
||||||
yield buff
|
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
stream.close()
|
stream.close()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user