diff --git a/pywb/rewrite/content_rewriter.py b/pywb/rewrite/content_rewriter.py index 43610852..8932bcfc 100644 --- a/pywb/rewrite/content_rewriter.py +++ b/pywb/rewrite/content_rewriter.py @@ -201,49 +201,48 @@ class BufferedRewriter(object): # ============================================================================ class StreamingRewriter(object): - def __init__(self, url_rewriter, align_to_line=True): + def __init__(self, url_rewriter, align_to_line=True, first_buff=''): self.url_rewriter = url_rewriter self.align_to_line = align_to_line + self.first_buff = first_buff def __call__(self, rwinfo): - gen = self.rewrite_text_stream_to_gen(rwinfo.content_stream, - rewrite_func=self.rewrite, - final_read_func=self.close, - align_to_line=self.align_to_line) - - return gen + return self.rewrite_text_stream_to_gen(rwinfo.content_stream) def rewrite(self, string): return string - def close(self): + def rewrite_complete(self, string): + return self.first_buff + self.rewrite(string) + self.final_read() + + def final_read(self): return '' - def rewrite_text_stream_to_gen(cls, stream, - rewrite_func, - final_read_func, - align_to_line): + def rewrite_text_stream_to_gen(self, stream): """ Convert stream to generator using applying rewriting func to each portion of the stream. Align to line boundaries if needed. """ try: - buff = '' + buff = self.first_buff + + if buff: + yield buff.encode('iso-8859-1') while True: buff = stream.read(BUFF_SIZE) if not buff: break - if align_to_line: + if self.align_to_line: buff += stream.readline() - buff = rewrite_func(buff.decode('iso-8859-1')) + buff = self.rewrite(buff.decode('iso-8859-1')) yield buff.encode('iso-8859-1') # For adding a tail/handling final buffer - buff = final_read_func() + buff = self.final_read() if buff: yield buff.encode('iso-8859-1') diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index 261160a4..9ad6a7d2 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -237,7 +237,7 @@ class HTMLRewriterMixin(StreamingRewriter): def _rewrite_css(self, css_content): if css_content: - return self.css_rewriter.rewrite(css_content) + return self.css_rewriter.rewrite_complete(css_content) else: return '' @@ -245,7 +245,7 @@ class HTMLRewriterMixin(StreamingRewriter): if not script_content: return '' - content = self.js_rewriter.rewrite(script_content) + content = self.js_rewriter.rewrite_complete(script_content) if ensure_window: content = self.ADD_WINDOW.sub('window.\\1', content) @@ -456,7 +456,7 @@ class HTMLRewriterMixin(StreamingRewriter): return result - def close(self): + def final_read(self): self.out = self.AccumBuff() self._internal_close() @@ -468,6 +468,9 @@ class HTMLRewriterMixin(StreamingRewriter): return result + def close(self): + return self.final_read() + def _internal_close(self): # pragma: no cover raise NotImplementedError('Base method') @@ -562,8 +565,7 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser): comment_rewriter = HTMLRewriter(self.url_rewriter, defmod=self.defmod) - data = comment_rewriter.rewrite(data) - data += comment_rewriter.close() + data = comment_rewriter.rewrite_complete(data) self.out.write(data) else: self.parse_data(data) diff --git a/pywb/rewrite/jsonp_rewriter.py b/pywb/rewrite/jsonp_rewriter.py index fa04332c..6d3325b1 100644 --- a/pywb/rewrite/jsonp_rewriter.py +++ b/pywb/rewrite/jsonp_rewriter.py @@ -21,8 +21,3 @@ class JSONPRewriter(StreamingRewriter): string = m_callback.group(1) + string[m_json.end(1):] return string - def close(self): - return '' - - - diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index ad807e7d..c182d14b 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -61,9 +61,6 @@ class RegexRewriter(StreamingRewriter): def rewrite(self, string): return self.regex.sub(lambda x: self.replace(x), string) - def close(self): - return '' - def replace(self, m): i = 0 for _, op, count in self.rules: