1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewrite: when removing the Content-Encoding header, also remove Content-Length, since the length must be recomputed for the decoded body.

proxy: in proxy mode, the response must be fully buffered so that a Content-Length header can be added (chunked encoding may be added later).
This commit is contained in:
Ilya Kreymer 2015-07-28 14:23:50 -07:00
parent 1b9161a69b
commit 0b4ceb9cde
2 changed files with 15 additions and 6 deletions

View File

@ -84,11 +84,13 @@ class RewriteContent:
else:
stream = DecompressingBufferedReader(stream, decomp_type=enc)
rewritten_headers.status_headers.remove_header('content-length')
return stream
def rewrite_content(self, urlrewriter, headers, stream,
def rewrite_content(self, urlrewriter, status_headers, stream,
head_insert_func=None, urlkey='',
cdx=None):
@ -96,7 +98,8 @@ class RewriteContent:
if (wb_url.is_identity or
(not head_insert_func and wb_url.is_banner_only)):
status_headers, stream = self.sanitize_content(headers, stream)
status_headers, stream = self.sanitize_content(status_headers,
stream)
return (status_headers, self.stream_to_gen(stream), False)
if wb_url.is_banner_only:
@ -106,7 +109,7 @@ class RewriteContent:
(rewritten_headers, stream) = self._rewrite_headers(urlrewriter,
rule,
headers,
status_headers,
stream)
status_headers = rewritten_headers.status_headers
@ -166,7 +169,7 @@ class RewriteContent:
stream,
first_buff)
content_len = headers.get_header('Content-Length')
content_len = status_headers.get_header('Content-Length')
try:
content_len = int(content_len)
except Exception:

View File

@ -170,7 +170,7 @@ class ReplayView(object):
result = (self.content_rewriter.
rewrite_content(urlrewriter,
headers=status_headers,
status_headers=status_headers,
stream=stream,
head_insert_func=head_insert_func,
urlkey=cdx['urlkey'],
@ -187,9 +187,15 @@ class ReplayView(object):
content_len = 0
if content_len <= 0:
# if proxy mode, must set content-length (or use chunked)
if wbrequest.options.get('is_proxy'):
max_size = 0
else:
max_size = self.buffer_max_size
response_iter = self.buffered_response(status_headers,
response_iter,
self.buffer_max_size)
max_size)
# Set Content-Location if not exact capture
if not self.redir_to_exact: