diff --git a/pywb/rewrite/rewrite_content.py b/pywb/rewrite/rewrite_content.py
index 105134ea..6bb300c4 100644
--- a/pywb/rewrite/rewrite_content.py
+++ b/pywb/rewrite/rewrite_content.py
@@ -73,6 +73,21 @@ class RewriteContent:
 
         return (rewritten_headers, stream)
 
+
+    def _check_encoding(self, rewritten_headers, stream, enc):
+        if (rewritten_headers.
+             contains_removed_header('content-encoding', enc)):
+
+            #optimize: if already a ChunkedDataReader, add the encoding
+            if isinstance(stream, ChunkedDataReader):
+                stream.set_decomp(enc)
+            else:
+                stream = DecompressingBufferedReader(stream, decomp_type=enc)
+
+        return stream
+
+
+
     def rewrite_content(self, urlrewriter, headers, stream,
                         head_insert_func=None, urlkey='',
                         cdx=None):
@@ -114,14 +129,8 @@ class RewriteContent:
         encoding = None
         first_buff = ''
 
-        if (rewritten_headers.
-             contains_removed_header('content-encoding', 'gzip')):
-
-            #optimize: if already a ChunkedDataReader, add gzip
-            if isinstance(stream, ChunkedDataReader):
-                stream.set_decomp('gzip')
-            else:
-                stream = DecompressingBufferedReader(stream)
+        stream = self._check_encoding(rewritten_headers, stream, 'gzip')
+        stream = self._check_encoding(rewritten_headers, stream, 'deflate')
 
         if mod == 'js_':
             text_type, stream = self._resolve_text_type('js',
diff --git a/pywb/utils/bufferedreaders.py b/pywb/utils/bufferedreaders.py
index 54457f94..a895b490 100644
--- a/pywb/utils/bufferedreaders.py
+++ b/pywb/utils/bufferedreaders.py
@@ -10,6 +10,14 @@ def gzip_decompressor():
     return zlib.decompressobj(16 + zlib.MAX_WBITS)
 
 
+def deflate_decompressor():
+    return zlib.decompressobj()
+
+
+def deflate_decompressor_alt():
+    return zlib.decompressobj(-zlib.MAX_WBITS)
+
+
 #=================================================================
 class BufferedReader(object):
     """
@@ -30,7 +38,9 @@ class BufferedReader(object):
 
     """
 
-    DECOMPRESSORS = {'gzip': gzip_decompressor}
+    DECOMPRESSORS = {'gzip': gzip_decompressor,
+                     'deflate': deflate_decompressor,
+                     'deflate_alt': deflate_decompressor_alt}
 
     def __init__(self, stream, block_size=1024,
                  decomp_type=None,
@@ -91,7 +101,11 @@ class BufferedReader(object):
             except Exception:
                 # if first read attempt, assume non-gzipped stream
                 if self.num_read == 0:
-                    self.decompressor = None
+                    if self.decomp_type == 'deflate':
+                        self._init_decomp('deflate_alt')
+                        data = self._decompress(data)
+                    else:
+                        self.decompressor = None
                 # otherwise (partly decompressed), something is wrong
                 else:
                     raise
diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py
index 87c17aab..e77ab1ae 100644
--- a/tests/test_live_rewriter.py
+++ b/tests/test_live_rewriter.py
@@ -42,3 +42,7 @@ class TestLiveRewriter:
         resp = self.testapp.get('/live/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
         assert resp.status_int == 200
         assert resp.content_type == RewriteHandler.YT_DL_TYPE, resp.content_type
+
+    def test_deflate(self):
+        resp = self.testapp.get('/live/mp_/http://httpbin.org/deflate')
+        assert '"deflated": true' in resp.body