1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

readers: support 'content-encoding: deflate' using different zlib decompression options

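Support both the default (zlib-wrapped) and alternate (raw deflate) settings when attempting to decompress a deflate stream: the default is tried first, with the alternate used as a fallback.
tests: add tests with httpbin.org/deflate. Fixes #115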
This commit is contained in:
Ilya Kreymer 2015-06-24 13:11:33 -07:00
parent 69f6354934
commit 06fcc89de6
3 changed files with 37 additions and 10 deletions

View File

@ -73,6 +73,21 @@ class RewriteContent:
return (rewritten_headers, stream)
def _check_encoding(self, rewritten_headers, stream, enc):
    """Wrap ``stream`` for decompression if ``enc`` was stripped.

    ``rewritten_headers.contains_removed_header`` is asked whether a
    'content-encoding' header with the value ``enc`` (e.g. 'gzip' or
    'deflate') was removed. If not, ``stream`` is returned untouched.

    :param rewritten_headers: object exposing
        ``contains_removed_header(name, value)``
    :param stream: the payload stream to (possibly) decompress
    :param enc: content-encoding name, passed on as the decompression type
    :return: ``stream`` itself, or a ``DecompressingBufferedReader``
        wrapping it
    """
    removed = rewritten_headers.contains_removed_header(
        'content-encoding', enc)

    if not removed:
        return stream

    # optimize: if already a ChunkedDataReader, add the encoding to the
    # existing reader instead of stacking another reader on top of it
    if isinstance(stream, ChunkedDataReader):
        stream.set_decomp(enc)
        return stream

    return DecompressingBufferedReader(stream, decomp_type=enc)
def rewrite_content(self, urlrewriter, headers, stream,
head_insert_func=None, urlkey='',
cdx=None):
@ -114,14 +129,8 @@ class RewriteContent:
encoding = None
first_buff = ''
if (rewritten_headers.
contains_removed_header('content-encoding', 'gzip')):
#optimize: if already a ChunkedDataReader, add gzip
if isinstance(stream, ChunkedDataReader):
stream.set_decomp('gzip')
else:
stream = DecompressingBufferedReader(stream)
stream = self._check_encoding(rewritten_headers, stream, 'gzip')
stream = self._check_encoding(rewritten_headers, stream, 'deflate')
if mod == 'js_':
text_type, stream = self._resolve_text_type('js',

View File

@ -10,6 +10,14 @@ def gzip_decompressor():
return zlib.decompressobj(16 + zlib.MAX_WBITS)
def deflate_decompressor():
    """Return a decompressor for zlib-wrapped deflate data.

    Uses zlib's default window settings, i.e. the stream is expected to
    carry the standard zlib header and trailer.
    """
    return zlib.decompressobj()
def deflate_decompressor_alt():
    """Return a decompressor for raw deflate data.

    A negative ``wbits`` value tells zlib to expect a raw deflate stream
    with no zlib header or trailer.
    """
    return zlib.decompressobj(-zlib.MAX_WBITS)
#=================================================================
class BufferedReader(object):
"""
@ -30,7 +38,9 @@ class BufferedReader(object):
"""
DECOMPRESSORS = {'gzip': gzip_decompressor}
DECOMPRESSORS = {'gzip': gzip_decompressor,
'deflate': deflate_decompressor,
'deflate_alt': deflate_decompressor_alt}
def __init__(self, stream, block_size=1024,
decomp_type=None,
@ -91,7 +101,11 @@ class BufferedReader(object):
except Exception:
# if first read attempt, assume non-gzipped stream
if self.num_read == 0:
self.decompressor = None
if self.decomp_type == 'deflate':
self._init_decomp('deflate_alt')
data = self._decompress(data)
else:
self.decompressor = None
# otherwise (partly decompressed), something is wrong
else:
raise

View File

@ -42,3 +42,7 @@ class TestLiveRewriter:
resp = self.testapp.get('/live/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
assert resp.status_int == 200
assert resp.content_type == RewriteHandler.YT_DL_TYPE, resp.content_type
def test_deflate(self):
    """Fetch a deflate-encoded live page and check it was decompressed.

    Requests httpbin.org/deflate through the live rewriter; the marker
    text is only readable in the body once the response has been
    decompressed.
    """
    resp = self.testapp.get('/live/mp_/http://httpbin.org/deflate')
    # same status check as the other live-rewrite tests in this class
    assert resp.status_int == 200
    assert '"deflated": true' in resp.body