mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
readers: support 'content-encoding: deflate' using different zlib decompression options
Support default and alt settings when attempting to decompress a deflate stream. Tests: add tests with httpbin.org/deflate. Fixes #115.
This commit is contained in:
parent
69f6354934
commit
06fcc89de6
@ -73,6 +73,21 @@ class RewriteContent:
|
||||
|
||||
return (rewritten_headers, stream)
|
||||
|
||||
|
||||
def _check_encoding(self, rewritten_headers, stream, enc):
|
||||
if (rewritten_headers.
|
||||
contains_removed_header('content-encoding', enc)):
|
||||
|
||||
#optimize: if already a ChunkedDataReader, add the encoding
|
||||
if isinstance(stream, ChunkedDataReader):
|
||||
stream.set_decomp(enc)
|
||||
else:
|
||||
stream = DecompressingBufferedReader(stream, decomp_type=enc)
|
||||
|
||||
return stream
|
||||
|
||||
|
||||
|
||||
def rewrite_content(self, urlrewriter, headers, stream,
|
||||
head_insert_func=None, urlkey='',
|
||||
cdx=None):
|
||||
@ -114,14 +129,8 @@ class RewriteContent:
|
||||
encoding = None
|
||||
first_buff = ''
|
||||
|
||||
if (rewritten_headers.
|
||||
contains_removed_header('content-encoding', 'gzip')):
|
||||
|
||||
#optimize: if already a ChunkedDataReader, add gzip
|
||||
if isinstance(stream, ChunkedDataReader):
|
||||
stream.set_decomp('gzip')
|
||||
else:
|
||||
stream = DecompressingBufferedReader(stream)
|
||||
stream = self._check_encoding(rewritten_headers, stream, 'gzip')
|
||||
stream = self._check_encoding(rewritten_headers, stream, 'deflate')
|
||||
|
||||
if mod == 'js_':
|
||||
text_type, stream = self._resolve_text_type('js',
|
||||
|
@ -10,6 +10,14 @@ def gzip_decompressor():
|
||||
return zlib.decompressobj(16 + zlib.MAX_WBITS)
|
||||
|
||||
|
||||
def deflate_decompressor():
    """Create a decompressor for zlib-wrapped deflate data.

    Uses zlib's default window size (``zlib.MAX_WBITS``), which expects
    the standard zlib header and trailer around the deflate stream.
    """
    # spelled out explicitly; identical to calling decompressobj() bare
    return zlib.decompressobj(zlib.MAX_WBITS)
|
||||
|
||||
|
||||
def deflate_decompressor_alt():
    """Create a decompressor for raw deflate data (no zlib header).

    A negative window-bits value tells zlib to expect a bare deflate
    stream without the zlib header and checksum trailer.
    """
    raw_window_bits = -zlib.MAX_WBITS
    return zlib.decompressobj(raw_window_bits)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class BufferedReader(object):
|
||||
"""
|
||||
@ -30,7 +38,9 @@ class BufferedReader(object):
|
||||
|
||||
"""
|
||||
|
||||
DECOMPRESSORS = {'gzip': gzip_decompressor}
|
||||
DECOMPRESSORS = {'gzip': gzip_decompressor,
|
||||
'deflate': deflate_decompressor,
|
||||
'deflate_alt': deflate_decompressor_alt}
|
||||
|
||||
def __init__(self, stream, block_size=1024,
|
||||
decomp_type=None,
|
||||
@ -91,7 +101,11 @@ class BufferedReader(object):
|
||||
except Exception:
|
||||
# if first read attempt, assume non-gzipped stream
|
||||
if self.num_read == 0:
|
||||
self.decompressor = None
|
||||
if self.decomp_type == 'deflate':
|
||||
self._init_decomp('deflate_alt')
|
||||
data = self._decompress(data)
|
||||
else:
|
||||
self.decompressor = None
|
||||
# otherwise (partly decompressed), something is wrong
|
||||
else:
|
||||
raise
|
||||
|
@ -42,3 +42,7 @@ class TestLiveRewriter:
|
||||
resp = self.testapp.get('/live/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
|
||||
assert resp.status_int == 200
|
||||
assert resp.content_type == RewriteHandler.YT_DL_TYPE, resp.content_type
|
||||
|
||||
def test_deflate(self):
    """Fetch httpbin's deflate endpoint through the live rewriter.

    Passes when the response body contains httpbin's
    ``"deflated": true`` marker.
    """
    deflate_url = '/live/mp_/http://httpbin.org/deflate'
    response = self.testapp.get(deflate_url)
    assert '"deflated": true' in response.body
|
||||
|
Loading…
x
Reference in New Issue
Block a user