1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

bufferedreader cleanup:

* BufferedReader defaults to no decompression
* DecompressingBufferedReader defaults to gzip decomp
* ChunkedDataReader defaults to no gzip decomp, but decomp
can be set later via set_decomp().
This allows chunked responses to be de-chunked but not decompressed
(eg for non-text responses)
This commit is contained in:
Ilya Kreymer 2014-04-28 20:15:31 -07:00
parent 53ad67eb9c
commit 9cf5327e88
3 changed files with 39 additions and 9 deletions

View File

@ -81,7 +81,12 @@ class RewriteContent:
if (rewritten_headers.
contains_removed_header('content-encoding', 'gzip')):
stream = DecompressingBufferedReader(stream, decomp_type='gzip')
#optimize: if already a ChunkedDataReader, add gzip
if isinstance(stream, ChunkedDataReader):
stream.set_decomp('gzip')
else:
stream = DecompressingBufferedReader(stream, decomp_type='gzip')
if rewritten_headers.charset:
encoding = rewritten_headers.charset

View File

@ -11,7 +11,7 @@ def gzip_decompressor():
#=================================================================
class DecompressingBufferedReader(object):
class BufferedReader(object):
"""
A wrapping line reader which wraps an existing reader.
Read operations operate on underlying buffer, which is filled to
@ -20,9 +20,12 @@ class DecompressingBufferedReader(object):
If an optional decompress type is specified,
data is fed through the decompressor when read from the buffer.
Currently supported decompression: gzip
If unspecified, default decompression is None
If decompression fails on first try, data is assumed to be decompressed
and no exception is thrown. If a failure occurs after data has been
If decompression is specified, and decompress fails on first try,
data is assumed to not be compressed and no exception is thrown.
If a failure occurs after data has been
partially decompressed, the exception is propagated.
"""
@ -30,7 +33,7 @@ class DecompressingBufferedReader(object):
DECOMPRESSORS = {'gzip': gzip_decompressor}
def __init__(self, stream, block_size=1024,
decomp_type='gzip',
decomp_type=None,
starting_data=None):
self.stream = stream
self.block_size = block_size
@ -42,6 +45,12 @@ class DecompressingBufferedReader(object):
self.num_read = 0
self.buff_size = 0
def set_decomp(self, decomp_type):
    """
    Select the decompression type for this reader.

    Only permitted before any data has been consumed from the
    underlying stream; attempting to switch mid-stream raises.
    """
    already_reading = self.num_read > 0
    if already_reading:
        raise Exception('Attempting to change decompression mid-stream')
    self._init_decomp(decomp_type)
def _init_decomp(self, decomp_type):
if decomp_type:
try:
@ -161,6 +170,18 @@ class DecompressingBufferedReader(object):
self.stream = None
#=================================================================
class DecompressingBufferedReader(BufferedReader):
    """
    A BufferedReader that decompresses with gzip by default,
    unless a different decomp_type is explicitly supplied.
    """
    def __init__(self, *args, **kwargs):
        # only fall back to gzip when the caller did not pick a type
        kwargs.setdefault('decomp_type', 'gzip')
        super(DecompressingBufferedReader, self).__init__(*args, **kwargs)
#=================================================================
class ChunkedDataException(Exception):
def __init__(self, msg, data=''):
@ -169,7 +190,7 @@ class ChunkedDataException(Exception):
#=================================================================
class ChunkedDataReader(DecompressingBufferedReader):
class ChunkedDataReader(BufferedReader):
r"""
A ChunkedDataReader is a DecompressingBufferedReader
which also supports de-chunking of the data if it happens

View File

@ -65,11 +65,15 @@ Non-chunked data:
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
'xyz123!@#'
Non-chunked, compressed data
>>> ChunkedDataReader(BytesIO(compress('ABCDEF'))).read()
Non-chunked, compressed data, specify decomp_type
>>> ChunkedDataReader(BytesIO(compress('ABCDEF')), decomp_type='gzip').read()
'ABCDEF'
Non-chunked, compressed data
Non-chunked, compressed data, specify compression separately
>>> c = ChunkedDataReader(BytesIO(compress('ABCDEF'))); c.set_decomp('gzip'); c.read()
'ABCDEF'
Non-chunked, compressed data, wrap in DecompressingBufferedReader
>>> DecompressingBufferedReader(ChunkedDataReader(BytesIO(compress('\nABCDEF\nGHIJ')))).read()
'\nABCDEF\nGHIJ'