1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

bufferedreader cleanup:

* BufferedReader defaults to no decompression
* DecompressingBufferedReader defaults to gzip decomp
* ChunkedDataReader defaults to no gzip decomp, but decomp
can be set later via set_decomp().
This allows chunked responses to be de-chunked but not decompressed
(e.g. for non-text responses)
This commit is contained in:
Ilya Kreymer 2014-04-28 20:15:31 -07:00
parent 53ad67eb9c
commit 9cf5327e88
3 changed files with 39 additions and 9 deletions

View File

@ -81,7 +81,12 @@ class RewriteContent:
if (rewritten_headers. if (rewritten_headers.
contains_removed_header('content-encoding', 'gzip')): contains_removed_header('content-encoding', 'gzip')):
stream = DecompressingBufferedReader(stream, decomp_type='gzip')
#optimize: if already a ChunkedDataReader, add gzip
if isinstance(stream, ChunkedDataReader):
stream.set_decomp('gzip')
else:
stream = DecompressingBufferedReader(stream, decomp_type='gzip')
if rewritten_headers.charset: if rewritten_headers.charset:
encoding = rewritten_headers.charset encoding = rewritten_headers.charset

View File

@ -11,7 +11,7 @@ def gzip_decompressor():
#================================================================= #=================================================================
class DecompressingBufferedReader(object): class BufferedReader(object):
""" """
A wrapping line reader which wraps an existing reader. A wrapping line reader which wraps an existing reader.
Read operations operate on underlying buffer, which is filled to Read operations operate on underlying buffer, which is filled to
@ -20,9 +20,12 @@ class DecompressingBufferedReader(object):
If an optional decompress type is specified, If an optional decompress type is specified,
data is fed through the decompressor when read from the buffer. data is fed through the decompressor when read from the buffer.
Currently supported decompression: gzip Currently supported decompression: gzip
If unspecified, default decompression is None
If decompression fails on first try, data is assumed to be decompressed If decompression is specified, and decompress fails on first try,
and no exception is thrown. If a failure occurs after data has been data is assumed to not be compressed and no exception is thrown.
If a failure occurs after data has been
partially decompressed, the exception is propagated. partially decompressed, the exception is propagated.
""" """
@ -30,7 +33,7 @@ class DecompressingBufferedReader(object):
DECOMPRESSORS = {'gzip': gzip_decompressor} DECOMPRESSORS = {'gzip': gzip_decompressor}
def __init__(self, stream, block_size=1024, def __init__(self, stream, block_size=1024,
decomp_type='gzip', decomp_type=None,
starting_data=None): starting_data=None):
self.stream = stream self.stream = stream
self.block_size = block_size self.block_size = block_size
@ -42,6 +45,12 @@ class DecompressingBufferedReader(object):
self.num_read = 0 self.num_read = 0
self.buff_size = 0 self.buff_size = 0
def set_decomp(self, decomp_type):
if self.num_read > 0:
raise Exception('Attempting to change decompression mid-stream')
self._init_decomp(decomp_type)
def _init_decomp(self, decomp_type): def _init_decomp(self, decomp_type):
if decomp_type: if decomp_type:
try: try:
@ -161,6 +170,18 @@ class DecompressingBufferedReader(object):
self.stream = None self.stream = None
#=================================================================
class DecompressingBufferedReader(BufferedReader):
"""
A BufferedReader which defaults to gzip decompression,
(unless different type specified)
"""
def __init__(self, *args, **kwargs):
if 'decomp_type' not in kwargs:
kwargs['decomp_type'] = 'gzip'
super(DecompressingBufferedReader, self).__init__(*args, **kwargs)
#================================================================= #=================================================================
class ChunkedDataException(Exception): class ChunkedDataException(Exception):
def __init__(self, msg, data=''): def __init__(self, msg, data=''):
@ -169,7 +190,7 @@ class ChunkedDataException(Exception):
#================================================================= #=================================================================
class ChunkedDataReader(DecompressingBufferedReader): class ChunkedDataReader(BufferedReader):
r""" r"""
A ChunkedDataReader is a DecompressingBufferedReader A ChunkedDataReader is a DecompressingBufferedReader
which also supports de-chunking of the data if it happens which also supports de-chunking of the data if it happens

View File

@ -65,11 +65,15 @@ Non-chunked data:
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read() >>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
'xyz123!@#' 'xyz123!@#'
Non-chunked, compressed data Non-chunked, compressed data, specify decomp_type
>>> ChunkedDataReader(BytesIO(compress('ABCDEF'))).read() >>> ChunkedDataReader(BytesIO(compress('ABCDEF')), decomp_type='gzip').read()
'ABCDEF' 'ABCDEF'
Non-chunked, compressed data Non-chunked, compressed data, specifiy compression seperately
>>> c = ChunkedDataReader(BytesIO(compress('ABCDEF'))); c.set_decomp('gzip'); c.read()
'ABCDEF'
Non-chunked, compressed data, wrap in DecompressingBufferedReader
>>> DecompressingBufferedReader(ChunkedDataReader(BytesIO(compress('\nABCDEF\nGHIJ')))).read() >>> DecompressingBufferedReader(ChunkedDataReader(BytesIO(compress('\nABCDEF\nGHIJ')))).read()
'\nABCDEF\nGHIJ' '\nABCDEF\nGHIJ'