mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
bufferedreader cleanup:
* BufferedReader defaults to no decompression * DecompressingBufferedReader defaults to gzip decomp * ChunkedDataReader defaults to no gzip decomp, but decomp can be set later via set_decomp(). This allows chunked responses to be de-chunked but not decompressed (e.g. for non-text responses)
This commit is contained in:
parent
53ad67eb9c
commit
9cf5327e88
@ -81,7 +81,12 @@ class RewriteContent:
|
||||
|
||||
if (rewritten_headers.
|
||||
contains_removed_header('content-encoding', 'gzip')):
|
||||
stream = DecompressingBufferedReader(stream, decomp_type='gzip')
|
||||
|
||||
#optimize: if already a ChunkedDataReader, add gzip
|
||||
if isinstance(stream, ChunkedDataReader):
|
||||
stream.set_decomp('gzip')
|
||||
else:
|
||||
stream = DecompressingBufferedReader(stream, decomp_type='gzip')
|
||||
|
||||
if rewritten_headers.charset:
|
||||
encoding = rewritten_headers.charset
|
||||
|
@ -11,7 +11,7 @@ def gzip_decompressor():
|
||||
|
||||
|
||||
#=================================================================
|
||||
class DecompressingBufferedReader(object):
|
||||
class BufferedReader(object):
|
||||
"""
|
||||
A wrapping line reader which wraps an existing reader.
|
||||
Read operations operate on underlying buffer, which is filled to
|
||||
@ -20,9 +20,12 @@ class DecompressingBufferedReader(object):
|
||||
If an optional decompress type is specified,
|
||||
data is fed through the decompressor when read from the buffer.
|
||||
Currently supported decompression: gzip
|
||||
If unspecified, default decompression is None
|
||||
|
||||
If decompression fails on first try, data is assumed to be decompressed
|
||||
and no exception is thrown. If a failure occurs after data has been
|
||||
If decompression is specified, and decompress fails on first try,
|
||||
data is assumed to not be compressed and no exception is thrown.
|
||||
|
||||
If a failure occurs after data has been
|
||||
partially decompressed, the exception is propagated.
|
||||
|
||||
"""
|
||||
@ -30,7 +33,7 @@ class DecompressingBufferedReader(object):
|
||||
DECOMPRESSORS = {'gzip': gzip_decompressor}
|
||||
|
||||
def __init__(self, stream, block_size=1024,
|
||||
decomp_type='gzip',
|
||||
decomp_type=None,
|
||||
starting_data=None):
|
||||
self.stream = stream
|
||||
self.block_size = block_size
|
||||
@ -42,6 +45,12 @@ class DecompressingBufferedReader(object):
|
||||
self.num_read = 0
|
||||
self.buff_size = 0
|
||||
|
||||
def set_decomp(self, decomp_type):
|
||||
if self.num_read > 0:
|
||||
raise Exception('Attempting to change decompression mid-stream')
|
||||
|
||||
self._init_decomp(decomp_type)
|
||||
|
||||
def _init_decomp(self, decomp_type):
|
||||
if decomp_type:
|
||||
try:
|
||||
@ -161,6 +170,18 @@ class DecompressingBufferedReader(object):
|
||||
self.stream = None
|
||||
|
||||
|
||||
#=================================================================
|
||||
class DecompressingBufferedReader(BufferedReader):
|
||||
"""
|
||||
A BufferedReader which defaults to gzip decompression,
|
||||
(unless different type specified)
|
||||
"""
|
||||
def __init__(self, *args, **kwargs):
|
||||
if 'decomp_type' not in kwargs:
|
||||
kwargs['decomp_type'] = 'gzip'
|
||||
super(DecompressingBufferedReader, self).__init__(*args, **kwargs)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class ChunkedDataException(Exception):
|
||||
def __init__(self, msg, data=''):
|
||||
@ -169,7 +190,7 @@ class ChunkedDataException(Exception):
|
||||
|
||||
|
||||
#=================================================================
|
||||
class ChunkedDataReader(DecompressingBufferedReader):
|
||||
class ChunkedDataReader(BufferedReader):
|
||||
r"""
|
||||
A ChunkedDataReader is a BufferedReader
|
||||
which also supports de-chunking of the data if it happens
|
||||
|
@ -65,11 +65,15 @@ Non-chunked data:
|
||||
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
|
||||
'xyz123!@#'
|
||||
|
||||
Non-chunked, compressed data
|
||||
>>> ChunkedDataReader(BytesIO(compress('ABCDEF'))).read()
|
||||
Non-chunked, compressed data, specify decomp_type
|
||||
>>> ChunkedDataReader(BytesIO(compress('ABCDEF')), decomp_type='gzip').read()
|
||||
'ABCDEF'
|
||||
|
||||
Non-chunked, compressed data
|
||||
Non-chunked, compressed data, specify compression separately
|
||||
>>> c = ChunkedDataReader(BytesIO(compress('ABCDEF'))); c.set_decomp('gzip'); c.read()
|
||||
'ABCDEF'
|
||||
|
||||
Non-chunked, compressed data, wrap in DecompressingBufferedReader
|
||||
>>> DecompressingBufferedReader(ChunkedDataReader(BytesIO(compress('\nABCDEF\nGHIJ')))).read()
|
||||
'\nABCDEF\nGHIJ'
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user