mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
bufferedreader cleanup:
* BufferedReader defaults to no decompression * DecompressingBufferedReader defaults to gzip decomp * ChunkedDataReader defaults to no gzip decomp, but decomp can be set later via set_decomp(). This allows chunked responses to be de-chunked but not decompressed (e.g. for non-text responses)
This commit is contained in:
parent
53ad67eb9c
commit
9cf5327e88
@ -81,7 +81,12 @@ class RewriteContent:
|
|||||||
|
|
||||||
if (rewritten_headers.
|
if (rewritten_headers.
|
||||||
contains_removed_header('content-encoding', 'gzip')):
|
contains_removed_header('content-encoding', 'gzip')):
|
||||||
stream = DecompressingBufferedReader(stream, decomp_type='gzip')
|
|
||||||
|
#optimize: if already a ChunkedDataReader, add gzip
|
||||||
|
if isinstance(stream, ChunkedDataReader):
|
||||||
|
stream.set_decomp('gzip')
|
||||||
|
else:
|
||||||
|
stream = DecompressingBufferedReader(stream, decomp_type='gzip')
|
||||||
|
|
||||||
if rewritten_headers.charset:
|
if rewritten_headers.charset:
|
||||||
encoding = rewritten_headers.charset
|
encoding = rewritten_headers.charset
|
||||||
|
@ -11,7 +11,7 @@ def gzip_decompressor():
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class DecompressingBufferedReader(object):
|
class BufferedReader(object):
|
||||||
"""
|
"""
|
||||||
A wrapping line reader which wraps an existing reader.
|
A wrapping line reader which wraps an existing reader.
|
||||||
Read operations operate on underlying buffer, which is filled to
|
Read operations operate on underlying buffer, which is filled to
|
||||||
@ -20,9 +20,12 @@ class DecompressingBufferedReader(object):
|
|||||||
If an optional decompress type is specified,
|
If an optional decompress type is specified,
|
||||||
data is fed through the decompressor when read from the buffer.
|
data is fed through the decompressor when read from the buffer.
|
||||||
Currently supported decompression: gzip
|
Currently supported decompression: gzip
|
||||||
|
If unspecified, default decompression is None
|
||||||
|
|
||||||
If decompression fails on first try, data is assumed to be decompressed
|
If decompression is specified, and decompress fails on first try,
|
||||||
and no exception is thrown. If a failure occurs after data has been
|
data is assumed to not be compressed and no exception is thrown.
|
||||||
|
|
||||||
|
If a failure occurs after data has been
|
||||||
partially decompressed, the exception is propagated.
|
partially decompressed, the exception is propagated.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -30,7 +33,7 @@ class DecompressingBufferedReader(object):
|
|||||||
DECOMPRESSORS = {'gzip': gzip_decompressor}
|
DECOMPRESSORS = {'gzip': gzip_decompressor}
|
||||||
|
|
||||||
def __init__(self, stream, block_size=1024,
|
def __init__(self, stream, block_size=1024,
|
||||||
decomp_type='gzip',
|
decomp_type=None,
|
||||||
starting_data=None):
|
starting_data=None):
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
self.block_size = block_size
|
self.block_size = block_size
|
||||||
@ -42,6 +45,12 @@ class DecompressingBufferedReader(object):
|
|||||||
self.num_read = 0
|
self.num_read = 0
|
||||||
self.buff_size = 0
|
self.buff_size = 0
|
||||||
|
|
||||||
|
def set_decomp(self, decomp_type):
|
||||||
|
if self.num_read > 0:
|
||||||
|
raise Exception('Attempting to change decompression mid-stream')
|
||||||
|
|
||||||
|
self._init_decomp(decomp_type)
|
||||||
|
|
||||||
def _init_decomp(self, decomp_type):
|
def _init_decomp(self, decomp_type):
|
||||||
if decomp_type:
|
if decomp_type:
|
||||||
try:
|
try:
|
||||||
@ -161,6 +170,18 @@ class DecompressingBufferedReader(object):
|
|||||||
self.stream = None
|
self.stream = None
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class DecompressingBufferedReader(BufferedReader):
|
||||||
|
"""
|
||||||
|
A BufferedReader which defaults to gzip decompression,
|
||||||
|
(unless different type specified)
|
||||||
|
"""
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
if 'decomp_type' not in kwargs:
|
||||||
|
kwargs['decomp_type'] = 'gzip'
|
||||||
|
super(DecompressingBufferedReader, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ChunkedDataException(Exception):
|
class ChunkedDataException(Exception):
|
||||||
def __init__(self, msg, data=''):
|
def __init__(self, msg, data=''):
|
||||||
@ -169,7 +190,7 @@ class ChunkedDataException(Exception):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ChunkedDataReader(DecompressingBufferedReader):
|
class ChunkedDataReader(BufferedReader):
|
||||||
r"""
|
r"""
|
||||||
A ChunkedDataReader is a DecompressingBufferedReader
|
A ChunkedDataReader is a BufferedReader
|
||||||
which also supports de-chunking of the data if it happens
|
which also supports de-chunking of the data if it happens
|
||||||
|
@ -65,11 +65,15 @@ Non-chunked data:
|
|||||||
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
|
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
|
||||||
'xyz123!@#'
|
'xyz123!@#'
|
||||||
|
|
||||||
Non-chunked, compressed data
|
Non-chunked, compressed data, specify decomp_type
|
||||||
>>> ChunkedDataReader(BytesIO(compress('ABCDEF'))).read()
|
>>> ChunkedDataReader(BytesIO(compress('ABCDEF')), decomp_type='gzip').read()
|
||||||
'ABCDEF'
|
'ABCDEF'
|
||||||
|
|
||||||
Non-chunked, compressed data
|
Non-chunked, compressed data, specify compression separately
|
||||||
|
>>> c = ChunkedDataReader(BytesIO(compress('ABCDEF'))); c.set_decomp('gzip'); c.read()
|
||||||
|
'ABCDEF'
|
||||||
|
|
||||||
|
Non-chunked, compressed data, wrap in DecompressingBufferedReader
|
||||||
>>> DecompressingBufferedReader(ChunkedDataReader(BytesIO(compress('\nABCDEF\nGHIJ')))).read()
|
>>> DecompressingBufferedReader(ChunkedDataReader(BytesIO(compress('\nABCDEF\nGHIJ')))).read()
|
||||||
'\nABCDEF\nGHIJ'
|
'\nABCDEF\nGHIJ'
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user