mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
decompressingbufferedreader: default to 'gzip' decompression instead of
none. ChunkedDataReader also automatically attempts decompression by default. Add tests to verify.
This commit is contained in:
parent
02fe78cb0b
commit
8897a0a7c9
@ -30,7 +30,7 @@ class DecompressingBufferedReader(object):
|
||||
DECOMPRESSORS = {'gzip': gzip_decompressor}
|
||||
|
||||
def __init__(self, stream, block_size=1024,
|
||||
decomp_type=None,
|
||||
decomp_type='gzip',
|
||||
starting_data=None):
|
||||
self.stream = stream
|
||||
self.block_size = block_size
|
||||
|
@ -10,8 +10,8 @@ r"""
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
|
||||
' CDX N b a m s k r M S V g\n'
|
||||
|
||||
# decompress with on the fly compression
|
||||
>>> DecompressingBufferedReader(BytesIO(compress('ABC\n1234\n')), decomp_type = 'gzip').read()
|
||||
# decompress with on the fly compression, default gzip compression
|
||||
>>> DecompressingBufferedReader(BytesIO(compress('ABC\n1234\n'))).read()
|
||||
'ABC\n1234\n'
|
||||
|
||||
# error: invalid compress type
|
||||
@ -27,6 +27,11 @@ Exception: Decompression type not supported: bzip2
|
||||
Traceback (most recent call last):
|
||||
error: Error -3 while decompressing: incorrect header check
|
||||
|
||||
# invalid output when reading compressed data as not compressed
|
||||
>>> DecompressingBufferedReader(BytesIO(compress('ABC')), decomp_type = None).read() != 'ABC'
|
||||
True
|
||||
|
||||
|
||||
# DecompressingBufferedReader readline() with decompression (zipnum file, no header)
|
||||
>>> DecompressingBufferedReader(open(test_zip_dir + 'zipnum-sample.cdx.gz', 'rb'), decomp_type = 'gzip').readline()
|
||||
'com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz\n'
|
||||
@ -60,6 +65,14 @@ Non-chunked data:
|
||||
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
|
||||
'xyz123!@#'
|
||||
|
||||
Non-chunked, compressed data
|
||||
>>> ChunkedDataReader(BytesIO(compress('ABCDEF'))).read()
|
||||
'ABCDEF'
|
||||
|
||||
Non-chunked, compressed data, wrapped in a DecompressingBufferedReader
|
||||
>>> DecompressingBufferedReader(ChunkedDataReader(BytesIO(compress('\nABCDEF\nGHIJ')))).read()
|
||||
'\nABCDEF\nGHIJ'
|
||||
|
||||
Starts like chunked data, but isn't:
|
||||
>>> c = ChunkedDataReader(BytesIO("1\r\nxyz123!@#"));
|
||||
>>> c.read() + c.read()
|
||||
|
Loading…
x
Reference in New Issue
Block a user