1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

DecompressingBufferedReader: default to 'gzip' decompression instead of None.

ChunkedDataReader also automatically attempts decompression by default.
Add tests to verify.
This commit is contained in:
Ilya Kreymer 2014-04-08 21:49:04 -07:00
parent 02fe78cb0b
commit 8897a0a7c9
2 changed files with 16 additions and 3 deletions

View File

@ -30,7 +30,7 @@ class DecompressingBufferedReader(object):
DECOMPRESSORS = {'gzip': gzip_decompressor}
def __init__(self, stream, block_size=1024,
decomp_type=None,
decomp_type='gzip',
starting_data=None):
self.stream = stream
self.block_size = block_size

View File

@ -10,8 +10,8 @@ r"""
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
' CDX N b a m s k r M S V g\n'
# decompress with on the fly compression
>>> DecompressingBufferedReader(BytesIO(compress('ABC\n1234\n')), decomp_type = 'gzip').read()
# decompress with on the fly compression, default gzip compression
>>> DecompressingBufferedReader(BytesIO(compress('ABC\n1234\n'))).read()
'ABC\n1234\n'
# error: invalid compress type
@ -27,6 +27,11 @@ Exception: Decompression type not supported: bzip2
Traceback (most recent call last):
error: Error -3 while decompressing: incorrect header check
# invalid output when reading compressed data as not compressed
>>> DecompressingBufferedReader(BytesIO(compress('ABC')), decomp_type = None).read() != 'ABC'
True
# DecompressingBufferedReader readline() with decompression (zipnum file, no header)
>>> DecompressingBufferedReader(open(test_zip_dir + 'zipnum-sample.cdx.gz', 'rb'), decomp_type = 'gzip').readline()
'com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz\n'
@ -60,6 +65,14 @@ Non-chunked data:
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
'xyz123!@#'
Non-chunked, compressed data
>>> ChunkedDataReader(BytesIO(compress('ABCDEF'))).read()
'ABCDEF'
Non-chunked, compressed data
>>> DecompressingBufferedReader(ChunkedDataReader(BytesIO(compress('\nABCDEF\nGHIJ')))).read()
'\nABCDEF\nGHIJ'
Starts like chunked data, but isn't:
>>> c = ChunkedDataReader(BytesIO("1\r\nxyz123!@#"));
>>> c.read() + c.read()