1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-31 03:04:12 +02:00
pywb/pywb/utils/test/test_bufferedreaders.py

121 lines
3.7 KiB
Python

r"""
# DecompressingBufferedReader Tests
#=================================================================
# DecompressingBufferedReader readline()
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
' CDX N b a m s k r M S V g\n'
# detect not compressed
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
' CDX N b a m s k r M S V g\n'
# decompress data that was compressed on the fly, using the reader's default (gzip) decompression
>>> DecompressingBufferedReader(BytesIO(compress('ABC\n1234\n'))).read()
'ABC\n1234\n'
# error: invalid compress type
>>> DecompressingBufferedReader(BytesIO(compress('ABC')), decomp_type = 'bzip2').read()
Traceback (most recent call last):
Exception: Decompression type not supported: bzip2
# error: compressed member, followed by not compressed -- considered invalid
>>> x = DecompressingBufferedReader(BytesIO(compress('ABC') + '123'), decomp_type = 'gzip')
>>> b = x.read()
>>> b = x.read_next_member()
>>> x.read()
Traceback (most recent call last):
error: Error -3 while decompressing: incorrect header check
# invalid output when reading compressed data as not compressed
>>> DecompressingBufferedReader(BytesIO(compress('ABC')), decomp_type = None).read() != 'ABC'
True
# DecompressingBufferedReader readline() with decompression (zipnum file, no header)
>>> DecompressingBufferedReader(open(test_zip_dir + 'zipnum-sample.cdx.gz', 'rb'), decomp_type = 'gzip').readline()
'com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz\n'
# test very small block size
>>> dbr = DecompressingBufferedReader(BytesIO('ABCDEFG\nHIJKLMN\nOPQR\nXYZ'), block_size = 3)
>>> dbr.readline(); dbr.readline(4); dbr.readline(); dbr.readline(); dbr.readline(2); dbr.readline(); dbr.readline()
'ABCDEFG\n'
'HIJK'
'LMN\n'
'OPQR\n'
'XY'
'Z'
''
# test zero length reads
>>> x = DecompressingBufferedReader(LimitReader(BytesIO('\r\n'), 1))
>>> x.readline(0); x.read(0)
''
''
# Chunk-Decoding Buffered Reader Tests
#=================================================================
Properly formatted chunked data:
>>> c = ChunkedDataReader(BytesIO("4\r\n1234\r\n0\r\n\r\n"));
>>> c.read() + c.read() + c.read()
'1234'
Non-chunked data:
>>> ChunkedDataReader(BytesIO("xyz123!@#")).read()
'xyz123!@#'
Non-chunked, compressed data
>>> ChunkedDataReader(BytesIO(compress('ABCDEF'))).read()
'ABCDEF'
Non-chunked, compressed data, read through a wrapping DecompressingBufferedReader:
>>> DecompressingBufferedReader(ChunkedDataReader(BytesIO(compress('\nABCDEF\nGHIJ')))).read()
'\nABCDEF\nGHIJ'
Starts like chunked data, but isn't:
>>> c = ChunkedDataReader(BytesIO("1\r\nxyz123!@#"));
>>> c.read() + c.read()
'1\r\nx123!@#'
Chunked data cut off part way through:
>>> c = ChunkedDataReader(BytesIO("4\r\n1234\r\n4\r\n12"));
>>> c.read() + c.read()
'123412'
Zero-Length chunk:
>>> ChunkedDataReader(BytesIO("0\r\n\r\n")).read()
''
Chunked data cut off part way through, with raise_exceptions=True:
>>> c = ChunkedDataReader(BytesIO("4\r\n1234\r\n4\r\n12"), raise_exceptions=True)
>>> c.read() + c.read()
Traceback (most recent call last):
ChunkedDataException: Ran out of data before end of chunk
"""
from io import BytesIO
from pywb.utils.bufferedreaders import ChunkedDataReader
from pywb.utils.bufferedreaders import DecompressingBufferedReader
from pywb.utils.loaders import LimitReader
from pywb import get_test_dir
import zlib
# Fixture directories used by the doctests in the module docstring:
# 'cdx/' is where iana.cdx is opened from and 'zipcdx/' is where
# zipnum-sample.cdx.gz is opened from.
# NOTE(review): plain string concatenation presumably relies on
# get_test_dir() returning a path that ends with a separator -- confirm.
test_cdx_dir = get_test_dir() + 'cdx/'
test_zip_dir = get_test_dir() + 'zipcdx/'
def compress(buff):
    """Return *buff* compressed as one complete gzip member.

    Helper used by the doctests in this module to build compressed input
    on the fly.

    :param buff: payload to compress.  Bytes-like objects are compressed
        as-is; text (unicode) is encoded as UTF-8 first.
    :return: the gzip-framed compressed data as a byte string.
    """
    # zlib only accepts bytes-like input, so a text argument such as 'ABC'
    # raises TypeError on Python 3.  type(u'') is `unicode` on Python 2 and
    # `str` on Python 3, so byte strings are never re-encoded.
    if isinstance(buff, type(u'')):
        buff = buff.encode('utf-8')

    # Adding 16 to the window bits makes zlib emit a gzip header and trailer
    # instead of the plain zlib wrapper.
    compressor = zlib.compressobj(6, zlib.DEFLATED, zlib.MAX_WBITS + 16)

    # flush() returns whatever is still buffered plus the gzip trailer.
    return compressor.compress(buff) + compressor.flush()
if __name__ == "__main__":
    # Script entry point: run the doctests embedded in this module's
    # docstring.  The import is kept local so that merely importing the
    # module does not pull in doctest.
    import doctest

    doctest.testmod()