1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00
pywb/pywb/utils/test/test_loaders.py
Ilya Kreymer 97182b71b7 refactor:
- merge pywb.urlrewrite -> pywb.rewrite, remove obsolete stuff (rewrite_content.py, rewrite_live.py, dsrules.py)
- move wbrequestresponse -> pywb.apps
- move pywb.webapp.handlers -> pywb.apps.static_handler
- remove pywb.webapp, pywb.framework packages
- disable old header_rewriter, content_rewriter tests
- finish renaming from previous warcserver refactor
- all other tests passing!
2017-05-23 19:08:29 -07:00

132 lines
3.6 KiB
Python

r"""
#=================================================================
# BlockLoader Tests (includes LimitReader)
# Ensure attempt to read more than 100 bytes, reads exactly 100 bytes
>>> len(BlockLoader().load(test_cdx_dir + 'iana.cdx', 0, 100).read(400))
100
# no length specified, read full amount requested
>>> len(BlockLoader().load(to_file_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
400
# no such file
#>>> len(BlockLoader().load('_x_no_such_file_', 0, 100).read(400)) # doctest: +IGNORE_EXCEPTION_DETAIL
#Traceback (most recent call last):
#IOError: [Errno 2] No such file or directory: '_x_no_such_file_'
# HMAC Cookie Maker
>>> print_str(BlockLoader(cookie_maker=HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read())
'Example Domain'
# fixed cookie, range request
>>> print_str(BlockLoader(cookie='some=value').load('http://example.com', 41, 14).read())
'Example Domain'
# range request
>>> print_str(BlockLoader().load('http://example.com', 1262).read())
'</html>\n'
# custom profile
>>> print_str(BlockLoader().load('local+http://example.com', 1262).read())
'</html>\n'
# unknown loader error
#>>> BlockLoader().load('foo://example.com', 10).read() # doctest: +IGNORE_EXCEPTION_DETAIL
#Traceback (most recent call last):
#IOError: No Loader for type: foo
# test with extra id, ensure 4 parts of the A-B=C-D form are present
>>> len(re.split('[-=]', HMACCookieMaker('test', 'test', 5).make('extra')))
4
# cookie extract tests
>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'a')
'b'
>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'c')
'd'
>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'x')
>>> extract_client_cookie(dict(HTTP_COOKIE='x'), 'x')
>>> extract_client_cookie({}, 'y')
# test read_last_line
>>> print_str(read_last_line(BytesIO(b'A\nB\nC')))
'C'
>>> print_str(read_last_line(BytesIO(b'Some Line\nLonger Line\nLongest Last Line LL'), offset=8))
'Longest Last Line LL'
>>> print_str(read_last_line(BytesIO(b'A\nBC')))
'BC'
>>> print_str(read_last_line(BytesIO(b'A\nBC\n')))
'BC\n'
>>> print_str(read_last_line(BytesIO(b'ABC')))
'ABC'
"""
#=================================================================
import re
import os
import pytest
import six
from six import StringIO
from io import BytesIO
import requests
from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url
from pywb.utils.loaders import extract_client_cookie
from pywb.utils.loaders import read_last_line
from warcio.bufferedreaders import DecompressingBufferedReader
from pywb import get_test_dir
# Directory prefix for the sample CDX files used by the doctests in the module
# docstring (e.g. 'iana.cdx', 'example.cdx').
# NOTE(review): plain string concatenation -- assumes get_test_dir() returns a
# path that already ends with a separator; confirm against pywb.get_test_dir.
test_cdx_dir = get_test_dir() + 'cdx/'
def test_s3_read_1():
    """Read a fixed byte range from an S3-hosted WARC and check its header.

    The test is skipped when the ``boto`` package cannot be imported.
    It loads 2526 bytes at offset 53235662 from a Common Crawl ``.warc.gz``
    object, verifies that exactly that many bytes come back, then
    decompresses them and checks the first two lines of the WARC record.
    """
    pytest.importorskip('boto')

    s3_url = 's3://commoncrawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz'
    stream = BlockLoader().load(s3_url,
                                offset=53235662,
                                length=2526)

    payload = stream.read()
    assert len(payload) == 2526

    # The raw range is compressed; wrap it so lines can be read as plain bytes.
    record_reader = DecompressingBufferedReader(BytesIO(payload))

    first_line = record_reader.readline()
    assert first_line == b'WARC/1.0\r\n'

    second_line = record_reader.readline()
    assert second_line == b'WARC-Type: response\r\n'
# Error
def test_err_no_such_file():
    """Loading a path that does not exist must raise ``IOError``.

    Pytest counterpart of the disabled "no such file" doctest in the
    module docstring.
    """
    with pytest.raises(IOError):
        # The read size must be the integer 400 (as in the doctest version),
        # not the string '400'. With a string, the test only passes if the
        # IOError is raised before the size is ever used, and a TypeError
        # would surface instead if the failure happened later.
        len(BlockLoader().load('_x_no_such_file_', 0, 100).read(400))
def test_err_unknown_loader():
    """A URL with an unregistered scheme must raise ``IOError``.

    Pytest counterpart of the disabled "unknown loader error" doctest,
    whose expected message was ``IOError: No Loader for type: foo``.
    """
    with pytest.raises(IOError):
        stream = BlockLoader().load('foo://example.com', 10)
        stream.read()
def print_str(string):
    """Return *string* in a form whose repr is the same on Python 2 and 3.

    On Python 3 the value is decoded as UTF-8; on Python 2 it is returned
    unchanged. The doctests in the module docstring use this helper so that
    their expected output has no ``b''`` prefix.
    """
    if six.PY3:
        return string.decode('utf-8')
    return string
if __name__ == "__main__":
    # When executed as a script, run the doctests in this module's docstring.
    from doctest import testmod
    testmod()