1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

blockloader: support for loader profiles, specified via 'profile+scheme://...' urls. Profiles specify additional settings (e.g. credentials) that are not included in the url.

To enable custom profiles, call BlockLoader.set_profile_loader(callable) with a callable that will return custom config. Addresses #180
This commit is contained in:
Ilya Kreymer 2016-05-18 16:34:58 -07:00
parent d11bd444ad
commit 8ad66249c7
3 changed files with 64 additions and 31 deletions

View File

@ -236,23 +236,34 @@ def read_last_line(fh, offset=256):
#=================================================================
class BlockLoader(object):
class BaseLoader(object):
    """
    Base class for loaders.

    The constructor accepts and ignores arbitrary keyword arguments;
    subclasses pick out the settings they need.
    """
    def __init__(self, **kwargs):
        pass

    def load(self, url, offset=0, length=-1):
        """
        Load content for ``url``; must be overridden by subclasses.

        :param url: location of the content to load
        :param offset: starting offset, defaults to 0
        :param length: length to read, defaults to -1
        :raises NotImplementedError: always, in this base class
        """
        # NotImplemented is the rich-comparison sentinel and is not callable,
        # so ``raise NotImplemented()`` surfaced as a TypeError. The exception
        # class for an abstract method is NotImplementedError.
        raise NotImplementedError()
#=================================================================
class BlockLoader(BaseLoader):
"""
a loader which can stream blocks of content
given a uri, offset and optional length.
Currently supports: http/https and file/local file system
"""
def __init__(self, *args, **kwargs):
loaders = {}
profile_loader = None
def __init__(self, **kwargs):
self.cached = {}
self.args = args
self.kwargs = kwargs
def load(self, url, offset=0, length=-1):
loader = self._get_loader_for(url)
loader = self._get_loader_for_url(url)
return loader.load(url, offset, length)
def _get_loader_for(self, url):
def _get_loader_for_url(self, url):
"""
Determine loading method based on uri
"""
@ -266,14 +277,41 @@ class BlockLoader(object):
if loader:
return loader
loader_cls = LOADERS.get(type_)
if not loader_cls:
raise IOError('No Loader for type: ' + type_)
if '+' in type_:
profile_name, scheme = type_.split('+', 1)
else:
profile_name = ''
scheme = type_
loader_cls = self._get_loader_class_for_type(scheme)
if not loader_cls:
raise IOError('No Loader for type: ' + scheme)
profile = self.kwargs
if self.profile_loader:
profile = self.profile_loader(profile_name, scheme)
loader = loader_cls(**profile)
loader = loader_cls(*self.args, **self.kwargs)
self.cached[type_] = loader
return loader
def _get_loader_class_for_type(self, type_):
loader_cls = self.loaders.get(type_)
return loader_cls
@staticmethod
def init_default_loaders():
    """
    Register the built-in loader classes in ``BlockLoader.loaders``,
    keyed by url scheme. Existing entries for these schemes are replaced.
    """
    defaults = (
        ('http', HttpLoader),
        ('https', HttpLoader),
        ('s3', S3Loader),
        ('file', LocalFileLoader),
    )
    for scheme, loader_cls in defaults:
        BlockLoader.loaders[scheme] = loader_cls
@staticmethod
def set_profile_loader(src):
BlockLoader.profile_loader = src
@staticmethod
def _make_range_header(offset, length):
@ -286,10 +324,7 @@ class BlockLoader(object):
#=================================================================
class LocalFileLoader(object):
def __init__(self, *args, **kwargs):
pass
class LocalFileLoader(BaseLoader):
def load(self, url, offset=0, length=-1):
"""
Load a file-like reader from the local file system
@ -329,9 +364,11 @@ class LocalFileLoader(object):
#=================================================================
class HttpLoader(object):
def __init__(self, cookie_maker=None, *args, **kwargs):
self.cookie_maker = cookie_maker
class HttpLoader(BaseLoader):
def __init__(self, **kwargs):
self.cookie_maker = kwargs.get('cookie_maker')
if not self.cookie_maker:
self.cookie_maker = kwargs.get('cookie')
self.session = None
def load(self, url, offset, length):
@ -357,17 +394,19 @@ class HttpLoader(object):
#=================================================================
class S3Loader(object):
def __init__(self, *args, **kwargs):
class S3Loader(BaseLoader):
def __init__(self, **kwargs):
self.s3conn = None
self.aws_access_key_id = kwargs.get('aws_access_key_id')
self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
def load(self, url, offset, length):
if not s3_avail: #pragma: no cover
raise IOError('To load from s3 paths, ' +
'you must install boto: pip install boto')
aws_access_key_id = None
aws_secret_access_key = None
aws_access_key_id = self.aws_access_key_id
aws_secret_access_key = self.aws_secret_access_key
parts = urlsplit(url)
@ -495,12 +534,6 @@ class LimitReader(object):
return stream
#=================================================================
LOADERS = {'http': HttpLoader,
'https': HttpLoader,
's3': S3Loader,
'file': LocalFileLoader
}
# ============================================================================
BlockLoader.init_default_loaders()

View File

@ -37,11 +37,11 @@ Traceback (most recent call last):
IOError: [Errno 2] No such file or directory: '_x_no_such_file_'
# HMAC Cookie Maker
>>> print_str(BlockLoader(HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read())
>>> print_str(BlockLoader(cookie_maker=HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read())
'Example Domain'
# fixed cookie, range request
>>> print_str(BlockLoader('some=value').load('http://example.com', 41, 14).read())
>>> print_str(BlockLoader(cookie='some=value').load('http://example.com', 41, 14).read())
'Example Domain'
# range request

View File

@ -57,7 +57,7 @@ class ArcWarcRecordLoader(object):
def __init__(self, loader=None, cookie_maker=None, block_size=8192,
verify_http=True):
if not loader:
loader = BlockLoader(cookie_maker)
loader = BlockLoader(cookie_maker=cookie_maker)
self.loader = loader
self.block_size = block_size