1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

blockloader: support for loader profiles, specified via 'profile+scheme://...' urls. Profiles specify additional settings (e.g. credentials) that are not included in the url. To enable custom profiles, call BlockLoader.set_profile_loader(callable) with a callable that will return custom config. Addresses #180
This commit is contained in:
Ilya Kreymer 2016-05-18 16:34:58 -07:00
parent d11bd444ad
commit 8ad66249c7
3 changed files with 64 additions and 31 deletions

View File

@ -236,23 +236,34 @@ def read_last_line(fh, offset=256):
#================================================================= #=================================================================
class BlockLoader(object): class BaseLoader(object):
def __init__(self, **kwargs):
    """
    Accept arbitrary keyword settings. The base loader stores none of
    them; subclasses pick out the keys they care about.
    """
    pass

def load(self, url, offset=0, length=-1):
    """
    Placeholder for the loading entry point that concrete loaders override.

    :param url: location of the content to load
    :param offset: starting offset, defaults to 0
    :param length: amount to load, defaults to -1
    :raises NotImplementedError: always, the base class cannot load anything
    """
    # NotImplemented is the comparison sentinel and is not callable, so
    # ``raise NotImplemented()`` fails with a TypeError. The exception meant
    # for "subclass must override" is NotImplementedError.
    raise NotImplementedError()
#=================================================================
class BlockLoader(BaseLoader):
""" """
a loader which can stream blocks of content a loader which can stream blocks of content
given a uri, offset and optional length. given a uri, offset and optional length.
Currently supports: http/https and file/local file system Currently supports: http/https and file/local file system
""" """
def __init__(self, *args, **kwargs): loaders = {}
profile_loader = None
def __init__(self, **kwargs):
self.cached = {} self.cached = {}
self.args = args
self.kwargs = kwargs self.kwargs = kwargs
def load(self, url, offset=0, length=-1): def load(self, url, offset=0, length=-1):
loader = self._get_loader_for(url) loader = self._get_loader_for_url(url)
return loader.load(url, offset, length) return loader.load(url, offset, length)
def _get_loader_for(self, url): def _get_loader_for_url(self, url):
""" """
Determine loading method based on uri Determine loading method based on uri
""" """
@ -266,14 +277,41 @@ class BlockLoader(object):
if loader: if loader:
return loader return loader
loader_cls = LOADERS.get(type_) if '+' in type_:
if not loader_cls: profile_name, scheme = type_.split('+', 1)
raise IOError('No Loader for type: ' + type_) else:
profile_name = ''
scheme = type_
loader_cls = self._get_loader_class_for_type(scheme)
if not loader_cls:
raise IOError('No Loader for type: ' + scheme)
profile = self.kwargs
if self.profile_loader:
profile = self.profile_loader(profile_name, scheme)
loader = loader_cls(**profile)
loader = loader_cls(*self.args, **self.kwargs)
self.cached[type_] = loader self.cached[type_] = loader
return loader return loader
def _get_loader_class_for_type(self, type_):
loader_cls = self.loaders.get(type_)
return loader_cls
@staticmethod
def init_default_loaders():
    """
    Register the built-in loader classes in ``BlockLoader.loaders``,
    keyed by url scheme. Both http and https map to HttpLoader.
    """
    defaults = (
        ('http', HttpLoader),
        ('https', HttpLoader),
        ('s3', S3Loader),
        ('file', LocalFileLoader),
    )
    for scheme, loader_cls in defaults:
        BlockLoader.loaders[scheme] = loader_cls
@staticmethod
def set_profile_loader(src):
BlockLoader.profile_loader = src
@staticmethod @staticmethod
def _make_range_header(offset, length): def _make_range_header(offset, length):
@ -286,10 +324,7 @@ class BlockLoader(object):
#================================================================= #=================================================================
class LocalFileLoader(object): class LocalFileLoader(BaseLoader):
def __init__(self, *args, **kwargs):
pass
def load(self, url, offset=0, length=-1): def load(self, url, offset=0, length=-1):
""" """
Load a file-like reader from the local file system Load a file-like reader from the local file system
@ -329,9 +364,11 @@ class LocalFileLoader(object):
#================================================================= #=================================================================
class HttpLoader(object): class HttpLoader(BaseLoader):
def __init__(self, cookie_maker=None, *args, **kwargs): def __init__(self, **kwargs):
self.cookie_maker = cookie_maker self.cookie_maker = kwargs.get('cookie_maker')
if not self.cookie_maker:
self.cookie_maker = kwargs.get('cookie')
self.session = None self.session = None
def load(self, url, offset, length): def load(self, url, offset, length):
@ -357,17 +394,19 @@ class HttpLoader(object):
#================================================================= #=================================================================
class S3Loader(object): class S3Loader(BaseLoader):
def __init__(self, *args, **kwargs): def __init__(self, **kwargs):
self.s3conn = None self.s3conn = None
self.aws_access_key_id = kwargs.get('aws_access_key_id')
self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
def load(self, url, offset, length): def load(self, url, offset, length):
if not s3_avail: #pragma: no cover if not s3_avail: #pragma: no cover
raise IOError('To load from s3 paths, ' + raise IOError('To load from s3 paths, ' +
'you must install boto: pip install boto') 'you must install boto: pip install boto')
aws_access_key_id = None aws_access_key_id = self.aws_access_key_id
aws_secret_access_key = None aws_secret_access_key = self.aws_secret_access_key
parts = urlsplit(url) parts = urlsplit(url)
@ -495,12 +534,6 @@ class LimitReader(object):
return stream return stream
# ============================================================================
#================================================================= BlockLoader.init_default_loaders()
LOADERS = {'http': HttpLoader,
'https': HttpLoader,
's3': S3Loader,
'file': LocalFileLoader
}

View File

@ -37,11 +37,11 @@ Traceback (most recent call last):
IOError: [Errno 2] No such file or directory: '_x_no_such_file_' IOError: [Errno 2] No such file or directory: '_x_no_such_file_'
# HMAC Cookie Maker # HMAC Cookie Maker
>>> print_str(BlockLoader(HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read()) >>> print_str(BlockLoader(cookie_maker=HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read())
'Example Domain' 'Example Domain'
# fixed cookie, range request # fixed cookie, range request
>>> print_str(BlockLoader('some=value').load('http://example.com', 41, 14).read()) >>> print_str(BlockLoader(cookie='some=value').load('http://example.com', 41, 14).read())
'Example Domain' 'Example Domain'
# range request # range request

View File

@ -57,7 +57,7 @@ class ArcWarcRecordLoader(object):
def __init__(self, loader=None, cookie_maker=None, block_size=8192, def __init__(self, loader=None, cookie_maker=None, block_size=8192,
verify_http=True): verify_http=True):
if not loader: if not loader:
loader = BlockLoader(cookie_maker) loader = BlockLoader(cookie_maker=cookie_maker)
self.loader = loader self.loader = loader
self.block_size = block_size self.block_size = block_size