mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
blockloader: support for loader profiles, specified via 'profile+scheme://...' urls. Profiles specify additional settings (eg. credentials) that are not included in the url.
To enable custom profiles, call BlockLoader.set_profile_loader(callable) with a callable that will return custom config, addresses #180
This commit is contained in:
parent
d11bd444ad
commit
8ad66249c7
@ -236,23 +236,34 @@ def read_last_line(fh, offset=256):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class BlockLoader(object):
|
class BaseLoader(object):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def load(self, url, offset=0, length=-1):
|
||||||
|
raise NotImplemented()
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class BlockLoader(BaseLoader):
|
||||||
"""
|
"""
|
||||||
a loader which can stream blocks of content
|
a loader which can stream blocks of content
|
||||||
given a uri, offset and optional length.
|
given a uri, offset and optional length.
|
||||||
Currently supports: http/https and file/local file system
|
Currently supports: http/https and file/local file system
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
loaders = {}
|
||||||
|
profile_loader = None
|
||||||
|
|
||||||
|
def __init__(self, **kwargs):
|
||||||
self.cached = {}
|
self.cached = {}
|
||||||
self.args = args
|
|
||||||
self.kwargs = kwargs
|
self.kwargs = kwargs
|
||||||
|
|
||||||
def load(self, url, offset=0, length=-1):
|
def load(self, url, offset=0, length=-1):
|
||||||
loader = self._get_loader_for(url)
|
loader = self._get_loader_for_url(url)
|
||||||
return loader.load(url, offset, length)
|
return loader.load(url, offset, length)
|
||||||
|
|
||||||
def _get_loader_for(self, url):
|
def _get_loader_for_url(self, url):
|
||||||
"""
|
"""
|
||||||
Determine loading method based on uri
|
Determine loading method based on uri
|
||||||
"""
|
"""
|
||||||
@ -266,14 +277,41 @@ class BlockLoader(object):
|
|||||||
if loader:
|
if loader:
|
||||||
return loader
|
return loader
|
||||||
|
|
||||||
loader_cls = LOADERS.get(type_)
|
if '+' in type_:
|
||||||
if not loader_cls:
|
profile_name, scheme = type_.split('+', 1)
|
||||||
raise IOError('No Loader for type: ' + type_)
|
else:
|
||||||
|
profile_name = ''
|
||||||
|
scheme = type_
|
||||||
|
|
||||||
|
loader_cls = self._get_loader_class_for_type(scheme)
|
||||||
|
|
||||||
|
if not loader_cls:
|
||||||
|
raise IOError('No Loader for type: ' + scheme)
|
||||||
|
|
||||||
|
profile = self.kwargs
|
||||||
|
|
||||||
|
if self.profile_loader:
|
||||||
|
profile = self.profile_loader(profile_name, scheme)
|
||||||
|
|
||||||
|
loader = loader_cls(**profile)
|
||||||
|
|
||||||
loader = loader_cls(*self.args, **self.kwargs)
|
|
||||||
self.cached[type_] = loader
|
self.cached[type_] = loader
|
||||||
return loader
|
return loader
|
||||||
|
|
||||||
|
def _get_loader_class_for_type(self, type_):
|
||||||
|
loader_cls = self.loaders.get(type_)
|
||||||
|
return loader_cls
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def init_default_loaders():
|
||||||
|
BlockLoader.loaders['http'] = HttpLoader
|
||||||
|
BlockLoader.loaders['https'] = HttpLoader
|
||||||
|
BlockLoader.loaders['s3'] = S3Loader
|
||||||
|
BlockLoader.loaders['file'] = LocalFileLoader
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def set_profile_loader(src):
|
||||||
|
BlockLoader.profile_loader = src
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _make_range_header(offset, length):
|
def _make_range_header(offset, length):
|
||||||
@ -286,10 +324,7 @@ class BlockLoader(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class LocalFileLoader(object):
|
class LocalFileLoader(BaseLoader):
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def load(self, url, offset=0, length=-1):
|
def load(self, url, offset=0, length=-1):
|
||||||
"""
|
"""
|
||||||
Load a file-like reader from the local file system
|
Load a file-like reader from the local file system
|
||||||
@ -329,9 +364,11 @@ class LocalFileLoader(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class HttpLoader(object):
|
class HttpLoader(BaseLoader):
|
||||||
def __init__(self, cookie_maker=None, *args, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
self.cookie_maker = cookie_maker
|
self.cookie_maker = kwargs.get('cookie_maker')
|
||||||
|
if not self.cookie_maker:
|
||||||
|
self.cookie_maker = kwargs.get('cookie')
|
||||||
self.session = None
|
self.session = None
|
||||||
|
|
||||||
def load(self, url, offset, length):
|
def load(self, url, offset, length):
|
||||||
@ -357,17 +394,19 @@ class HttpLoader(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class S3Loader(object):
|
class S3Loader(BaseLoader):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
self.s3conn = None
|
self.s3conn = None
|
||||||
|
self.aws_access_key_id = kwargs.get('aws_access_key_id')
|
||||||
|
self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
|
||||||
|
|
||||||
def load(self, url, offset, length):
|
def load(self, url, offset, length):
|
||||||
if not s3_avail: #pragma: no cover
|
if not s3_avail: #pragma: no cover
|
||||||
raise IOError('To load from s3 paths, ' +
|
raise IOError('To load from s3 paths, ' +
|
||||||
'you must install boto: pip install boto')
|
'you must install boto: pip install boto')
|
||||||
|
|
||||||
aws_access_key_id = None
|
aws_access_key_id = self.aws_access_key_id
|
||||||
aws_secret_access_key = None
|
aws_secret_access_key = self.aws_secret_access_key
|
||||||
|
|
||||||
parts = urlsplit(url)
|
parts = urlsplit(url)
|
||||||
|
|
||||||
@ -495,12 +534,6 @@ class LimitReader(object):
|
|||||||
|
|
||||||
return stream
|
return stream
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
#=================================================================
|
BlockLoader.init_default_loaders()
|
||||||
LOADERS = {'http': HttpLoader,
|
|
||||||
'https': HttpLoader,
|
|
||||||
's3': S3Loader,
|
|
||||||
'file': LocalFileLoader
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -37,11 +37,11 @@ Traceback (most recent call last):
|
|||||||
IOError: [Errno 2] No such file or directory: '_x_no_such_file_'
|
IOError: [Errno 2] No such file or directory: '_x_no_such_file_'
|
||||||
|
|
||||||
# HMAC Cookie Maker
|
# HMAC Cookie Maker
|
||||||
>>> print_str(BlockLoader(HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read())
|
>>> print_str(BlockLoader(cookie_maker=HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read())
|
||||||
'Example Domain'
|
'Example Domain'
|
||||||
|
|
||||||
# fixed cookie, range request
|
# fixed cookie, range request
|
||||||
>>> print_str(BlockLoader('some=value').load('http://example.com', 41, 14).read())
|
>>> print_str(BlockLoader(cookie='some=value').load('http://example.com', 41, 14).read())
|
||||||
'Example Domain'
|
'Example Domain'
|
||||||
|
|
||||||
# range request
|
# range request
|
||||||
|
@ -57,7 +57,7 @@ class ArcWarcRecordLoader(object):
|
|||||||
def __init__(self, loader=None, cookie_maker=None, block_size=8192,
|
def __init__(self, loader=None, cookie_maker=None, block_size=8192,
|
||||||
verify_http=True):
|
verify_http=True):
|
||||||
if not loader:
|
if not loader:
|
||||||
loader = BlockLoader(cookie_maker)
|
loader = BlockLoader(cookie_maker=cookie_maker)
|
||||||
|
|
||||||
self.loader = loader
|
self.loader = loader
|
||||||
self.block_size = block_size
|
self.block_size = block_size
|
||||||
|
Loading…
x
Reference in New Issue
Block a user