From c52efa0f9b5dfa799ed036780f1b68ba38c87bf8 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 18 Dec 2016 20:57:17 -0800 Subject: [PATCH] loader improvements: add PackageLoader for pkg:// scheme use pkgutil.get_data() instead of pkg_resources template loading: load assets file through load() interface, use standard PackageLoader --- pywb/urlrewrite/templateview.py | 13 +++------ pywb/utils/loaders.py | 47 +++++++++++++++++++++++++-------- pywb/webagg/utils.py | 13 +++++---- 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/pywb/urlrewrite/templateview.py b/pywb/urlrewrite/templateview.py index e6b8cdd3..443212d4 100644 --- a/pywb/urlrewrite/templateview.py +++ b/pywb/urlrewrite/templateview.py @@ -1,5 +1,7 @@ from pywb.utils.timeutils import timestamp_to_datetime, timestamp_to_sec from pywb.utils.timeutils import timestamp_now +from pywb.utils.loaders import load + from six.moves.urllib.parse import urlsplit from jinja2 import Environment @@ -15,13 +17,6 @@ import json import os -# ============================================================================ -class FileOnlyPackageLoader(PackageLoader): - def get_source(self, env, template): - dir_, file_ = os.path.split(template) - return super(FileOnlyPackageLoader, self).get_source(env, file_) - - # ============================================================================ class RelEnvironment(Environment): """Override join_path() to enable relative template paths.""" @@ -65,7 +60,7 @@ class JinjaEnv(object): # init assets if assets_path: - assets_loader = YAMLLoader(assets_path) + assets_loader = YAMLLoader(load(assets_path)) assets_env = assets_loader.load_environment() assets_env.resolver = PkgResResolver() jinja_env.assets_environment = assets_env @@ -78,7 +73,7 @@ class JinjaEnv(object): # add loaders for all specified packages for package in packages: - loaders.append(FileOnlyPackageLoader(package)) + loaders.append(PackageLoader(package)) return loaders diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index 3841134b..a7bfa30e 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -12,7 +12,7 @@ from six.moves.urllib.request import pathname2url, url2pathname from six.moves.urllib.parse import urljoin, unquote_plus, urlsplit, urlencode import time -import pkg_resources +import pkgutil import base64 import cgi @@ -39,16 +39,20 @@ def to_file_url(filename): return url +#================================================================= +def load(filename): + return BlockLoader().load(filename) + + #================================================================= def load_yaml_config(config_file): import yaml config = None configdata = None try: - configdata = BlockLoader().load(config_file) + configdata = load(config_file) config = yaml.load(configdata) finally: - configdata.close() if configdata: configdata.close() @@ -310,6 +314,7 @@ class BlockLoader(BaseLoader): BlockLoader.loaders['https'] = HttpLoader BlockLoader.loaders['s3'] = S3Loader BlockLoader.loaders['file'] = LocalFileLoader + BlockLoader.loaders['pkg'] = PackageLoader @staticmethod def set_profile_loader(src): @@ -326,7 +331,33 @@ class BlockLoader(BaseLoader): #================================================================= -class LocalFileLoader(BaseLoader): +class PackageLoader(BaseLoader): + def load(self, url, offset=0, length=-1): + if url.startswith('pkg://'): + url = url[len('pkg://'):] + + # then, try as package.path/file + pkg_split = url.split('/', 1) + if len(pkg_split) == 1: + raise + + data = pkgutil.get_data(pkg_split[0], pkg_split[1]) + if offset > 0: + data = data[offset:] + + if length > -1: + data = data[:length] + + buff = BytesIO(data) + buff.name = url + return buff + + #afile = pkg_resources.resource_stream(pkg_split[0], + # pkg_split[1]) + + +#================================================================= +class LocalFileLoader(PackageLoader): def load(self, url, offset=0, length=-1): """ Load a file-like reader from the local file system @@ -348,13 +379,7 @@ class LocalFileLoader(BaseLoader): if file_only: raise - # then, try as package.path/file - pkg_split = url.split('/', 1) - if len(pkg_split) == 1: - raise - - afile = pkg_resources.resource_stream(pkg_split[0], - pkg_split[1]) + return super(LocalFileLoader, self).load(url, offset, length) if offset > 0: afile.seek(offset) diff --git a/pywb/webagg/utils.py b/pywb/webagg/utils.py index d7936fcc..66555851 100644 --- a/pywb/webagg/utils.py +++ b/pywb/webagg/utils.py @@ -9,6 +9,7 @@ from contextlib import closing from pywb.utils.timeutils import timestamp_to_http_date from pywb.utils.wbexception import BadRequestException +from pywb.utils.loaders import load_yaml_config from six.moves.urllib.parse import quote from tempfile import SpooledTemporaryFile @@ -223,22 +224,20 @@ def load_config(main_env_var, main_default_file='', overlay_env_var='', overlay_file=''): configfile = os.environ.get(main_env_var, main_default_file) + config = None if configfile: configfile = os.path.expandvars(configfile) - # Load config - with open(configfile, 'rb') as fh: - config = yaml.load(fh) - else: - config = {} + config = load_yaml_config(configfile) + + config = config or {} overlay_configfile = os.environ.get(overlay_env_var, overlay_file) if overlay_configfile: overlay_configfile = os.path.expandvars(overlay_configfile) - with open(overlay_configfile, 'rb') as fh: - config.update(yaml.load(fh)) + config.update(load_yaml_config(overlay_configfile)) return config