From f0a09760385dc6ac0803f75dd19c9d1ed6092695 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 2 Mar 2014 21:42:05 -0800 Subject: [PATCH] more refactoring! create 'framework' subpackage for general purpose components! contains routing, request/response, exceptions and wsgi wrappers update framework package for pep8 dsrules: using load_config_yaml() (pushed to utils) to init default config --- pywb/apps/wayback.py | 4 +- pywb/core/handlers.py | 2 +- pywb/{bootstrap => core}/pywb_init.py | 20 +++---- pywb/core/replay_views.py | 4 +- pywb/core/views.py | 2 +- pywb/dispatch/__init__.py | 0 pywb/{bootstrap => framework}/__init__.py | 0 .../{dispatch => framework}/archivalrouter.py | 52 +++++++++++-------- pywb/{dispatch => framework}/proxy.py | 33 ++++++------ .../test/test_archivalrouter.py | 2 +- .../test/test_wbrequestresponse.py | 2 +- pywb/{core => framework}/wbexceptions.py | 0 pywb/{core => framework}/wbrequestresponse.py | 39 ++++++++------ .../{bootstrap => framework}/wsgi_wrappers.py | 37 ++++++------- pywb/utils/dsrules.py | 31 ++++------- pywb/utils/loaders.py | 27 ++++++++-- tests/test_integration.py | 4 +- 17 files changed, 138 insertions(+), 121 deletions(-) rename pywb/{bootstrap => core}/pywb_init.py (90%) delete mode 100644 pywb/dispatch/__init__.py rename pywb/{bootstrap => framework}/__init__.py (100%) rename pywb/{dispatch => framework}/archivalrouter.py (77%) rename pywb/{dispatch => framework}/proxy.py (78%) rename pywb/{dispatch => framework}/test/test_archivalrouter.py (98%) rename pywb/{core => framework}/test/test_wbrequestresponse.py (98%) rename pywb/{core => framework}/wbexceptions.py (100%) rename pywb/{core => framework}/wbrequestresponse.py (76%) rename pywb/{bootstrap => framework}/wsgi_wrappers.py (84%) diff --git a/pywb/apps/wayback.py b/pywb/apps/wayback.py index beaf0b0c..0cda072b 100644 --- a/pywb/apps/wayback.py +++ b/pywb/apps/wayback.py @@ -1,5 +1,5 @@ -from pywb.bootstrap.wsgi_wrappers import init_app, start_wsgi_server -from pywb.bootstrap.pywb_init import create_wb_router +from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server +from pywb.core.pywb_init import create_wb_router #================================================================= # init pywb app diff --git a/pywb/core/handlers.py b/pywb/core/handlers.py index cbf2d71f..1984a4df 100644 --- a/pywb/core/handlers.py +++ b/pywb/core/handlers.py @@ -5,7 +5,7 @@ import time from pywb.rewrite.wburl import WbUrl from pywb.cdx.query import CDXQuery -from wbrequestresponse import WbResponse +from pywb.framework.wbrequestresponse import WbResponse from wbexceptions import WbException, NotFoundException from views import TextCapturesView diff --git a/pywb/bootstrap/pywb_init.py b/pywb/core/pywb_init.py similarity index 90% rename from pywb/bootstrap/pywb_init.py rename to pywb/core/pywb_init.py index d4382204..52df9f5f 100644 --- a/pywb/bootstrap/pywb_init.py +++ b/pywb/core/pywb_init.py @@ -1,25 +1,25 @@ -from pywb.dispatch.archivalrouter import ArchivalRouter, Route -from pywb.dispatch.proxy import ProxyArchivalRouter +from pywb.framework.archivalrouter import ArchivalRouter, Route +from pywb.framework.proxy import ProxyArchivalRouter from pywb.warc.recordloader import ArcWarcRecordLoader from pywb.warc.resolvingloader import ResolvingLoader from pywb.rewrite.rewrite_content import RewriteContent -from pywb.core.indexreader import IndexReader -from pywb.core.views import J2TemplateView, J2HtmlCapturesView -from pywb.core.handlers import WBHandler -from pywb.core.replay_views import ReplayView +from indexreader import IndexReader +from views import J2TemplateView, J2HtmlCapturesView +from replay_views import ReplayView -from pywb.core.handlers import CDXHandler, StaticHandler -from pywb.core.handlers import DebugEchoHandler, DebugEchoEnvHandler +from handlers import WBHandler +from handlers import CDXHandler, StaticHandler +from handlers import DebugEchoHandler, DebugEchoEnvHandler -from pywb.utils.loaders import BlockLoader import os import yaml import logging + #================================================================= DEFAULTS = { 'hostpaths': ['http://localhost:8080'], @@ -34,7 +34,7 @@ DEFAULTS = { 'static_routes': {'static/default': 'static/'}, - 'domain_specific_rules': 'rules.yaml', + 'domain_specific_rules': 'pywb/rules.yaml', } #================================================================= diff --git a/pywb/core/replay_views.py b/pywb/core/replay_views.py index bf046416..07997396 100644 --- a/pywb/core/replay_views.py +++ b/pywb/core/replay_views.py @@ -2,9 +2,9 @@ import StringIO from pywb.rewrite.url_rewriter import UrlRewriter from pywb.utils.bufferedreaders import ChunkedDataReader -from wbrequestresponse import WbResponse +from pywb.framework.wbrequestresponse import WbResponse -from wbexceptions import CaptureException, InternalRedirect +from pywb.framework.wbexceptions import CaptureException, InternalRedirect from pywb.warc.recordloader import ArchiveLoadFailed from pywb.utils.loaders import LimitReader diff --git a/pywb/core/views.py b/pywb/core/views.py index 520faa78..3be55eae 100644 --- a/pywb/core/views.py +++ b/pywb/core/views.py @@ -1,5 +1,5 @@ from pywb.utils.timeutils import timestamp_to_datetime -from wbrequestresponse import WbResponse +from pywb.framework.wbrequestresponse import WbResponse import urlparse import time diff --git a/pywb/dispatch/__init__.py b/pywb/dispatch/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/pywb/bootstrap/__init__.py b/pywb/framework/__init__.py similarity index 100% rename from pywb/bootstrap/__init__.py rename to pywb/framework/__init__.py diff --git a/pywb/dispatch/archivalrouter.py b/pywb/framework/archivalrouter.py similarity index 77% rename from pywb/dispatch/archivalrouter.py rename to pywb/framework/archivalrouter.py index fb09fa1a..2ae3bb5f 100644 --- a/pywb/dispatch/archivalrouter.py +++ b/pywb/framework/archivalrouter.py @@ -1,15 +1,17 @@ import urlparse import re -from pywb.core.wbrequestresponse import WbRequest, WbResponse from pywb.rewrite.url_rewriter import UrlRewriter +from wbrequestresponse import WbRequest, WbResponse #================================================================= # ArchivalRouter -- route WB requests in archival mode #================================================================= -class ArchivalRouter: - def __init__(self, routes, hostpaths=None, abs_path=True, home_view=None, error_view=None): +class ArchivalRouter(object): + def __init__(self, routes, hostpaths=None, abs_path=True, + home_view=None, error_view=None): + self.routes = routes self.fallback = ReferRedirect(hostpaths) self.abs_path = abs_path @@ -29,26 +31,27 @@ class ArchivalRouter: return self.fallback(env, self.routes) if self.fallback else None - def render_home_page(self): # render the homepage! if self.home_view: - return self.home_view.render_response(routes = self.routes) + return self.home_view.render_response(routes=self.routes) else: # default home page template text = '\n'.join(map(str, self.routes)) return WbResponse.text_response(text) + #================================================================= # Route by matching regex (or fixed prefix) # of request uri (excluding first '/') #================================================================= -class Route: +class Route(object): # match upto next / or ? or end - SLASH_QUERY_LOOKAHEAD ='(?=/|$|\?)' + SLASH_QUERY_LOOKAHEAD = '(?=/|$|\?)' + def __init__(self, regex, handler, coll_group=0, config={}, + lookahead=SLASH_QUERY_LOOKAHEAD): - def __init__(self, regex, handler, coll_group = 0, config = {}, lookahead = SLASH_QUERY_LOOKAHEAD): self.path = regex if regex: self.regex = re.compile(regex + lookahead) @@ -59,12 +62,11 @@ class Route: self.coll_group = coll_group self._custom_init(config) - def __call__(self, env, use_abs_prefix): wbrequest = self.parse_request(env, use_abs_prefix) return self.handler(wbrequest) if wbrequest else None - def parse_request(self, env, use_abs_prefix, request_uri = None): + def parse_request(self, env, use_abs_prefix, request_uri=None): if not request_uri: request_uri = env['REL_REQUEST_URI'] @@ -75,10 +77,12 @@ class Route: matched_str = matcher.group(0) if matched_str: rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/' - wb_url_str = request_uri[len(matched_str) + 2:] # remove the '/' + rel_prefix part of uri + # remove the '/' + rel_prefix part of uri + wb_url_str = request_uri[len(matched_str) + 2:] else: rel_prefix = env['SCRIPT_NAME'] + '/' - wb_url_str = request_uri[1:] # the request_uri is the wb_url, since no coll + # the request_uri is the wb_url, since no coll + wb_url_str = request_uri[1:] coll = matcher.group(self.coll_group) @@ -88,20 +92,19 @@ class Route: rel_prefix=rel_prefix, coll=coll, use_abs_prefix=use_abs_prefix, - wburl_class = self.handler.get_wburl_type(), + wburl_class=self.handler.get_wburl_type(), urlrewriter_class=UrlRewriter) - # Allow for applying of additional filters self._apply_filters(wbrequest, matcher) return wbrequest - def _apply_filters(self, wbrequest, matcher): for filter in self.filters: last_grp = len(matcher.groups()) - wbrequest.query_filter.append(filter.format(matcher.group(last_grp))) + filter_str = filter.format(matcher.group(last_grp)) + wbrequest.query_filter.append(filter_str) def _custom_init(self, config): self.filters = config.get('filters', []) @@ -112,7 +115,8 @@ class Route: #================================================================= -# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings +# ReferRedirect -- redirect urls that have 'fallen through' +# based on the referrer settings #================================================================= class ReferRedirect: def __init__(self, match_prefixs): @@ -121,7 +125,6 @@ class ReferRedirect: else: self.match_prefixs = [match_prefixs] - def __call__(self, env, routes): referrer = env.get('HTTP_REFERER') @@ -133,7 +136,7 @@ class ReferRedirect: ref_split = urlparse.urlsplit(referrer) # ensure referrer starts with one of allowed hosts - if not any (referrer.startswith(i) for i in self.match_prefixs): + if not any(referrer.startswith(i) for i in self.match_prefixs): if ref_split.netloc != env.get('HTTP_HOST'): return None @@ -144,13 +147,12 @@ class ReferRedirect: if app_path: # must start with current app name, if not root if not path.startswith(app_path): - return None + return None path = path[len(app_path):] - for route in routes: - ref_request = route.parse_request(env, False, request_uri = path) + ref_request = route.parse_request(env, False, request_uri=path) if ref_request: break @@ -174,6 +176,10 @@ class ReferRedirect: # 2013/path.html -> /path.html rel_request_uri = rel_request_uri[len(timestamp_path) - 1:] - final_url = urlparse.urlunsplit((ref_split.scheme, ref_split.netloc, rewriter.rewrite(rel_request_uri), '', '')) + final_url = urlparse.urlunsplit((ref_split.scheme, + ref_split.netloc, + rewriter.rewrite(rel_request_uri), + '', + '')) return WbResponse.redir_response(final_url) diff --git a/pywb/dispatch/proxy.py b/pywb/framework/proxy.py similarity index 78% rename from pywb/dispatch/proxy.py rename to pywb/framework/proxy.py index ffc74c47..cbebf4ae 100644 --- a/pywb/dispatch/proxy.py +++ b/pywb/framework/proxy.py @@ -1,15 +1,19 @@ -from pywb.core.wbrequestresponse import WbResponse, WbRequest +from wbrequestresponse import WbResponse, WbRequest from archivalrouter import ArchivalRouter import urlparse + #================================================================= # An experimental router which combines both archival and proxy modes -# http proxy mode support is very simple: only latest capture is available currently +# http proxy mode support is very simple so far: +# only latest capture is available currently #================================================================= - class ProxyArchivalRouter: - def __init__(self, routes, hostpaths = None, abs_path = True, home_view = None, error_view = None): - self.archival = ArchivalRouter(routes, hostpaths, abs_path, home_view, error_view) + def __init__(self, routes, hostpaths=None, abs_path=True, + home_view=None, error_view=None): + + self.archival = ArchivalRouter(routes, hostpaths, abs_path, + home_view, error_view) self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view) self.error_view = error_view @@ -29,7 +33,7 @@ class ProxyArchivalRouter: # Only supports latest capture replay at the moment #================================================================= class ProxyRouter: - def __init__(self, handler, hostpaths = None, error_view = None): + def __init__(self, handler, hostpaths=None, error_view=None): self.handler = handler self.hostpaths = hostpaths @@ -56,27 +60,26 @@ class ProxyRouter: return self.handler(wbrequest) - # Proxy Auto-Config (PAC) script for the proxy def make_pac_response(self, env): server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT'] buff = 'function FindProxyForURL (url, host) {\n' - direct_cond =' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n' + direct = ' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n' for hostpath in self.hostpaths: parts = urlparse.urlsplit(hostpath).netloc.split(':') - buff += direct_cond.format(parts[0]) + buff += direct.format(parts[0]) - buff += direct_cond.format(env['SERVER_NAME']) + buff += direct.format(env['SERVER_NAME']) #buff += '\n return "PROXY {0}";\n}}\n'.format(self.hostpaths[0]) buff += '\n return "PROXY {0}";\n}}\n'.format(server_hostport) - return WbResponse.text_response(buff, content_type = 'application/x-ns-proxy-autoconfig') - + content_type = 'application/x-ns-proxy-autoconfig' + return WbResponse.text_response(buff, content_type=content_type) #================================================================= @@ -85,10 +88,11 @@ class ProxyRouter: class ProxyHttpsUrlRewriter: HTTP = 'http://' HTTPS = 'https://' + def __init__(self, wbrequest, prefix): pass - def rewrite(self, url, mod = None): + def rewrite(self, url, mod=None): if url.startswith(self.HTTPS): return self.HTTP + url[len(self.HTTPS):] else: @@ -97,6 +101,5 @@ class ProxyHttpsUrlRewriter: def get_timestamp_url(self, timestamp, url): return url - def get_abs_url(self, url = ''): + def get_abs_url(self, url=''): return url - diff --git a/pywb/dispatch/test/test_archivalrouter.py b/pywb/framework/test/test_archivalrouter.py similarity index 98% rename from pywb/dispatch/test/test_archivalrouter.py rename to pywb/framework/test/test_archivalrouter.py index a076c015..86df528a 100644 --- a/pywb/dispatch/test/test_archivalrouter.py +++ b/pywb/framework/test/test_archivalrouter.py @@ -84,7 +84,7 @@ False """ -from pywb.dispatch.archivalrouter import Route, ReferRedirect +from pywb.framework.archivalrouter import Route, ReferRedirect from pywb.core.handlers import BaseHandler, WbUrlHandler import pprint diff --git a/pywb/core/test/test_wbrequestresponse.py b/pywb/framework/test/test_wbrequestresponse.py similarity index 98% rename from pywb/core/test/test_wbrequestresponse.py rename to pywb/framework/test/test_wbrequestresponse.py index 09017564..977a8863 100644 --- a/pywb/core/test/test_wbrequestresponse.py +++ b/pywb/framework/test/test_wbrequestresponse.py @@ -41,7 +41,7 @@ from pywb.rewrite.wburl import WbUrl from pywb.rewrite.url_rewriter import UrlRewriter from pywb.utils.statusandheaders import StatusAndHeaders -from pywb.core.wbrequestresponse import WbRequest, WbResponse +from pywb.framework.wbrequestresponse import WbRequest, WbResponse def print_req_from_uri(request_uri, env={}, use_abs_prefix=False): diff --git a/pywb/core/wbexceptions.py b/pywb/framework/wbexceptions.py similarity index 100% rename from pywb/core/wbexceptions.py rename to pywb/framework/wbexceptions.py diff --git a/pywb/core/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py similarity index 76% rename from pywb/core/wbrequestresponse.py rename to pywb/framework/wbrequestresponse.py index 4a459c4b..3ef091d9 100644 --- a/pywb/core/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -26,7 +26,6 @@ class WbRequest: except KeyError: return '' - def __init__(self, env, request_uri=None, rel_prefix='', @@ -40,7 +39,10 @@ class WbRequest: self.env = env - self.request_uri = request_uri if request_uri else env.get('REL_REQUEST_URI') + if request_uri: + self.request_uri = request_uri + else: + self.request_uri = env.get('REL_REQUEST_URI') self.coll = coll @@ -55,7 +57,6 @@ class WbRequest: else: self.wb_prefix = rel_prefix - if not wb_url_str: wb_url_str = '/' @@ -83,7 +84,6 @@ class WbRequest: # PERF env['X_PERF'] = {} - def _is_ajax(self): value = self.env.get('HTTP_X_REQUESTED_WITH') if not value: @@ -96,7 +96,6 @@ class WbRequest: return True return False - def __repr__(self): varlist = vars(self) varstr = pprint.pformat(varlist) @@ -111,32 +110,39 @@ class WbResponse: Holds a status_headers object and a response iter, to be returned to wsgi container. """ - def __init__(self, status_headers, value = []): + def __init__(self, status_headers, value=[]): self.status_headers = status_headers self.body = value @staticmethod - def text_stream(text, status = '200 OK', content_type = 'text/plain'): - return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = text) + def text_stream(stream, status='200 OK', content_type='text/plain'): + status_headers = StatusAndHeaders(status, + [('Content-Type', content_type)]) + + return WbResponse(status_headers, value=stream) @staticmethod - def text_response(text, status = '200 OK', content_type = 'text/plain'): - return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = [text]) + def text_response(text, status='200 OK', content_type='text/plain'): + status_headers = StatusAndHeaders(status, + [('Content-Type', content_type)]) + + return WbResponse(status_headers, value=[text]) @staticmethod - def redir_response(location, status = '302 Redirect'): - return WbResponse(StatusAndHeaders(status, [('Location', location)])) - + def redir_response(location, status='302 Redirect'): + return WbResponse(StatusAndHeaders(status, + [('Location', location)])) def __call__(self, env, start_response): # PERF perfstats = env.get('X_PERF') if perfstats: - self.status_headers.headers.append(('X-Archive-Perf-Stats', str(perfstats))) + self.status_headers.headers.append(('X-Archive-Perf-Stats', + str(perfstats))) - - start_response(self.status_headers.statusline, self.status_headers.headers) + start_response(self.status_headers.statusline, + self.status_headers.headers) if env['REQUEST_METHOD'] == 'HEAD': if hasattr(self.body, 'close'): @@ -148,6 +154,5 @@ class WbResponse: else: return [str(self.body)] - def __repr__(self): return str(vars(self)) diff --git a/pywb/bootstrap/wsgi_wrappers.py b/pywb/framework/wsgi_wrappers.py similarity index 84% rename from pywb/bootstrap/wsgi_wrappers.py rename to pywb/framework/wsgi_wrappers.py index 4dd04115..2811aa92 100644 --- a/pywb/bootstrap/wsgi_wrappers.py +++ b/pywb/framework/wsgi_wrappers.py @@ -1,8 +1,9 @@ from pywb.utils.wbexception import WbException -from pywb.core.wbexceptions import NotFoundException, InternalRedirect -from pywb.core.wbrequestresponse import WbResponse, StatusAndHeaders +from pywb.utils.loaders import load_yaml_config + +from wbexceptions import NotFoundException, InternalRedirect +from wbrequestresponse import WbResponse, StatusAndHeaders -from pywb.utils.loaders import BlockLoader import os import importlib @@ -10,10 +11,13 @@ import logging #================================================================= -# adapted from wsgiref.request_uri, but doesn't include domain name and allows all characters -# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3 +# adapted from wsgiref.request_uri, but doesn't include domain name +# and allows all characters which are allowed in the path segment +# according to: http://tools.ietf.org/html/rfc3986#section-3.3 # explained here: -# http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links +# http://stackoverflow.com/questions/4669692/ +# valid-characters-for-directory-part-of-a-url-for-short-links + def rel_request_uri(environ, include_query=1): """ Return the requested path, optionally including the query string @@ -28,7 +32,7 @@ def rel_request_uri(environ, include_query=1): "/web/example.com/0~!+$&'()*+,;=:%22" """ from urllib import quote - url = quote(environ.get('PATH_INFO',''), safe='/~!$&\'()*+,;=:@') + url = quote(environ.get('PATH_INFO', ''), safe='/~!$&\'()*+,;=:@') if include_query and environ.get('QUERY_STRING'): url += '?' + environ['QUERY_STRING'] @@ -50,7 +54,8 @@ def create_wb_app(wb_router): response = wb_router(env) if not response: - raise NotFoundException('No handler for "{0}"'.format(env['REL_REQUEST_URI'])) + msg = 'No handler for "{0}"'.format(env['REL_REQUEST_URI']) + raise NotFoundException(msg) except InternalRedirect as ir: response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders)) @@ -63,7 +68,6 @@ def create_wb_app(wb_router): return response(env, start_response) - return application @@ -94,16 +98,6 @@ def handle_exception(env, error_view, exc, print_trace): #================================================================= DEFAULT_CONFIG_FILE = 'config.yaml' -def load_yaml_config(config_file=None): - import yaml - - if not config_file: - config_file = DEFAULT_CONFIG_FILE - - configdata = BlockLoader().load(config_file) - config = yaml.load(configdata) - return config - #================================================================= def init_app(init_func, load_yaml=True, config_file=None): @@ -114,6 +108,9 @@ def init_app(init_func, load_yaml=True, config_file=None): if load_yaml: if not config_file: config_file = os.environ.get('PYWB_CONFIG_FILE') + if not config_file: + config_file = DEFAULT_CONFIG_FILE + config = load_yaml_config(config_file) try: @@ -135,6 +132,7 @@ def init_app(init_func, load_yaml=True, config_file=None): #================================================================= DEFAULT_PORT = 8080 + def start_wsgi_server(the_app): from wsgiref.simple_server import make_server from optparse import OptionParser @@ -153,7 +151,6 @@ def start_wsgi_server(the_app): except: port = DEFAULT_PORT - logging.debug('Starting CDX Server on port %s', port) try: diff --git a/pywb/utils/dsrules.py b/pywb/utils/dsrules.py index 2e6f9626..bfbb5a1a 100644 --- a/pywb/utils/dsrules.py +++ b/pywb/utils/dsrules.py @@ -1,11 +1,10 @@ -import yaml import pkgutil +from loaders import load_yaml_config + #================================================================= -DEFAULT_RULES_FILE = 'rules.yaml' -DEFAULT_RULES_PKG = 'pywb' - +DEFAULT_RULES_FILE = 'pywb/rules.yaml' #================================================================= class RuleSet(object): @@ -23,10 +22,14 @@ class RuleSet(object): self.rules = [] - ds_rules_file = kwargs.get('ds_rules_file') default_rule_config = kwargs.get('default_rule_config') - config = self.load_default_rules(ds_rules_file) + ds_rules_file = kwargs.get('ds_rules_file') + + if not ds_rules_file: + ds_rules_file = DEFAULT_RULES_FILE + + config = load_yaml_config(ds_rules_file) rulesmap = config.get('rules') if config else None @@ -53,22 +56,6 @@ class RuleSet(object): if not def_key_found and default_rule_config is not None: self.rules.append(rule_cls(self.DEFAULT_KEY, default_rule_config)) - @staticmethod - def load_default_rules(filename=None, pkg=None): - config = None - - if not filename: - filename = DEFAULT_RULES_FILE - - if not pkg: - pkg = DEFAULT_RULES_PKG - - if filename: - yaml_str = pkgutil.get_data(pkg, filename) - config = yaml.load(yaml_str) - - return config - def iter_matching(self, urlkey): """ Iterate over all matching rules for given urlkey diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index 6f2fa6c9..0f925105 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -7,12 +7,20 @@ import os import hmac import urllib2 import time -from pkg_resources import resource_stream +import pkg_resources #================================================================= def is_http(filename): - return any(filename.startswith(x) for x in ['http://', 'https://']) + return filename.startswith(('http://', 'https://')) + + +#================================================================= +def load_yaml_config(config_file): + import yaml + configdata = BlockLoader().load(config_file) + config = yaml.load(configdata) + return config #================================================================= @@ -39,16 +47,27 @@ class BlockLoader(object): Load a file-like reader from the local file system """ + file_only = False + if url.startswith('file://'): url = url[len('file://'):] + file_only = True try: # first, try as file afile = open(url, 'rb') - except IOError as file_err: + + except IOError: + #if file_only: + # raise + # then, try as package.path/file pkg_split = url.split('/', 1) - afile = resource_stream(pkg_split[0], pkg_split[1]) + #if len(pkg_split) == 1: + # raise + + afile = pkg_resources.resource_stream(pkg_split[0], + pkg_split[1]) if offset > 0: afile.seek(offset) diff --git a/tests/test_integration.py b/tests/test_integration.py index b71e8574..6e539c31 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,6 +1,6 @@ import webtest -from pywb.bootstrap.pywb_init import create_wb_router -from pywb.bootstrap.wsgi_wrappers import init_app +from pywb.core.pywb_init import create_wb_router +from pywb.framework.wsgi_wrappers import init_app from pywb.cdx.cdxobject import CDXObject from fixture import TestExclusionPerms