1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

more refactoring!

Create a 'framework' subpackage for general-purpose components.
It contains routing, request/response, exceptions and WSGI wrappers.
Update the framework package for PEP 8 compliance.
dsrules: use load_yaml_config() (moved to pywb.utils.loaders)
to initialize the default rules config.
This commit is contained in:
Ilya Kreymer 2014-03-02 21:42:05 -08:00
parent f1acad53fc
commit f0a0976038
17 changed files with 138 additions and 121 deletions

View File

@ -1,5 +1,5 @@
from pywb.bootstrap.wsgi_wrappers import init_app, start_wsgi_server from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
from pywb.bootstrap.pywb_init import create_wb_router from pywb.core.pywb_init import create_wb_router
#================================================================= #=================================================================
# init pywb app # init pywb app

View File

@ -5,7 +5,7 @@ import time
from pywb.rewrite.wburl import WbUrl from pywb.rewrite.wburl import WbUrl
from pywb.cdx.query import CDXQuery from pywb.cdx.query import CDXQuery
from wbrequestresponse import WbResponse from pywb.framework.wbrequestresponse import WbResponse
from wbexceptions import WbException, NotFoundException from wbexceptions import WbException, NotFoundException
from views import TextCapturesView from views import TextCapturesView

View File

@ -1,25 +1,25 @@
from pywb.dispatch.archivalrouter import ArchivalRouter, Route from pywb.framework.archivalrouter import ArchivalRouter, Route
from pywb.dispatch.proxy import ProxyArchivalRouter from pywb.framework.proxy import ProxyArchivalRouter
from pywb.warc.recordloader import ArcWarcRecordLoader from pywb.warc.recordloader import ArcWarcRecordLoader
from pywb.warc.resolvingloader import ResolvingLoader from pywb.warc.resolvingloader import ResolvingLoader
from pywb.rewrite.rewrite_content import RewriteContent from pywb.rewrite.rewrite_content import RewriteContent
from pywb.core.indexreader import IndexReader from indexreader import IndexReader
from pywb.core.views import J2TemplateView, J2HtmlCapturesView from views import J2TemplateView, J2HtmlCapturesView
from pywb.core.handlers import WBHandler from replay_views import ReplayView
from pywb.core.replay_views import ReplayView
from pywb.core.handlers import CDXHandler, StaticHandler from handlers import WBHandler
from pywb.core.handlers import DebugEchoHandler, DebugEchoEnvHandler from handlers import CDXHandler, StaticHandler
from handlers import DebugEchoHandler, DebugEchoEnvHandler
from pywb.utils.loaders import BlockLoader
import os import os
import yaml import yaml
import logging import logging
#================================================================= #=================================================================
DEFAULTS = { DEFAULTS = {
'hostpaths': ['http://localhost:8080'], 'hostpaths': ['http://localhost:8080'],
@ -34,7 +34,7 @@ DEFAULTS = {
'static_routes': {'static/default': 'static/'}, 'static_routes': {'static/default': 'static/'},
'domain_specific_rules': 'rules.yaml', 'domain_specific_rules': 'pywb/rules.yaml',
} }
#================================================================= #=================================================================

View File

@ -2,9 +2,9 @@ import StringIO
from pywb.rewrite.url_rewriter import UrlRewriter from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.utils.bufferedreaders import ChunkedDataReader from pywb.utils.bufferedreaders import ChunkedDataReader
from wbrequestresponse import WbResponse from pywb.framework.wbrequestresponse import WbResponse
from wbexceptions import CaptureException, InternalRedirect from pywb.framework.wbexceptions import CaptureException, InternalRedirect
from pywb.warc.recordloader import ArchiveLoadFailed from pywb.warc.recordloader import ArchiveLoadFailed
from pywb.utils.loaders import LimitReader from pywb.utils.loaders import LimitReader

View File

@ -1,5 +1,5 @@
from pywb.utils.timeutils import timestamp_to_datetime from pywb.utils.timeutils import timestamp_to_datetime
from wbrequestresponse import WbResponse from pywb.framework.wbrequestresponse import WbResponse
import urlparse import urlparse
import time import time

View File

@ -1,15 +1,17 @@
import urlparse import urlparse
import re import re
from pywb.core.wbrequestresponse import WbRequest, WbResponse
from pywb.rewrite.url_rewriter import UrlRewriter from pywb.rewrite.url_rewriter import UrlRewriter
from wbrequestresponse import WbRequest, WbResponse
#================================================================= #=================================================================
# ArchivalRouter -- route WB requests in archival mode # ArchivalRouter -- route WB requests in archival mode
#================================================================= #=================================================================
class ArchivalRouter: class ArchivalRouter(object):
def __init__(self, routes, hostpaths=None, abs_path=True, home_view=None, error_view=None): def __init__(self, routes, hostpaths=None, abs_path=True,
home_view=None, error_view=None):
self.routes = routes self.routes = routes
self.fallback = ReferRedirect(hostpaths) self.fallback = ReferRedirect(hostpaths)
self.abs_path = abs_path self.abs_path = abs_path
@ -29,26 +31,27 @@ class ArchivalRouter:
return self.fallback(env, self.routes) if self.fallback else None return self.fallback(env, self.routes) if self.fallback else None
def render_home_page(self): def render_home_page(self):
# render the homepage! # render the homepage!
if self.home_view: if self.home_view:
return self.home_view.render_response(routes = self.routes) return self.home_view.render_response(routes=self.routes)
else: else:
# default home page template # default home page template
text = '\n'.join(map(str, self.routes)) text = '\n'.join(map(str, self.routes))
return WbResponse.text_response(text) return WbResponse.text_response(text)
#================================================================= #=================================================================
# Route by matching regex (or fixed prefix) # Route by matching regex (or fixed prefix)
# of request uri (excluding first '/') # of request uri (excluding first '/')
#================================================================= #=================================================================
class Route: class Route(object):
# match upto next / or ? or end # match upto next / or ? or end
SLASH_QUERY_LOOKAHEAD ='(?=/|$|\?)' SLASH_QUERY_LOOKAHEAD = '(?=/|$|\?)'
def __init__(self, regex, handler, coll_group=0, config={},
lookahead=SLASH_QUERY_LOOKAHEAD):
def __init__(self, regex, handler, coll_group = 0, config = {}, lookahead = SLASH_QUERY_LOOKAHEAD):
self.path = regex self.path = regex
if regex: if regex:
self.regex = re.compile(regex + lookahead) self.regex = re.compile(regex + lookahead)
@ -59,12 +62,11 @@ class Route:
self.coll_group = coll_group self.coll_group = coll_group
self._custom_init(config) self._custom_init(config)
def __call__(self, env, use_abs_prefix): def __call__(self, env, use_abs_prefix):
wbrequest = self.parse_request(env, use_abs_prefix) wbrequest = self.parse_request(env, use_abs_prefix)
return self.handler(wbrequest) if wbrequest else None return self.handler(wbrequest) if wbrequest else None
def parse_request(self, env, use_abs_prefix, request_uri = None): def parse_request(self, env, use_abs_prefix, request_uri=None):
if not request_uri: if not request_uri:
request_uri = env['REL_REQUEST_URI'] request_uri = env['REL_REQUEST_URI']
@ -75,10 +77,12 @@ class Route:
matched_str = matcher.group(0) matched_str = matcher.group(0)
if matched_str: if matched_str:
rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/' rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/'
wb_url_str = request_uri[len(matched_str) + 2:] # remove the '/' + rel_prefix part of uri # remove the '/' + rel_prefix part of uri
wb_url_str = request_uri[len(matched_str) + 2:]
else: else:
rel_prefix = env['SCRIPT_NAME'] + '/' rel_prefix = env['SCRIPT_NAME'] + '/'
wb_url_str = request_uri[1:] # the request_uri is the wb_url, since no coll # the request_uri is the wb_url, since no coll
wb_url_str = request_uri[1:]
coll = matcher.group(self.coll_group) coll = matcher.group(self.coll_group)
@ -88,20 +92,19 @@ class Route:
rel_prefix=rel_prefix, rel_prefix=rel_prefix,
coll=coll, coll=coll,
use_abs_prefix=use_abs_prefix, use_abs_prefix=use_abs_prefix,
wburl_class = self.handler.get_wburl_type(), wburl_class=self.handler.get_wburl_type(),
urlrewriter_class=UrlRewriter) urlrewriter_class=UrlRewriter)
# Allow for applying of additional filters # Allow for applying of additional filters
self._apply_filters(wbrequest, matcher) self._apply_filters(wbrequest, matcher)
return wbrequest return wbrequest
def _apply_filters(self, wbrequest, matcher): def _apply_filters(self, wbrequest, matcher):
for filter in self.filters: for filter in self.filters:
last_grp = len(matcher.groups()) last_grp = len(matcher.groups())
wbrequest.query_filter.append(filter.format(matcher.group(last_grp))) filter_str = filter.format(matcher.group(last_grp))
wbrequest.query_filter.append(filter_str)
def _custom_init(self, config): def _custom_init(self, config):
self.filters = config.get('filters', []) self.filters = config.get('filters', [])
@ -112,7 +115,8 @@ class Route:
#================================================================= #=================================================================
# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings # ReferRedirect -- redirect urls that have 'fallen through'
# based on the referrer settings
#================================================================= #=================================================================
class ReferRedirect: class ReferRedirect:
def __init__(self, match_prefixs): def __init__(self, match_prefixs):
@ -121,7 +125,6 @@ class ReferRedirect:
else: else:
self.match_prefixs = [match_prefixs] self.match_prefixs = [match_prefixs]
def __call__(self, env, routes): def __call__(self, env, routes):
referrer = env.get('HTTP_REFERER') referrer = env.get('HTTP_REFERER')
@ -133,7 +136,7 @@ class ReferRedirect:
ref_split = urlparse.urlsplit(referrer) ref_split = urlparse.urlsplit(referrer)
# ensure referrer starts with one of allowed hosts # ensure referrer starts with one of allowed hosts
if not any (referrer.startswith(i) for i in self.match_prefixs): if not any(referrer.startswith(i) for i in self.match_prefixs):
if ref_split.netloc != env.get('HTTP_HOST'): if ref_split.netloc != env.get('HTTP_HOST'):
return None return None
@ -144,13 +147,12 @@ class ReferRedirect:
if app_path: if app_path:
# must start with current app name, if not root # must start with current app name, if not root
if not path.startswith(app_path): if not path.startswith(app_path):
return None return None
path = path[len(app_path):] path = path[len(app_path):]
for route in routes: for route in routes:
ref_request = route.parse_request(env, False, request_uri = path) ref_request = route.parse_request(env, False, request_uri=path)
if ref_request: if ref_request:
break break
@ -174,6 +176,10 @@ class ReferRedirect:
# 2013/path.html -> /path.html # 2013/path.html -> /path.html
rel_request_uri = rel_request_uri[len(timestamp_path) - 1:] rel_request_uri = rel_request_uri[len(timestamp_path) - 1:]
final_url = urlparse.urlunsplit((ref_split.scheme, ref_split.netloc, rewriter.rewrite(rel_request_uri), '', '')) final_url = urlparse.urlunsplit((ref_split.scheme,
ref_split.netloc,
rewriter.rewrite(rel_request_uri),
'',
''))
return WbResponse.redir_response(final_url) return WbResponse.redir_response(final_url)

View File

@ -1,15 +1,19 @@
from pywb.core.wbrequestresponse import WbResponse, WbRequest from wbrequestresponse import WbResponse, WbRequest
from archivalrouter import ArchivalRouter from archivalrouter import ArchivalRouter
import urlparse import urlparse
#================================================================= #=================================================================
# An experimental router which combines both archival and proxy modes # An experimental router which combines both archival and proxy modes
# http proxy mode support is very simple: only latest capture is available currently # http proxy mode support is very simple so far:
# only latest capture is available currently
#================================================================= #=================================================================
class ProxyArchivalRouter: class ProxyArchivalRouter:
def __init__(self, routes, hostpaths = None, abs_path = True, home_view = None, error_view = None): def __init__(self, routes, hostpaths=None, abs_path=True,
self.archival = ArchivalRouter(routes, hostpaths, abs_path, home_view, error_view) home_view=None, error_view=None):
self.archival = ArchivalRouter(routes, hostpaths, abs_path,
home_view, error_view)
self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view) self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
self.error_view = error_view self.error_view = error_view
@ -29,7 +33,7 @@ class ProxyArchivalRouter:
# Only supports latest capture replay at the moment # Only supports latest capture replay at the moment
#================================================================= #=================================================================
class ProxyRouter: class ProxyRouter:
def __init__(self, handler, hostpaths = None, error_view = None): def __init__(self, handler, hostpaths=None, error_view=None):
self.handler = handler self.handler = handler
self.hostpaths = hostpaths self.hostpaths = hostpaths
@ -56,27 +60,26 @@ class ProxyRouter:
return self.handler(wbrequest) return self.handler(wbrequest)
# Proxy Auto-Config (PAC) script for the proxy # Proxy Auto-Config (PAC) script for the proxy
def make_pac_response(self, env): def make_pac_response(self, env):
server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT'] server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT']
buff = 'function FindProxyForURL (url, host) {\n' buff = 'function FindProxyForURL (url, host) {\n'
direct_cond =' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n' direct = ' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n'
for hostpath in self.hostpaths: for hostpath in self.hostpaths:
parts = urlparse.urlsplit(hostpath).netloc.split(':') parts = urlparse.urlsplit(hostpath).netloc.split(':')
buff += direct_cond.format(parts[0]) buff += direct.format(parts[0])
buff += direct_cond.format(env['SERVER_NAME']) buff += direct.format(env['SERVER_NAME'])
#buff += '\n return "PROXY {0}";\n}}\n'.format(self.hostpaths[0]) #buff += '\n return "PROXY {0}";\n}}\n'.format(self.hostpaths[0])
buff += '\n return "PROXY {0}";\n}}\n'.format(server_hostport) buff += '\n return "PROXY {0}";\n}}\n'.format(server_hostport)
return WbResponse.text_response(buff, content_type = 'application/x-ns-proxy-autoconfig') content_type = 'application/x-ns-proxy-autoconfig'
return WbResponse.text_response(buff, content_type=content_type)
#================================================================= #=================================================================
@ -85,10 +88,11 @@ class ProxyRouter:
class ProxyHttpsUrlRewriter: class ProxyHttpsUrlRewriter:
HTTP = 'http://' HTTP = 'http://'
HTTPS = 'https://' HTTPS = 'https://'
def __init__(self, wbrequest, prefix): def __init__(self, wbrequest, prefix):
pass pass
def rewrite(self, url, mod = None): def rewrite(self, url, mod=None):
if url.startswith(self.HTTPS): if url.startswith(self.HTTPS):
return self.HTTP + url[len(self.HTTPS):] return self.HTTP + url[len(self.HTTPS):]
else: else:
@ -97,6 +101,5 @@ class ProxyHttpsUrlRewriter:
def get_timestamp_url(self, timestamp, url): def get_timestamp_url(self, timestamp, url):
return url return url
def get_abs_url(self, url = ''): def get_abs_url(self, url=''):
return url return url

View File

@ -84,7 +84,7 @@ False
""" """
from pywb.dispatch.archivalrouter import Route, ReferRedirect from pywb.framework.archivalrouter import Route, ReferRedirect
from pywb.core.handlers import BaseHandler, WbUrlHandler from pywb.core.handlers import BaseHandler, WbUrlHandler
import pprint import pprint

View File

@ -41,7 +41,7 @@ from pywb.rewrite.wburl import WbUrl
from pywb.rewrite.url_rewriter import UrlRewriter from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.core.wbrequestresponse import WbRequest, WbResponse from pywb.framework.wbrequestresponse import WbRequest, WbResponse
def print_req_from_uri(request_uri, env={}, use_abs_prefix=False): def print_req_from_uri(request_uri, env={}, use_abs_prefix=False):

View File

@ -26,7 +26,6 @@ class WbRequest:
except KeyError: except KeyError:
return '' return ''
def __init__(self, env, def __init__(self, env,
request_uri=None, request_uri=None,
rel_prefix='', rel_prefix='',
@ -40,7 +39,10 @@ class WbRequest:
self.env = env self.env = env
self.request_uri = request_uri if request_uri else env.get('REL_REQUEST_URI') if request_uri:
self.request_uri = request_uri
else:
self.request_uri = env.get('REL_REQUEST_URI')
self.coll = coll self.coll = coll
@ -55,7 +57,6 @@ class WbRequest:
else: else:
self.wb_prefix = rel_prefix self.wb_prefix = rel_prefix
if not wb_url_str: if not wb_url_str:
wb_url_str = '/' wb_url_str = '/'
@ -83,7 +84,6 @@ class WbRequest:
# PERF # PERF
env['X_PERF'] = {} env['X_PERF'] = {}
def _is_ajax(self): def _is_ajax(self):
value = self.env.get('HTTP_X_REQUESTED_WITH') value = self.env.get('HTTP_X_REQUESTED_WITH')
if not value: if not value:
@ -96,7 +96,6 @@ class WbRequest:
return True return True
return False return False
def __repr__(self): def __repr__(self):
varlist = vars(self) varlist = vars(self)
varstr = pprint.pformat(varlist) varstr = pprint.pformat(varlist)
@ -111,32 +110,39 @@ class WbResponse:
Holds a status_headers object and a response iter, to be Holds a status_headers object and a response iter, to be
returned to wsgi container. returned to wsgi container.
""" """
def __init__(self, status_headers, value = []): def __init__(self, status_headers, value=[]):
self.status_headers = status_headers self.status_headers = status_headers
self.body = value self.body = value
@staticmethod @staticmethod
def text_stream(text, status = '200 OK', content_type = 'text/plain'): def text_stream(stream, status='200 OK', content_type='text/plain'):
return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = text) status_headers = StatusAndHeaders(status,
[('Content-Type', content_type)])
return WbResponse(status_headers, value=stream)
@staticmethod @staticmethod
def text_response(text, status = '200 OK', content_type = 'text/plain'): def text_response(text, status='200 OK', content_type='text/plain'):
return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = [text]) status_headers = StatusAndHeaders(status,
[('Content-Type', content_type)])
return WbResponse(status_headers, value=[text])
@staticmethod @staticmethod
def redir_response(location, status = '302 Redirect'): def redir_response(location, status='302 Redirect'):
return WbResponse(StatusAndHeaders(status, [('Location', location)])) return WbResponse(StatusAndHeaders(status,
[('Location', location)]))
def __call__(self, env, start_response): def __call__(self, env, start_response):
# PERF # PERF
perfstats = env.get('X_PERF') perfstats = env.get('X_PERF')
if perfstats: if perfstats:
self.status_headers.headers.append(('X-Archive-Perf-Stats', str(perfstats))) self.status_headers.headers.append(('X-Archive-Perf-Stats',
str(perfstats)))
start_response(self.status_headers.statusline,
start_response(self.status_headers.statusline, self.status_headers.headers) self.status_headers.headers)
if env['REQUEST_METHOD'] == 'HEAD': if env['REQUEST_METHOD'] == 'HEAD':
if hasattr(self.body, 'close'): if hasattr(self.body, 'close'):
@ -148,6 +154,5 @@ class WbResponse:
else: else:
return [str(self.body)] return [str(self.body)]
def __repr__(self): def __repr__(self):
return str(vars(self)) return str(vars(self))

View File

@ -1,8 +1,9 @@
from pywb.utils.wbexception import WbException from pywb.utils.wbexception import WbException
from pywb.core.wbexceptions import NotFoundException, InternalRedirect from pywb.utils.loaders import load_yaml_config
from pywb.core.wbrequestresponse import WbResponse, StatusAndHeaders
from wbexceptions import NotFoundException, InternalRedirect
from wbrequestresponse import WbResponse, StatusAndHeaders
from pywb.utils.loaders import BlockLoader
import os import os
import importlib import importlib
@ -10,10 +11,13 @@ import logging
#================================================================= #=================================================================
# adapted from wsgiref.request_uri, but doesn't include domain name and allows all characters # adapted from wsgiref.request_uri, but doesn't include domain name
# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3 # and allows all characters which are allowed in the path segment
# according to: http://tools.ietf.org/html/rfc3986#section-3.3
# explained here: # explained here:
# http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links # http://stackoverflow.com/questions/4669692/
# valid-characters-for-directory-part-of-a-url-for-short-links
def rel_request_uri(environ, include_query=1): def rel_request_uri(environ, include_query=1):
""" """
Return the requested path, optionally including the query string Return the requested path, optionally including the query string
@ -28,7 +32,7 @@ def rel_request_uri(environ, include_query=1):
"/web/example.com/0~!+$&'()*+,;=:%22" "/web/example.com/0~!+$&'()*+,;=:%22"
""" """
from urllib import quote from urllib import quote
url = quote(environ.get('PATH_INFO',''), safe='/~!$&\'()*+,;=:@') url = quote(environ.get('PATH_INFO', ''), safe='/~!$&\'()*+,;=:@')
if include_query and environ.get('QUERY_STRING'): if include_query and environ.get('QUERY_STRING'):
url += '?' + environ['QUERY_STRING'] url += '?' + environ['QUERY_STRING']
@ -50,7 +54,8 @@ def create_wb_app(wb_router):
response = wb_router(env) response = wb_router(env)
if not response: if not response:
raise NotFoundException('No handler for "{0}"'.format(env['REL_REQUEST_URI'])) msg = 'No handler for "{0}"'.format(env['REL_REQUEST_URI'])
raise NotFoundException(msg)
except InternalRedirect as ir: except InternalRedirect as ir:
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders)) response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
@ -63,7 +68,6 @@ def create_wb_app(wb_router):
return response(env, start_response) return response(env, start_response)
return application return application
@ -94,16 +98,6 @@ def handle_exception(env, error_view, exc, print_trace):
#================================================================= #=================================================================
DEFAULT_CONFIG_FILE = 'config.yaml' DEFAULT_CONFIG_FILE = 'config.yaml'
def load_yaml_config(config_file=None):
import yaml
if not config_file:
config_file = DEFAULT_CONFIG_FILE
configdata = BlockLoader().load(config_file)
config = yaml.load(configdata)
return config
#================================================================= #=================================================================
def init_app(init_func, load_yaml=True, config_file=None): def init_app(init_func, load_yaml=True, config_file=None):
@ -114,6 +108,9 @@ def init_app(init_func, load_yaml=True, config_file=None):
if load_yaml: if load_yaml:
if not config_file: if not config_file:
config_file = os.environ.get('PYWB_CONFIG_FILE') config_file = os.environ.get('PYWB_CONFIG_FILE')
if not config_file:
config_file = DEFAULT_CONFIG_FILE
config = load_yaml_config(config_file) config = load_yaml_config(config_file)
try: try:
@ -135,6 +132,7 @@ def init_app(init_func, load_yaml=True, config_file=None):
#================================================================= #=================================================================
DEFAULT_PORT = 8080 DEFAULT_PORT = 8080
def start_wsgi_server(the_app): def start_wsgi_server(the_app):
from wsgiref.simple_server import make_server from wsgiref.simple_server import make_server
from optparse import OptionParser from optparse import OptionParser
@ -153,7 +151,6 @@ def start_wsgi_server(the_app):
except: except:
port = DEFAULT_PORT port = DEFAULT_PORT
logging.debug('Starting CDX Server on port %s', port) logging.debug('Starting CDX Server on port %s', port)
try: try:

View File

@ -1,11 +1,10 @@
import yaml
import pkgutil import pkgutil
from loaders import load_yaml_config
#================================================================= #=================================================================
DEFAULT_RULES_FILE = 'rules.yaml' DEFAULT_RULES_FILE = 'pywb/rules.yaml'
DEFAULT_RULES_PKG = 'pywb'
#================================================================= #=================================================================
class RuleSet(object): class RuleSet(object):
@ -23,10 +22,14 @@ class RuleSet(object):
self.rules = [] self.rules = []
ds_rules_file = kwargs.get('ds_rules_file')
default_rule_config = kwargs.get('default_rule_config') default_rule_config = kwargs.get('default_rule_config')
config = self.load_default_rules(ds_rules_file) ds_rules_file = kwargs.get('ds_rules_file')
if not ds_rules_file:
ds_rules_file = DEFAULT_RULES_FILE
config = load_yaml_config(ds_rules_file)
rulesmap = config.get('rules') if config else None rulesmap = config.get('rules') if config else None
@ -53,22 +56,6 @@ class RuleSet(object):
if not def_key_found and default_rule_config is not None: if not def_key_found and default_rule_config is not None:
self.rules.append(rule_cls(self.DEFAULT_KEY, default_rule_config)) self.rules.append(rule_cls(self.DEFAULT_KEY, default_rule_config))
@staticmethod
def load_default_rules(filename=None, pkg=None):
config = None
if not filename:
filename = DEFAULT_RULES_FILE
if not pkg:
pkg = DEFAULT_RULES_PKG
if filename:
yaml_str = pkgutil.get_data(pkg, filename)
config = yaml.load(yaml_str)
return config
def iter_matching(self, urlkey): def iter_matching(self, urlkey):
""" """
Iterate over all matching rules for given urlkey Iterate over all matching rules for given urlkey

View File

@ -7,12 +7,20 @@ import os
import hmac import hmac
import urllib2 import urllib2
import time import time
from pkg_resources import resource_stream import pkg_resources
#================================================================= #=================================================================
def is_http(filename): def is_http(filename):
return any(filename.startswith(x) for x in ['http://', 'https://']) return filename.startswith(('http://', 'https://'))
#=================================================================
def load_yaml_config(config_file):
import yaml
configdata = BlockLoader().load(config_file)
config = yaml.load(configdata)
return config
#================================================================= #=================================================================
@ -39,16 +47,27 @@ class BlockLoader(object):
Load a file-like reader from the local file system Load a file-like reader from the local file system
""" """
file_only = False
if url.startswith('file://'): if url.startswith('file://'):
url = url[len('file://'):] url = url[len('file://'):]
file_only = True
try: try:
# first, try as file # first, try as file
afile = open(url, 'rb') afile = open(url, 'rb')
except IOError as file_err:
except IOError:
#if file_only:
# raise
# then, try as package.path/file # then, try as package.path/file
pkg_split = url.split('/', 1) pkg_split = url.split('/', 1)
afile = resource_stream(pkg_split[0], pkg_split[1]) #if len(pkg_split) == 1:
# raise
afile = pkg_resources.resource_stream(pkg_split[0],
pkg_split[1])
if offset > 0: if offset > 0:
afile.seek(offset) afile.seek(offset)

View File

@ -1,6 +1,6 @@
import webtest import webtest
from pywb.bootstrap.pywb_init import create_wb_router from pywb.core.pywb_init import create_wb_router
from pywb.bootstrap.wsgi_wrappers import init_app from pywb.framework.wsgi_wrappers import init_app
from pywb.cdx.cdxobject import CDXObject from pywb.cdx.cdxobject import CDXObject
from fixture import TestExclusionPerms from fixture import TestExclusionPerms