mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
more refactoring!
Create a 'framework' subpackage for general-purpose components; it contains routing, request/response, exceptions and WSGI wrappers. Update the framework package for PEP 8. dsrules: use load_yaml_config() (moved to utils) to init the default config.
This commit is contained in:
parent
f1acad53fc
commit
f0a0976038
@ -1,5 +1,5 @@
|
||||
from pywb.bootstrap.wsgi_wrappers import init_app, start_wsgi_server
|
||||
from pywb.bootstrap.pywb_init import create_wb_router
|
||||
from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
|
||||
from pywb.core.pywb_init import create_wb_router
|
||||
|
||||
#=================================================================
|
||||
# init pywb app
|
||||
|
@ -5,7 +5,7 @@ import time
|
||||
|
||||
from pywb.rewrite.wburl import WbUrl
|
||||
from pywb.cdx.query import CDXQuery
|
||||
from wbrequestresponse import WbResponse
|
||||
from pywb.framework.wbrequestresponse import WbResponse
|
||||
from wbexceptions import WbException, NotFoundException
|
||||
from views import TextCapturesView
|
||||
|
||||
|
@ -1,25 +1,25 @@
|
||||
from pywb.dispatch.archivalrouter import ArchivalRouter, Route
|
||||
from pywb.dispatch.proxy import ProxyArchivalRouter
|
||||
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
||||
from pywb.framework.proxy import ProxyArchivalRouter
|
||||
|
||||
from pywb.warc.recordloader import ArcWarcRecordLoader
|
||||
from pywb.warc.resolvingloader import ResolvingLoader
|
||||
|
||||
from pywb.rewrite.rewrite_content import RewriteContent
|
||||
|
||||
from pywb.core.indexreader import IndexReader
|
||||
from pywb.core.views import J2TemplateView, J2HtmlCapturesView
|
||||
from pywb.core.handlers import WBHandler
|
||||
from pywb.core.replay_views import ReplayView
|
||||
from indexreader import IndexReader
|
||||
from views import J2TemplateView, J2HtmlCapturesView
|
||||
from replay_views import ReplayView
|
||||
|
||||
from pywb.core.handlers import CDXHandler, StaticHandler
|
||||
from pywb.core.handlers import DebugEchoHandler, DebugEchoEnvHandler
|
||||
from handlers import WBHandler
|
||||
from handlers import CDXHandler, StaticHandler
|
||||
from handlers import DebugEchoHandler, DebugEchoEnvHandler
|
||||
|
||||
from pywb.utils.loaders import BlockLoader
|
||||
|
||||
import os
|
||||
import yaml
|
||||
import logging
|
||||
|
||||
|
||||
#=================================================================
|
||||
DEFAULTS = {
|
||||
'hostpaths': ['http://localhost:8080'],
|
||||
@ -34,7 +34,7 @@ DEFAULTS = {
|
||||
|
||||
'static_routes': {'static/default': 'static/'},
|
||||
|
||||
'domain_specific_rules': 'rules.yaml',
|
||||
'domain_specific_rules': 'pywb/rules.yaml',
|
||||
}
|
||||
|
||||
#=================================================================
|
@ -2,9 +2,9 @@ import StringIO
|
||||
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
from pywb.utils.bufferedreaders import ChunkedDataReader
|
||||
from wbrequestresponse import WbResponse
|
||||
from pywb.framework.wbrequestresponse import WbResponse
|
||||
|
||||
from wbexceptions import CaptureException, InternalRedirect
|
||||
from pywb.framework.wbexceptions import CaptureException, InternalRedirect
|
||||
from pywb.warc.recordloader import ArchiveLoadFailed
|
||||
|
||||
from pywb.utils.loaders import LimitReader
|
||||
|
@ -1,5 +1,5 @@
|
||||
from pywb.utils.timeutils import timestamp_to_datetime
|
||||
from wbrequestresponse import WbResponse
|
||||
from pywb.framework.wbrequestresponse import WbResponse
|
||||
|
||||
import urlparse
|
||||
import time
|
||||
|
@ -1,15 +1,17 @@
|
||||
import urlparse
|
||||
import re
|
||||
|
||||
from pywb.core.wbrequestresponse import WbRequest, WbResponse
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
from wbrequestresponse import WbRequest, WbResponse
|
||||
|
||||
|
||||
#=================================================================
|
||||
# ArchivalRouter -- route WB requests in archival mode
|
||||
#=================================================================
|
||||
class ArchivalRouter:
|
||||
def __init__(self, routes, hostpaths=None, abs_path=True, home_view=None, error_view=None):
|
||||
class ArchivalRouter(object):
|
||||
def __init__(self, routes, hostpaths=None, abs_path=True,
|
||||
home_view=None, error_view=None):
|
||||
|
||||
self.routes = routes
|
||||
self.fallback = ReferRedirect(hostpaths)
|
||||
self.abs_path = abs_path
|
||||
@ -29,26 +31,27 @@ class ArchivalRouter:
|
||||
|
||||
return self.fallback(env, self.routes) if self.fallback else None
|
||||
|
||||
|
||||
def render_home_page(self):
|
||||
# render the homepage!
|
||||
if self.home_view:
|
||||
return self.home_view.render_response(routes = self.routes)
|
||||
return self.home_view.render_response(routes=self.routes)
|
||||
else:
|
||||
# default home page template
|
||||
text = '\n'.join(map(str, self.routes))
|
||||
return WbResponse.text_response(text)
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Route by matching regex (or fixed prefix)
|
||||
# of request uri (excluding first '/')
|
||||
#=================================================================
|
||||
class Route:
|
||||
class Route(object):
|
||||
# match upto next / or ? or end
|
||||
SLASH_QUERY_LOOKAHEAD ='(?=/|$|\?)'
|
||||
SLASH_QUERY_LOOKAHEAD = '(?=/|$|\?)'
|
||||
|
||||
def __init__(self, regex, handler, coll_group=0, config={},
|
||||
lookahead=SLASH_QUERY_LOOKAHEAD):
|
||||
|
||||
def __init__(self, regex, handler, coll_group = 0, config = {}, lookahead = SLASH_QUERY_LOOKAHEAD):
|
||||
self.path = regex
|
||||
if regex:
|
||||
self.regex = re.compile(regex + lookahead)
|
||||
@ -59,12 +62,11 @@ class Route:
|
||||
self.coll_group = coll_group
|
||||
self._custom_init(config)
|
||||
|
||||
|
||||
def __call__(self, env, use_abs_prefix):
|
||||
wbrequest = self.parse_request(env, use_abs_prefix)
|
||||
return self.handler(wbrequest) if wbrequest else None
|
||||
|
||||
def parse_request(self, env, use_abs_prefix, request_uri = None):
|
||||
def parse_request(self, env, use_abs_prefix, request_uri=None):
|
||||
if not request_uri:
|
||||
request_uri = env['REL_REQUEST_URI']
|
||||
|
||||
@ -75,10 +77,12 @@ class Route:
|
||||
matched_str = matcher.group(0)
|
||||
if matched_str:
|
||||
rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/'
|
||||
wb_url_str = request_uri[len(matched_str) + 2:] # remove the '/' + rel_prefix part of uri
|
||||
# remove the '/' + rel_prefix part of uri
|
||||
wb_url_str = request_uri[len(matched_str) + 2:]
|
||||
else:
|
||||
rel_prefix = env['SCRIPT_NAME'] + '/'
|
||||
wb_url_str = request_uri[1:] # the request_uri is the wb_url, since no coll
|
||||
# the request_uri is the wb_url, since no coll
|
||||
wb_url_str = request_uri[1:]
|
||||
|
||||
coll = matcher.group(self.coll_group)
|
||||
|
||||
@ -88,20 +92,19 @@ class Route:
|
||||
rel_prefix=rel_prefix,
|
||||
coll=coll,
|
||||
use_abs_prefix=use_abs_prefix,
|
||||
wburl_class = self.handler.get_wburl_type(),
|
||||
wburl_class=self.handler.get_wburl_type(),
|
||||
urlrewriter_class=UrlRewriter)
|
||||
|
||||
|
||||
# Allow for applying of additional filters
|
||||
self._apply_filters(wbrequest, matcher)
|
||||
|
||||
return wbrequest
|
||||
|
||||
|
||||
def _apply_filters(self, wbrequest, matcher):
|
||||
for filter in self.filters:
|
||||
last_grp = len(matcher.groups())
|
||||
wbrequest.query_filter.append(filter.format(matcher.group(last_grp)))
|
||||
filter_str = filter.format(matcher.group(last_grp))
|
||||
wbrequest.query_filter.append(filter_str)
|
||||
|
||||
def _custom_init(self, config):
|
||||
self.filters = config.get('filters', [])
|
||||
@ -112,7 +115,8 @@ class Route:
|
||||
|
||||
|
||||
#=================================================================
|
||||
# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings
|
||||
# ReferRedirect -- redirect urls that have 'fallen through'
|
||||
# based on the referrer settings
|
||||
#=================================================================
|
||||
class ReferRedirect:
|
||||
def __init__(self, match_prefixs):
|
||||
@ -121,7 +125,6 @@ class ReferRedirect:
|
||||
else:
|
||||
self.match_prefixs = [match_prefixs]
|
||||
|
||||
|
||||
def __call__(self, env, routes):
|
||||
referrer = env.get('HTTP_REFERER')
|
||||
|
||||
@ -133,7 +136,7 @@ class ReferRedirect:
|
||||
ref_split = urlparse.urlsplit(referrer)
|
||||
|
||||
# ensure referrer starts with one of allowed hosts
|
||||
if not any (referrer.startswith(i) for i in self.match_prefixs):
|
||||
if not any(referrer.startswith(i) for i in self.match_prefixs):
|
||||
if ref_split.netloc != env.get('HTTP_HOST'):
|
||||
return None
|
||||
|
||||
@ -144,13 +147,12 @@ class ReferRedirect:
|
||||
if app_path:
|
||||
# must start with current app name, if not root
|
||||
if not path.startswith(app_path):
|
||||
return None
|
||||
return None
|
||||
|
||||
path = path[len(app_path):]
|
||||
|
||||
|
||||
for route in routes:
|
||||
ref_request = route.parse_request(env, False, request_uri = path)
|
||||
ref_request = route.parse_request(env, False, request_uri=path)
|
||||
if ref_request:
|
||||
break
|
||||
|
||||
@ -174,6 +176,10 @@ class ReferRedirect:
|
||||
# 2013/path.html -> /path.html
|
||||
rel_request_uri = rel_request_uri[len(timestamp_path) - 1:]
|
||||
|
||||
final_url = urlparse.urlunsplit((ref_split.scheme, ref_split.netloc, rewriter.rewrite(rel_request_uri), '', ''))
|
||||
final_url = urlparse.urlunsplit((ref_split.scheme,
|
||||
ref_split.netloc,
|
||||
rewriter.rewrite(rel_request_uri),
|
||||
'',
|
||||
''))
|
||||
|
||||
return WbResponse.redir_response(final_url)
|
@ -1,15 +1,19 @@
|
||||
from pywb.core.wbrequestresponse import WbResponse, WbRequest
|
||||
from wbrequestresponse import WbResponse, WbRequest
|
||||
from archivalrouter import ArchivalRouter
|
||||
import urlparse
|
||||
|
||||
|
||||
#=================================================================
|
||||
# An experimental router which combines both archival and proxy modes
|
||||
# http proxy mode support is very simple: only latest capture is available currently
|
||||
# http proxy mode support is very simple so far:
|
||||
# only latest capture is available currently
|
||||
#=================================================================
|
||||
|
||||
class ProxyArchivalRouter:
|
||||
def __init__(self, routes, hostpaths = None, abs_path = True, home_view = None, error_view = None):
|
||||
self.archival = ArchivalRouter(routes, hostpaths, abs_path, home_view, error_view)
|
||||
def __init__(self, routes, hostpaths=None, abs_path=True,
|
||||
home_view=None, error_view=None):
|
||||
|
||||
self.archival = ArchivalRouter(routes, hostpaths, abs_path,
|
||||
home_view, error_view)
|
||||
self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
|
||||
self.error_view = error_view
|
||||
|
||||
@ -29,7 +33,7 @@ class ProxyArchivalRouter:
|
||||
# Only supports latest capture replay at the moment
|
||||
#=================================================================
|
||||
class ProxyRouter:
|
||||
def __init__(self, handler, hostpaths = None, error_view = None):
|
||||
def __init__(self, handler, hostpaths=None, error_view=None):
|
||||
self.handler = handler
|
||||
self.hostpaths = hostpaths
|
||||
|
||||
@ -56,27 +60,26 @@ class ProxyRouter:
|
||||
|
||||
return self.handler(wbrequest)
|
||||
|
||||
|
||||
# Proxy Auto-Config (PAC) script for the proxy
|
||||
def make_pac_response(self, env):
|
||||
server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT']
|
||||
|
||||
buff = 'function FindProxyForURL (url, host) {\n'
|
||||
|
||||
direct_cond =' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n'
|
||||
direct = ' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n'
|
||||
|
||||
for hostpath in self.hostpaths:
|
||||
parts = urlparse.urlsplit(hostpath).netloc.split(':')
|
||||
buff += direct_cond.format(parts[0])
|
||||
buff += direct.format(parts[0])
|
||||
|
||||
buff += direct_cond.format(env['SERVER_NAME'])
|
||||
buff += direct.format(env['SERVER_NAME'])
|
||||
|
||||
#buff += '\n return "PROXY {0}";\n}}\n'.format(self.hostpaths[0])
|
||||
buff += '\n return "PROXY {0}";\n}}\n'.format(server_hostport)
|
||||
|
||||
return WbResponse.text_response(buff, content_type = 'application/x-ns-proxy-autoconfig')
|
||||
|
||||
content_type = 'application/x-ns-proxy-autoconfig'
|
||||
|
||||
return WbResponse.text_response(buff, content_type=content_type)
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -85,10 +88,11 @@ class ProxyRouter:
|
||||
class ProxyHttpsUrlRewriter:
|
||||
HTTP = 'http://'
|
||||
HTTPS = 'https://'
|
||||
|
||||
def __init__(self, wbrequest, prefix):
|
||||
pass
|
||||
|
||||
def rewrite(self, url, mod = None):
|
||||
def rewrite(self, url, mod=None):
|
||||
if url.startswith(self.HTTPS):
|
||||
return self.HTTP + url[len(self.HTTPS):]
|
||||
else:
|
||||
@ -97,6 +101,5 @@ class ProxyHttpsUrlRewriter:
|
||||
def get_timestamp_url(self, timestamp, url):
|
||||
return url
|
||||
|
||||
def get_abs_url(self, url = ''):
|
||||
def get_abs_url(self, url=''):
|
||||
return url
|
||||
|
@ -84,7 +84,7 @@ False
|
||||
|
||||
"""
|
||||
|
||||
from pywb.dispatch.archivalrouter import Route, ReferRedirect
|
||||
from pywb.framework.archivalrouter import Route, ReferRedirect
|
||||
from pywb.core.handlers import BaseHandler, WbUrlHandler
|
||||
import pprint
|
||||
|
@ -41,7 +41,7 @@ from pywb.rewrite.wburl import WbUrl
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||
|
||||
from pywb.core.wbrequestresponse import WbRequest, WbResponse
|
||||
from pywb.framework.wbrequestresponse import WbRequest, WbResponse
|
||||
|
||||
|
||||
def print_req_from_uri(request_uri, env={}, use_abs_prefix=False):
|
@ -26,7 +26,6 @@ class WbRequest:
|
||||
except KeyError:
|
||||
return ''
|
||||
|
||||
|
||||
def __init__(self, env,
|
||||
request_uri=None,
|
||||
rel_prefix='',
|
||||
@ -40,7 +39,10 @@ class WbRequest:
|
||||
|
||||
self.env = env
|
||||
|
||||
self.request_uri = request_uri if request_uri else env.get('REL_REQUEST_URI')
|
||||
if request_uri:
|
||||
self.request_uri = request_uri
|
||||
else:
|
||||
self.request_uri = env.get('REL_REQUEST_URI')
|
||||
|
||||
self.coll = coll
|
||||
|
||||
@ -55,7 +57,6 @@ class WbRequest:
|
||||
else:
|
||||
self.wb_prefix = rel_prefix
|
||||
|
||||
|
||||
if not wb_url_str:
|
||||
wb_url_str = '/'
|
||||
|
||||
@ -83,7 +84,6 @@ class WbRequest:
|
||||
# PERF
|
||||
env['X_PERF'] = {}
|
||||
|
||||
|
||||
def _is_ajax(self):
|
||||
value = self.env.get('HTTP_X_REQUESTED_WITH')
|
||||
if not value:
|
||||
@ -96,7 +96,6 @@ class WbRequest:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
varlist = vars(self)
|
||||
varstr = pprint.pformat(varlist)
|
||||
@ -111,32 +110,39 @@ class WbResponse:
|
||||
Holds a status_headers object and a response iter, to be
|
||||
returned to wsgi container.
|
||||
"""
|
||||
def __init__(self, status_headers, value = []):
|
||||
def __init__(self, status_headers, value=[]):
|
||||
self.status_headers = status_headers
|
||||
self.body = value
|
||||
|
||||
@staticmethod
|
||||
def text_stream(text, status = '200 OK', content_type = 'text/plain'):
|
||||
return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = text)
|
||||
def text_stream(stream, status='200 OK', content_type='text/plain'):
|
||||
status_headers = StatusAndHeaders(status,
|
||||
[('Content-Type', content_type)])
|
||||
|
||||
return WbResponse(status_headers, value=stream)
|
||||
|
||||
@staticmethod
|
||||
def text_response(text, status = '200 OK', content_type = 'text/plain'):
|
||||
return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = [text])
|
||||
def text_response(text, status='200 OK', content_type='text/plain'):
|
||||
status_headers = StatusAndHeaders(status,
|
||||
[('Content-Type', content_type)])
|
||||
|
||||
return WbResponse(status_headers, value=[text])
|
||||
|
||||
@staticmethod
|
||||
def redir_response(location, status = '302 Redirect'):
|
||||
return WbResponse(StatusAndHeaders(status, [('Location', location)]))
|
||||
|
||||
def redir_response(location, status='302 Redirect'):
|
||||
return WbResponse(StatusAndHeaders(status,
|
||||
[('Location', location)]))
|
||||
|
||||
def __call__(self, env, start_response):
|
||||
|
||||
# PERF
|
||||
perfstats = env.get('X_PERF')
|
||||
if perfstats:
|
||||
self.status_headers.headers.append(('X-Archive-Perf-Stats', str(perfstats)))
|
||||
self.status_headers.headers.append(('X-Archive-Perf-Stats',
|
||||
str(perfstats)))
|
||||
|
||||
|
||||
start_response(self.status_headers.statusline, self.status_headers.headers)
|
||||
start_response(self.status_headers.statusline,
|
||||
self.status_headers.headers)
|
||||
|
||||
if env['REQUEST_METHOD'] == 'HEAD':
|
||||
if hasattr(self.body, 'close'):
|
||||
@ -148,6 +154,5 @@ class WbResponse:
|
||||
else:
|
||||
return [str(self.body)]
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return str(vars(self))
|
@ -1,8 +1,9 @@
|
||||
from pywb.utils.wbexception import WbException
|
||||
from pywb.core.wbexceptions import NotFoundException, InternalRedirect
|
||||
from pywb.core.wbrequestresponse import WbResponse, StatusAndHeaders
|
||||
from pywb.utils.loaders import load_yaml_config
|
||||
|
||||
from wbexceptions import NotFoundException, InternalRedirect
|
||||
from wbrequestresponse import WbResponse, StatusAndHeaders
|
||||
|
||||
from pywb.utils.loaders import BlockLoader
|
||||
|
||||
import os
|
||||
import importlib
|
||||
@ -10,10 +11,13 @@ import logging
|
||||
|
||||
|
||||
#=================================================================
|
||||
# adapted from wsgiref.request_uri, but doesn't include domain name and allows all characters
|
||||
# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3
|
||||
# adapted from wsgiref.request_uri, but doesn't include domain name
|
||||
# and allows all characters which are allowed in the path segment
|
||||
# according to: http://tools.ietf.org/html/rfc3986#section-3.3
|
||||
# explained here:
|
||||
# http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links
|
||||
# http://stackoverflow.com/questions/4669692/
|
||||
# valid-characters-for-directory-part-of-a-url-for-short-links
|
||||
|
||||
def rel_request_uri(environ, include_query=1):
|
||||
"""
|
||||
Return the requested path, optionally including the query string
|
||||
@ -28,7 +32,7 @@ def rel_request_uri(environ, include_query=1):
|
||||
"/web/example.com/0~!+$&'()*+,;=:%22"
|
||||
"""
|
||||
from urllib import quote
|
||||
url = quote(environ.get('PATH_INFO',''), safe='/~!$&\'()*+,;=:@')
|
||||
url = quote(environ.get('PATH_INFO', ''), safe='/~!$&\'()*+,;=:@')
|
||||
if include_query and environ.get('QUERY_STRING'):
|
||||
url += '?' + environ['QUERY_STRING']
|
||||
|
||||
@ -50,7 +54,8 @@ def create_wb_app(wb_router):
|
||||
response = wb_router(env)
|
||||
|
||||
if not response:
|
||||
raise NotFoundException('No handler for "{0}"'.format(env['REL_REQUEST_URI']))
|
||||
msg = 'No handler for "{0}"'.format(env['REL_REQUEST_URI'])
|
||||
raise NotFoundException(msg)
|
||||
|
||||
except InternalRedirect as ir:
|
||||
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
|
||||
@ -63,7 +68,6 @@ def create_wb_app(wb_router):
|
||||
|
||||
return response(env, start_response)
|
||||
|
||||
|
||||
return application
|
||||
|
||||
|
||||
@ -94,16 +98,6 @@ def handle_exception(env, error_view, exc, print_trace):
|
||||
#=================================================================
|
||||
DEFAULT_CONFIG_FILE = 'config.yaml'
|
||||
|
||||
def load_yaml_config(config_file=None):
|
||||
import yaml
|
||||
|
||||
if not config_file:
|
||||
config_file = DEFAULT_CONFIG_FILE
|
||||
|
||||
configdata = BlockLoader().load(config_file)
|
||||
config = yaml.load(configdata)
|
||||
return config
|
||||
|
||||
|
||||
#=================================================================
|
||||
def init_app(init_func, load_yaml=True, config_file=None):
|
||||
@ -114,6 +108,9 @@ def init_app(init_func, load_yaml=True, config_file=None):
|
||||
if load_yaml:
|
||||
if not config_file:
|
||||
config_file = os.environ.get('PYWB_CONFIG_FILE')
|
||||
if not config_file:
|
||||
config_file = DEFAULT_CONFIG_FILE
|
||||
|
||||
config = load_yaml_config(config_file)
|
||||
|
||||
try:
|
||||
@ -135,6 +132,7 @@ def init_app(init_func, load_yaml=True, config_file=None):
|
||||
#=================================================================
|
||||
DEFAULT_PORT = 8080
|
||||
|
||||
|
||||
def start_wsgi_server(the_app):
|
||||
from wsgiref.simple_server import make_server
|
||||
from optparse import OptionParser
|
||||
@ -153,7 +151,6 @@ def start_wsgi_server(the_app):
|
||||
except:
|
||||
port = DEFAULT_PORT
|
||||
|
||||
|
||||
logging.debug('Starting CDX Server on port %s', port)
|
||||
|
||||
try:
|
@ -1,11 +1,10 @@
|
||||
import yaml
|
||||
import pkgutil
|
||||
from loaders import load_yaml_config
|
||||
|
||||
|
||||
#=================================================================
|
||||
|
||||
DEFAULT_RULES_FILE = 'rules.yaml'
|
||||
DEFAULT_RULES_PKG = 'pywb'
|
||||
|
||||
DEFAULT_RULES_FILE = 'pywb/rules.yaml'
|
||||
|
||||
#=================================================================
|
||||
class RuleSet(object):
|
||||
@ -23,10 +22,14 @@ class RuleSet(object):
|
||||
|
||||
self.rules = []
|
||||
|
||||
ds_rules_file = kwargs.get('ds_rules_file')
|
||||
default_rule_config = kwargs.get('default_rule_config')
|
||||
|
||||
config = self.load_default_rules(ds_rules_file)
|
||||
ds_rules_file = kwargs.get('ds_rules_file')
|
||||
|
||||
if not ds_rules_file:
|
||||
ds_rules_file = DEFAULT_RULES_FILE
|
||||
|
||||
config = load_yaml_config(ds_rules_file)
|
||||
|
||||
rulesmap = config.get('rules') if config else None
|
||||
|
||||
@ -53,22 +56,6 @@ class RuleSet(object):
|
||||
if not def_key_found and default_rule_config is not None:
|
||||
self.rules.append(rule_cls(self.DEFAULT_KEY, default_rule_config))
|
||||
|
||||
@staticmethod
|
||||
def load_default_rules(filename=None, pkg=None):
|
||||
config = None
|
||||
|
||||
if not filename:
|
||||
filename = DEFAULT_RULES_FILE
|
||||
|
||||
if not pkg:
|
||||
pkg = DEFAULT_RULES_PKG
|
||||
|
||||
if filename:
|
||||
yaml_str = pkgutil.get_data(pkg, filename)
|
||||
config = yaml.load(yaml_str)
|
||||
|
||||
return config
|
||||
|
||||
def iter_matching(self, urlkey):
|
||||
"""
|
||||
Iterate over all matching rules for given urlkey
|
||||
|
@ -7,12 +7,20 @@ import os
|
||||
import hmac
|
||||
import urllib2
|
||||
import time
|
||||
from pkg_resources import resource_stream
|
||||
import pkg_resources
|
||||
|
||||
|
||||
#=================================================================
|
||||
def is_http(filename):
|
||||
return any(filename.startswith(x) for x in ['http://', 'https://'])
|
||||
return filename.startswith(('http://', 'https://'))
|
||||
|
||||
|
||||
#=================================================================
|
||||
def load_yaml_config(config_file):
|
||||
import yaml
|
||||
configdata = BlockLoader().load(config_file)
|
||||
config = yaml.load(configdata)
|
||||
return config
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -39,16 +47,27 @@ class BlockLoader(object):
|
||||
Load a file-like reader from the local file system
|
||||
"""
|
||||
|
||||
file_only = False
|
||||
|
||||
if url.startswith('file://'):
|
||||
url = url[len('file://'):]
|
||||
file_only = True
|
||||
|
||||
try:
|
||||
# first, try as file
|
||||
afile = open(url, 'rb')
|
||||
except IOError as file_err:
|
||||
|
||||
except IOError:
|
||||
#if file_only:
|
||||
# raise
|
||||
|
||||
# then, try as package.path/file
|
||||
pkg_split = url.split('/', 1)
|
||||
afile = resource_stream(pkg_split[0], pkg_split[1])
|
||||
#if len(pkg_split) == 1:
|
||||
# raise
|
||||
|
||||
afile = pkg_resources.resource_stream(pkg_split[0],
|
||||
pkg_split[1])
|
||||
|
||||
if offset > 0:
|
||||
afile.seek(offset)
|
||||
|
@ -1,6 +1,6 @@
|
||||
import webtest
|
||||
from pywb.bootstrap.pywb_init import create_wb_router
|
||||
from pywb.bootstrap.wsgi_wrappers import init_app
|
||||
from pywb.core.pywb_init import create_wb_router
|
||||
from pywb.framework.wsgi_wrappers import init_app
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
|
||||
from fixture import TestExclusionPerms
|
||||
|
Loading…
x
Reference in New Issue
Block a user