1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

config system: initial work on automated directory-convention based config!

The config.yaml file is now optional; add default_config.yaml, which provides the default settings (#55).
This commit is contained in:
Ilya Kreymer 2015-02-23 21:59:41 -08:00
parent 5d80d2d891
commit 435fa390ed
3 changed files with 139 additions and 31 deletions

48
pywb/default_config.yaml Normal file
View File

@ -0,0 +1,48 @@
# Built-in defaults, used as the fallback layer beneath any user-supplied
# configuration.

#collections:
#    pywb: ./sample_archive/cdx/

#archive_paths: ./sample_archive/warcs/

# Directory scanned for collections: every sub-directory found here is
# treated as one collection, named after the sub-directory.
collections_root: collections

# Names looked up *inside* each collection directory.
paths:
    archive_paths: warcs
    index_paths: cdx
    static_paths: static

    templates_dir: templates

    # Per-collection template overrides. Each key is the setting that gets
    # overridden, so it must match the corresponding top-level key below
    # (note the `_html` suffix on every entry).
    template_files:
        banner_html: banner.html
        head_insert_html: head_insert.html
        frame_insert_html: frame_insert.html
        query_html: query.html
        search_html: search.html

# Default templates, used when a collection does not provide its own.
banner_html: banner.html
head_insert_html: head_insert.html
frame_insert_html: frame_insert.html

home_html: index.html
query_html: query.html
search_html: search.html
error_html: error.html
not_found_html: not_found.html

proxy_cert_download_html: proxy_cert_download.html
proxy_select_html: proxy_select.html

template_globals:
    static_path: static/default

# Route name -> directory served as static content for that route.
static_routes:
    static/default: pywb/static/

enable_memento: true

domain_specific_rules: pywb/rules.yaml

View File

@ -176,7 +176,8 @@ def init_app(init_func, load_yaml=True, config_file=None, config={}):
if not config_file:
config_file = DEFAULT_CONFIG_FILE
config = load_yaml_config(config_file)
if os.path.isfile(config_file):
config = load_yaml_config(config_file)
wb_router = init_func(config)
except:

View File

@ -1,4 +1,4 @@
from pywb.utils.dsrules import DEFAULT_RULES_FILE
from pywb.utils.loaders import load_yaml_config
from pywb.framework.archivalrouter import ArchivalRouter, Route
from pywb.framework.proxy import ProxyArchivalRouter
@ -23,31 +23,7 @@ import logging
#=================================================================
DEFAULTS = {
'collections': {'pywb': './sample_archive/cdx/'},
'archive_paths': './sample_archive/warcs/',
'head_insert_html': 'ui/head_insert.html',
'banner_html': 'banner.html',
'query_html': 'ui/query.html',
'search_html': 'ui/search.html',
'home_html': 'ui/index.html',
'error_html': 'ui/error.html',
'not_found_html': 'ui/not_found.html',
'proxy_select_html': 'ui/proxy_select.html',
'proxy_cert_download_html': 'ui/proxy_cert_download.html',
'template_globals': {'static_path': 'static/default'},
'static_routes': {'static/default': 'pywb/static/'},
'domain_specific_rules': DEFAULT_RULES_FILE,
'enable_memento': True
}
DEFAULT_CONFIG = 'pywb/default_config.yaml'
#=================================================================
class DictChain:
@ -146,17 +122,99 @@ def create_cdx_server_app(passed_config):
return ArchivalRouter(routes)
#=================================================================
class DirectoryCollsLoader(object):
    """Build collection definitions from a directory-per-collection layout.

    Every sub-directory of the configured ``collections_root`` is treated
    as one collection named after that sub-directory. The names of the
    per-collection sub-directories (index, archive, static, templates) and
    of the template files are read from the ``paths`` section of ``config``.

    :param config: mapping-like object exposing ``get(key)``; the keys read
        are ``collections_root`` and ``paths``.
    """

    def __init__(self, config):
        self.config = config

    def __call__(self):
        """Scan the collections root and return ``{name: collection_dict}``.

        Returns an empty dict when no collections root is configured or the
        configured directory does not exist, so that a missing directory is
        not an error.

        :raises Exception: if a collection directory lacks a required
            sub-directory (see :meth:`load_dir`).
        """
        colls = {}

        root_dir = self.config.get('collections_root')
        # A collections root may be configured by default without the
        # directory ever having been created; that just means "none found".
        if not root_dir or not os.path.isdir(root_dir):
            return colls

        for name in os.listdir(root_dir):
            full = os.path.join(root_dir, name)
            if not os.path.isdir(full):
                # plain files next to the collection directories are ignored
                continue

            coll = self.load_dir(full)
            if coll:
                colls[name] = coll

        return colls

    def _paths(self):
        """Return the ``paths`` config section, or ``{}`` if it is absent."""
        return self.config.get('paths') or {}

    def _add_if_exists(self, coll, root_dir, dir_key, required=False):
        """Set ``coll[dir_key]`` to the matching sub-directory of ``root_dir``.

        The sub-directory name is looked up under ``dir_key`` in the
        ``paths`` config section. A value already present in ``coll`` is
        never overwritten.

        :param coll: collection dict to update in place.
        :param root_dir: directory of the collection being loaded.
        :param dir_key: key to look up in ``paths`` and to set in ``coll``.
        :param required: when true, a missing setting or missing directory
            raises instead of being skipped.
        :returns: ``True`` if ``coll[dir_key]`` was set, ``False`` otherwise.
        :raises Exception: if ``required`` and the directory name is not
            configured or the directory does not exist.
        """
        if dir_key in coll:
            # already set
            return False

        thedir = self._paths().get(dir_key)

        if not thedir:
            msg = 'No "{0}" for collection {1}'.format(dir_key, root_dir)
            if required:
                raise Exception(msg)
            else:
                logging.warning(msg)
                return False

        fulldir = os.path.join(root_dir, thedir)

        if os.path.isdir(fulldir):
            coll[dir_key] = fulldir.rstrip('/')
            return True
        elif required:
            msg = 'Dir "{0}" does not exist for "{1}"'.format(fulldir, dir_key)
            raise Exception(msg)

        return False

    def load_dir(self, root_dir):
        """Load the definition of the single collection stored in ``root_dir``.

        Starts from the collection's own ``config.yaml`` when that file
        exists, then fills in any directory or template setting that file
        did not set, based on what is present on disk.

        :param root_dir: directory of the collection.
        :returns: dict describing the collection.
        :raises Exception: if the index or archive directory is missing.
        """
        config_file = os.path.join(root_dir, 'config.yaml')
        if os.path.isfile(config_file):
            # NOTE(review): assumes load_yaml_config returns None for an
            # empty file, hence the `or {}` fallback -- confirm.
            coll = load_yaml_config(config_file) or {}
        else:
            coll = {}

        self._add_if_exists(coll, root_dir, 'index_paths', True)
        self._add_if_exists(coll, root_dir, 'archive_paths', True)
        self._add_if_exists(coll, root_dir, 'static_paths', False)

        # Add templates
        paths = self._paths()
        templates_dir = paths.get('templates_dir')
        if not templates_dir:
            return coll

        template_dir = os.path.join(root_dir, templates_dir)
        if not os.path.isdir(template_dir):
            return coll

        template_files = paths.get('template_files') or {}
        for tname, tfile in template_files.items():
            if tname in coll:
                # Already set
                continue

            # Template files live inside the templates directory, not
            # directly in the collection root.
            full = os.path.join(template_dir, tfile)
            if os.path.isfile(full):
                coll[tname] = full

        return coll
#=================================================================
def create_wb_router(passed_config={}):
config = DictChain(passed_config, DEFAULTS)
defaults = load_yaml_config(DEFAULT_CONFIG)
config = DictChain(passed_config, defaults)
routes = []
port = config.get('port')
# collections based on cdx source
collections = config.get('collections')
collections = config.get('collections', {})
# collections based on file system
dir_loader = DirectoryCollsLoader(config)
collections.update(dir_loader())
if config.get('enable_memento', False):
request_class = MementoRequest
@ -214,7 +272,8 @@ def create_wb_router(passed_config={}):
if config.get('debug_echo_req', False):
routes.append(Route('echo_req', DebugEchoHandler()))
static_routes = config.get('static_routes')
static_routes = config.get('static_routes', {})
print(static_routes)
for static_name, static_path in static_routes.iteritems():
routes.append(Route(static_name, StaticHandler(static_path)))