mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
cleanup pywb_init, work on documenting config.yaml!
This commit is contained in:
parent
43a46b373d
commit
411e7fe8a3
66
config.yaml
66
config.yaml
@ -1,17 +1,67 @@
|
||||
# pywb config file
|
||||
# ========================================
|
||||
#
|
||||
# Settings for each route are defined below
|
||||
# Each route may be an archival collection or other handler
|
||||
#
|
||||
routes:
|
||||
pywb:
|
||||
index_paths:
|
||||
- ./sample_archive/cdx/
|
||||
# route name (eg /pywb)
|
||||
- name: pywb
|
||||
|
||||
archive_paths:
|
||||
- ./sample_archive/warcs/
|
||||
# list of paths to search for cdx files
|
||||
# * local .cdx file
|
||||
# * local dir, will include all .cdx files in dir
|
||||
#
|
||||
# or a string value indicating remote http cdx server
|
||||
index_paths:
|
||||
- ./sample_archive/cdx/
|
||||
|
||||
head_insert_template: ./ui/head_insert.html
|
||||
# indicate if cdx files are sorted by SURT keys -- eg: com,example)/
|
||||
# SURT keys are recommended for future indices, but non-SURT cdxs
|
||||
# are also supported
|
||||
#
|
||||
# * Set to true if cdxs start with surts: com,example)/
|
||||
# * Set to false if cdxs start with urls: example.com)/
|
||||
surt_ordered: True
|
||||
|
||||
html_query_template: ./ui/query.html
|
||||
# list of path prefixes pywb uses to 'resolve' WARC and ARC filenames
|
||||
# in the cdx to their absolute path
|
||||
#
|
||||
# if path is:
|
||||
# * local dir, use path as prefix
|
||||
# * local file, lookup prefix in tab-delimited sorted index
|
||||
# * http:// path, use path as remote prefix
|
||||
# * redis:// path, use redis to lookup full path for w:<warc> as key
|
||||
|
||||
archive_paths:
|
||||
- ./sample_archive/warcs/
|
||||
|
||||
# ui: optional Jinja2 template to insert into <head> of each replay
|
||||
head_insert_html_template: ./ui/head_insert.html
|
||||
|
||||
# ui: optional text to directly insert into <head>
|
||||
# only loaded if head_insert_html_template is not specified
|
||||
|
||||
#head_insert_text: <script src='example.js'></script>
|
||||
|
||||
|
||||
hostpaths: http://localhost:8080/
|
||||
# ui: optional Jinja2 template to use for 'calendar' query,
|
||||
# eg, a listing of captures in response to a ../*/<url>
|
||||
#
|
||||
# may be a simple listing or a more complex 'calendar' UI
|
||||
# if omitted, the capture listing shows the raw index
|
||||
calendar_html_template: ./ui/query.html
|
||||
|
||||
|
||||
# list of host names that pywb will be running from to detect
|
||||
# 'fallthrough' requests based on referrer
|
||||
#
|
||||
# eg: given an incorrect request for http://localhost:8080/image.gif with a referrer
|
||||
# of http://localhost:8080/pywb/index.html, pywb can correctly redirect
|
||||
# to http://localhost:8080/pywb/image.gif
|
||||
#
|
||||
|
||||
hostpaths: ['http://localhost:8080/']
|
||||
|
||||
|
||||
|
||||
|
@ -11,16 +11,18 @@ import yaml
|
||||
import utils
|
||||
import logging
|
||||
|
||||
## ===========
|
||||
default_head_insert = """
|
||||
#=================================================================
|
||||
## Reference non-YAML config
|
||||
#=================================================================
|
||||
def pywb_config_manual():
|
||||
default_head_insert = """
|
||||
|
||||
<!-- WB Insert -->
|
||||
<script src='/static/wb.js'> </script>
|
||||
<link rel='stylesheet' href='/static/wb.css'/>
|
||||
<!-- End WB Insert -->
|
||||
"""
|
||||
<!-- WB Insert -->
|
||||
<script src='/static/wb.js'> </script>
|
||||
<link rel='stylesheet' href='/static/wb.css'/>
|
||||
<!-- End WB Insert -->
|
||||
"""
|
||||
|
||||
def pywb_config2():
|
||||
# Current test dir
|
||||
#test_dir = utils.test_data_dir()
|
||||
test_dir = './sample_archive/'
|
||||
@ -64,10 +66,20 @@ def pywb_config2():
|
||||
hostpaths = ['http://localhost:8080/'])
|
||||
|
||||
|
||||
def pywb_config(filename = './pywb/config.yaml'):
|
||||
config = yaml.load(open(filename))
|
||||
|
||||
routes = map(yaml_parse_route, config['routes'].iteritems())
|
||||
#=================================================================
|
||||
# YAML config loader
|
||||
#=================================================================
|
||||
DEFAULT_CONFIG_FILE = 'config.yaml'
|
||||
|
||||
|
||||
def pywb_config(config_file = None):
|
||||
if not config_file:
|
||||
config_file = os.environ.get('PYWB_CONFIG', DEFAULT_CONFIG_FILE)
|
||||
|
||||
config = yaml.load(open(config_file))
|
||||
|
||||
routes = map(yaml_parse_route, config['routes'])
|
||||
|
||||
hostpaths = config.get('hostpaths', ['http://localhost:8080/'])
|
||||
|
||||
@ -75,9 +87,6 @@ def pywb_config(filename = './pywb/config.yaml'):
|
||||
|
||||
|
||||
|
||||
def yaml_parse_route((route_name, handler_def)):
|
||||
return Route(route_name, yaml_parse_handler(handler_def))
|
||||
|
||||
|
||||
def yaml_parse_index_loader(index_config):
|
||||
# support mixed cdx streams and remote servers?
|
||||
@ -101,60 +110,56 @@ def yaml_parse_index_loader(index_config):
|
||||
return indexreader.LocalCDXServer([uri])
|
||||
|
||||
|
||||
def yaml_parse_archive_resolvers(archive_paths):
|
||||
|
||||
#TODO: more options (remote files, contains param, etc..)
|
||||
def make_resolver(path):
|
||||
if path.startswith('redis://'):
|
||||
return replay_resolvers.RedisResolver(path)
|
||||
elif os.path.isfile(path):
|
||||
return replay_resolvers.PathIndexResolver(path)
|
||||
else:
|
||||
logging.info('Adding Archive Source: ' + path)
|
||||
return replay_resolvers.PrefixResolver(path)
|
||||
|
||||
return map(make_resolver, archive_paths)
|
||||
|
||||
def yaml_parse_head_insert(handler_def):
|
||||
def yaml_parse_head_insert(config):
|
||||
# First, try a template file
|
||||
head_insert_file = handler_def.get('head_insert_template')
|
||||
head_insert_file = config.get('head_insert_html_template')
|
||||
if head_insert_file:
|
||||
logging.info('Adding Head-Insert Template: ' + head_insert_file)
|
||||
return views.J2HeadInsertView(head_insert_file)
|
||||
|
||||
# Then, static head_insert text
|
||||
head_insert_text = handler_def.get('head_insert_text', '')
|
||||
head_insert_text = config.get('head_insert_text', '')
|
||||
logging.info('Adding Head-Insert Text: ' + head_insert_text)
|
||||
return head_insert_text
|
||||
|
||||
|
||||
def yaml_parse_handler(handler_def):
|
||||
archive_loader = archiveloader.ArchiveLoader()
|
||||
|
||||
index_loader = yaml_parse_index_loader(handler_def['index_paths'])
|
||||
|
||||
archive_resolvers = yaml_parse_archive_resolvers(handler_def['archive_paths'])
|
||||
|
||||
head_insert = yaml_parse_head_insert(handler_def)
|
||||
|
||||
replayer = replay_views.RewritingReplayView(resolvers = archive_resolvers,
|
||||
archiveloader = archive_loader,
|
||||
head_insert = head_insert,
|
||||
buffer_response = handler_def.get('buffer_response', False))
|
||||
|
||||
html_view_file = handler_def.get('html_query_template')
|
||||
def yaml_parse_calendar_view(config):
|
||||
html_view_file = config.get('calendar_html_template')
|
||||
if html_view_file:
|
||||
logging.info('Adding HTML Calendar Template: ' + html_view_file)
|
||||
else:
|
||||
logging.info('No HTML Calendar View Present')
|
||||
|
||||
html_view = views.J2QueryView(html_view_file) if html_view_file else None
|
||||
return views.J2QueryView(html_view_file) if html_view_file else None
|
||||
|
||||
|
||||
|
||||
def yaml_parse_route(config):
|
||||
name = config['name']
|
||||
|
||||
archive_loader = archiveloader.ArchiveLoader()
|
||||
|
||||
index_loader = yaml_parse_index_loader(config['index_paths'])
|
||||
|
||||
archive_resolvers = map(replay_resolvers.make_best_resolver, config['archive_paths'])
|
||||
|
||||
head_insert = yaml_parse_head_insert(config)
|
||||
|
||||
replayer = replay_views.RewritingReplayView(resolvers = archive_resolvers,
|
||||
archiveloader = archive_loader,
|
||||
head_insert = head_insert,
|
||||
buffer_response = config.get('buffer_response', False))
|
||||
|
||||
html_view = yaml_parse_calendar_view(config)
|
||||
|
||||
wb_handler = handlers.WBHandler(index_loader, replayer, html_view)
|
||||
return wb_handler
|
||||
|
||||
return Route(name, wb_handler)
|
||||
|
||||
|
||||
if __name__ == "__main__" or utils.enable_doctests():
|
||||
pass
|
||||
#print pywb_config('config.yaml')
|
||||
# Just test for execution for now
|
||||
pywb_config(os.path.dirname(os.path.realpath(__file__)) + '/../config.yaml')
|
||||
pywb_config_manual()
|
||||
|
||||
|
||||
|
@ -1,5 +1,10 @@
|
||||
import redis
|
||||
import binsearch
|
||||
|
||||
import urlparse
|
||||
import os
|
||||
import logging
|
||||
|
||||
#======================================
|
||||
# PrefixResolver - convert cdx file entry to url with prefix if url contains specified string
|
||||
#======================================
|
||||
@ -41,3 +46,26 @@ class PathIndexResolver:
|
||||
|
||||
return gen_list(result)
|
||||
|
||||
|
||||
#TODO: more options (remote files, contains param, etc..)
|
||||
# find best resolver given the path
|
||||
def make_best_resolver(path):
|
||||
url_parts = urlparse.urlsplit(path)
|
||||
|
||||
if url_parts.scheme == 'redis':
|
||||
logging.info('Adding Redis Index: ' + path)
|
||||
return RedisResolver(path)
|
||||
|
||||
if url_parts.scheme == 'file':
|
||||
path = url_parts.path
|
||||
|
||||
if os.path.isfile(path):
|
||||
logging.info('Adding Path Index: ' + path)
|
||||
return PathIndexResolver(path)
|
||||
|
||||
# non-file paths always treated as prefix for now
|
||||
else:
|
||||
logging.info('Adding Archive Path Source: ' + path)
|
||||
return PrefixResolver(path)
|
||||
|
||||
|
||||
|
@ -84,18 +84,16 @@ def main():
|
||||
# Attempt to load real settings from globalwb module
|
||||
logging.basicConfig(format = '%(asctime)s: [%(levelname)s]: %(message)s', level = logging.DEBUG)
|
||||
|
||||
config_name = os.environ.get('PYWB_CONFIG')
|
||||
config_name = os.environ.get('PYWB_CONFIG_MODULE')
|
||||
|
||||
if not config_name:
|
||||
config_name = 'pywb.pywb_init'
|
||||
logging.info('PYWB_CONFIG not specified, loading default settings from module "{0}"'.format(config_name))
|
||||
logging.info('Loading from default config module "{0}"'.format(config_name))
|
||||
logging.info('')
|
||||
|
||||
module = importlib.import_module(config_name)
|
||||
|
||||
config_file = DEFAULT_CONFIG_FILE
|
||||
|
||||
app = create_wb_app(module.pywb_config(config_file))
|
||||
app = create_wb_app(module.pywb_config())
|
||||
logging.info('')
|
||||
logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
|
||||
return app
|
||||
@ -107,8 +105,6 @@ def main():
|
||||
|
||||
#=================================================================
|
||||
if __name__ == "__main__" or utils.enable_doctests():
|
||||
import pywb_init
|
||||
# Test sample settings
|
||||
application = create_wb_app(pywb_init.pywb_config('../' + DEFAULT_CONFIG_FILE))
|
||||
pass
|
||||
else:
|
||||
application = main()
|
||||
|
9
run.sh
9
run.sh
@ -2,8 +2,13 @@
|
||||
|
||||
mypath=$(cd `dirname $0` && pwd)
|
||||
|
||||
# Setup init module
|
||||
#export 'PYWB_CONFIG=globalwb'
|
||||
# Set a different config file
|
||||
#export 'PYWB_CONFIG=myconfig.yaml'
|
||||
|
||||
# Set alternate init module
|
||||
# The module's pywb_config() function is called to load settings
|
||||
# ex: my_pywb.pywb_config()
|
||||
#export 'PYWB_CONFIG_MODULE=my_pywb'
|
||||
|
||||
app="pywb.wbapp"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user