mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
cleanup pywb_init, work on documenting config.yaml!
This commit is contained in:
parent
43a46b373d
commit
411e7fe8a3
66
config.yaml
66
config.yaml
@ -1,17 +1,67 @@
|
|||||||
|
# pywb config file
|
||||||
|
# ========================================
|
||||||
|
#
|
||||||
|
# Settings for each route are defined below
|
||||||
|
# Each route may be an archival collection or other handler
|
||||||
|
#
|
||||||
routes:
|
routes:
|
||||||
pywb:
|
# route name (eg /pywb)
|
||||||
index_paths:
|
- name: pywb
|
||||||
- ./sample_archive/cdx/
|
|
||||||
|
|
||||||
archive_paths:
|
# list of paths to search cdx files
|
||||||
- ./sample_archive/warcs/
|
# * local .cdx file
|
||||||
|
# * local dir, will include all .cdx files in dir
|
||||||
|
#
|
||||||
|
# or a string value indicating remote http cdx server
|
||||||
|
index_paths:
|
||||||
|
- ./sample_archive/cdx/
|
||||||
|
|
||||||
head_insert_template: ./ui/head_insert.html
|
# indicate if cdx files are sorted by SURT keys -- eg: com,example)/
|
||||||
|
# SURT keys are recommended for future indices, but non-SURT cdxs
|
||||||
|
# are also supported
|
||||||
|
#
|
||||||
|
# * Set to true if cdxs start with surts: com,example)/
|
||||||
|
#   * Set to false if cdxs start with urls: example.com)/
|
||||||
|
surt_ordered: True
|
||||||
|
|
||||||
html_query_template: ./ui/query.html
|
# list of path prefixes that pywb uses to 'resolve' WARC and ARC filenames
|
||||||
|
# in the cdx to their absolute path
|
||||||
|
#
|
||||||
|
# if path is:
|
||||||
|
# * local dir, use path as prefix
|
||||||
|
# * local file, lookup prefix in tab-delimited sorted index
|
||||||
|
# * http:// path, use path as remote prefix
|
||||||
|
# * redis:// path, use redis to lookup full path for w:<warc> as key
|
||||||
|
|
||||||
|
archive_paths:
|
||||||
|
- ./sample_archive/warcs/
|
||||||
|
|
||||||
|
# ui: optional Jinja2 template to insert into <head> of each replay
|
||||||
|
head_insert_html_template: ./ui/head_insert.html
|
||||||
|
|
||||||
|
# ui: optional text to directly insert into <head>
|
||||||
|
# only loaded if head_insert_html_template is not specified
|
||||||
|
|
||||||
|
#head_insert_text: <script src='example.js'></script>
|
||||||
|
|
||||||
|
|
||||||
hostpaths: http://localhost:8080/
|
# ui: optional Jinja2 template to use for 'calendar' query,
|
||||||
|
# eg, a listing of captures in response to a ../*/<url>
|
||||||
|
#
|
||||||
|
# may be a simple listing or a more complex 'calendar' UI
|
||||||
|
# if omitted, the capture listing shows the raw index
|
||||||
|
calendar_html_template: ./ui/query.html
|
||||||
|
|
||||||
|
|
||||||
|
# list of host names that pywb will be running from to detect
|
||||||
|
# 'fallthrough' requests based on referrer
|
||||||
|
#
|
||||||
|
# eg: given an incorrect request for http://localhost:8080/image.gif with a referrer
|
||||||
|
# of http://localhost:8080/pywb/index.html, pywb can correctly redirect
|
||||||
|
# to http://localhost:8080/pywb/image.gif
|
||||||
|
#
|
||||||
|
|
||||||
|
hostpaths: ['http://localhost:8080/']
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,16 +11,18 @@ import yaml
|
|||||||
import utils
|
import utils
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
## ===========
|
#=================================================================
|
||||||
default_head_insert = """
|
## Reference non-YAML config
|
||||||
|
#=================================================================
|
||||||
|
def pywb_config_manual():
|
||||||
|
default_head_insert = """
|
||||||
|
|
||||||
<!-- WB Insert -->
|
<!-- WB Insert -->
|
||||||
<script src='/static/wb.js'> </script>
|
<script src='/static/wb.js'> </script>
|
||||||
<link rel='stylesheet' href='/static/wb.css'/>
|
<link rel='stylesheet' href='/static/wb.css'/>
|
||||||
<!-- End WB Insert -->
|
<!-- End WB Insert -->
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def pywb_config2():
|
|
||||||
# Current test dir
|
# Current test dir
|
||||||
#test_dir = utils.test_data_dir()
|
#test_dir = utils.test_data_dir()
|
||||||
test_dir = './sample_archive/'
|
test_dir = './sample_archive/'
|
||||||
@ -64,10 +66,20 @@ def pywb_config2():
|
|||||||
hostpaths = ['http://localhost:8080/'])
|
hostpaths = ['http://localhost:8080/'])
|
||||||
|
|
||||||
|
|
||||||
def pywb_config(filename = './pywb/config.yaml'):
|
|
||||||
config = yaml.load(open(filename))
|
|
||||||
|
|
||||||
routes = map(yaml_parse_route, config['routes'].iteritems())
|
#=================================================================
|
||||||
|
# YAML config loader
|
||||||
|
#=================================================================
|
||||||
|
DEFAULT_CONFIG_FILE = 'config.yaml'
|
||||||
|
|
||||||
|
|
||||||
|
def pywb_config(config_file = None):
|
||||||
|
if not config_file:
|
||||||
|
config_file = os.environ.get('PYWB_CONFIG', DEFAULT_CONFIG_FILE)
|
||||||
|
|
||||||
|
config = yaml.load(open(config_file))
|
||||||
|
|
||||||
|
routes = map(yaml_parse_route, config['routes'])
|
||||||
|
|
||||||
hostpaths = config.get('hostpaths', ['http://localhost:8080/'])
|
hostpaths = config.get('hostpaths', ['http://localhost:8080/'])
|
||||||
|
|
||||||
@ -75,9 +87,6 @@ def pywb_config(filename = './pywb/config.yaml'):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def yaml_parse_route((route_name, handler_def)):
|
|
||||||
return Route(route_name, yaml_parse_handler(handler_def))
|
|
||||||
|
|
||||||
|
|
||||||
def yaml_parse_index_loader(index_config):
|
def yaml_parse_index_loader(index_config):
|
||||||
# support mixed cdx streams and remote servers?
|
# support mixed cdx streams and remote servers?
|
||||||
@ -101,60 +110,56 @@ def yaml_parse_index_loader(index_config):
|
|||||||
return indexreader.LocalCDXServer([uri])
|
return indexreader.LocalCDXServer([uri])
|
||||||
|
|
||||||
|
|
||||||
def yaml_parse_archive_resolvers(archive_paths):
|
def yaml_parse_head_insert(config):
|
||||||
|
|
||||||
#TODO: more options (remote files, contains param, etc..)
|
|
||||||
def make_resolver(path):
|
|
||||||
if path.startswith('redis://'):
|
|
||||||
return replay_resolvers.RedisResolver(path)
|
|
||||||
elif os.path.isfile(path):
|
|
||||||
return replay_resolvers.PathIndexResolver(path)
|
|
||||||
else:
|
|
||||||
logging.info('Adding Archive Source: ' + path)
|
|
||||||
return replay_resolvers.PrefixResolver(path)
|
|
||||||
|
|
||||||
return map(make_resolver, archive_paths)
|
|
||||||
|
|
||||||
def yaml_parse_head_insert(handler_def):
|
|
||||||
# First, try a template file
|
# First, try a template file
|
||||||
head_insert_file = handler_def.get('head_insert_template')
|
head_insert_file = config.get('head_insert_html_template')
|
||||||
if head_insert_file:
|
if head_insert_file:
|
||||||
logging.info('Adding Head-Insert Template: ' + head_insert_file)
|
logging.info('Adding Head-Insert Template: ' + head_insert_file)
|
||||||
return views.J2HeadInsertView(head_insert_file)
|
return views.J2HeadInsertView(head_insert_file)
|
||||||
|
|
||||||
# Then, static head_insert text
|
# Then, static head_insert text
|
||||||
head_insert_text = handler_def.get('head_insert_text', '')
|
head_insert_text = config.get('head_insert_text', '')
|
||||||
logging.info('Adding Head-Insert Text: ' + head_insert_text)
|
logging.info('Adding Head-Insert Text: ' + head_insert_text)
|
||||||
return head_insert_text
|
return head_insert_text
|
||||||
|
|
||||||
|
|
||||||
def yaml_parse_handler(handler_def):
|
def yaml_parse_calendar_view(config):
|
||||||
archive_loader = archiveloader.ArchiveLoader()
|
html_view_file = config.get('calendar_html_template')
|
||||||
|
|
||||||
index_loader = yaml_parse_index_loader(handler_def['index_paths'])
|
|
||||||
|
|
||||||
archive_resolvers = yaml_parse_archive_resolvers(handler_def['archive_paths'])
|
|
||||||
|
|
||||||
head_insert = yaml_parse_head_insert(handler_def)
|
|
||||||
|
|
||||||
replayer = replay_views.RewritingReplayView(resolvers = archive_resolvers,
|
|
||||||
archiveloader = archive_loader,
|
|
||||||
head_insert = head_insert,
|
|
||||||
buffer_response = handler_def.get('buffer_response', False))
|
|
||||||
|
|
||||||
html_view_file = handler_def.get('html_query_template')
|
|
||||||
if html_view_file:
|
if html_view_file:
|
||||||
logging.info('Adding HTML Calendar Template: ' + html_view_file)
|
logging.info('Adding HTML Calendar Template: ' + html_view_file)
|
||||||
else:
|
else:
|
||||||
logging.info('No HTML Calendar View Present')
|
logging.info('No HTML Calendar View Present')
|
||||||
|
|
||||||
html_view = views.J2QueryView(html_view_file) if html_view_file else None
|
return views.J2QueryView(html_view_file) if html_view_file else None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def yaml_parse_route(config):
|
||||||
|
name = config['name']
|
||||||
|
|
||||||
|
archive_loader = archiveloader.ArchiveLoader()
|
||||||
|
|
||||||
|
index_loader = yaml_parse_index_loader(config['index_paths'])
|
||||||
|
|
||||||
|
archive_resolvers = map(replay_resolvers.make_best_resolver, config['archive_paths'])
|
||||||
|
|
||||||
|
head_insert = yaml_parse_head_insert(config)
|
||||||
|
|
||||||
|
replayer = replay_views.RewritingReplayView(resolvers = archive_resolvers,
|
||||||
|
archiveloader = archive_loader,
|
||||||
|
head_insert = head_insert,
|
||||||
|
buffer_response = config.get('buffer_response', False))
|
||||||
|
|
||||||
|
html_view = yaml_parse_calendar_view(config)
|
||||||
|
|
||||||
wb_handler = handlers.WBHandler(index_loader, replayer, html_view)
|
wb_handler = handlers.WBHandler(index_loader, replayer, html_view)
|
||||||
return wb_handler
|
|
||||||
|
return Route(name, wb_handler)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__" or utils.enable_doctests():
|
if __name__ == "__main__" or utils.enable_doctests():
|
||||||
pass
|
# Just test for execution for now
|
||||||
#print pywb_config('config.yaml')
|
pywb_config(os.path.dirname(os.path.realpath(__file__)) + '/../config.yaml')
|
||||||
|
pywb_config_manual()
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,10 @@
|
|||||||
import redis
|
import redis
|
||||||
import binsearch
|
import binsearch
|
||||||
|
|
||||||
|
import urlparse
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
|
||||||
#======================================
|
#======================================
|
||||||
# PrefixResolver - convert cdx file entry to url with prefix if url contains specified string
|
# PrefixResolver - convert cdx file entry to url with prefix if url contains specified string
|
||||||
#======================================
|
#======================================
|
||||||
@ -41,3 +46,26 @@ class PathIndexResolver:
|
|||||||
|
|
||||||
return gen_list(result)
|
return gen_list(result)
|
||||||
|
|
||||||
|
|
||||||
|
#TODO: more options (remote files, contains param, etc..)
|
||||||
|
# find best resolver given the path
|
||||||
|
def make_best_resolver(path):
|
||||||
|
url_parts = urlparse.urlsplit(path)
|
||||||
|
|
||||||
|
if url_parts.scheme == 'redis':
|
||||||
|
logging.info('Adding Redis Index: ' + path)
|
||||||
|
return RedisResolver(path)
|
||||||
|
|
||||||
|
if url_parts.scheme == 'file':
|
||||||
|
path = url_parts.path
|
||||||
|
|
||||||
|
if os.path.isfile(path):
|
||||||
|
logging.info('Adding Path Index: ' + path)
|
||||||
|
return PathIndexResolver(path)
|
||||||
|
|
||||||
|
# non-file paths always treated as prefix for now
|
||||||
|
else:
|
||||||
|
logging.info('Adding Archive Path Source: ' + path)
|
||||||
|
return PrefixResolver(path)
|
||||||
|
|
||||||
|
|
||||||
|
@ -84,18 +84,16 @@ def main():
|
|||||||
# Attempt to load real settings from globalwb module
|
# Attempt to load real settings from globalwb module
|
||||||
logging.basicConfig(format = '%(asctime)s: [%(levelname)s]: %(message)s', level = logging.DEBUG)
|
logging.basicConfig(format = '%(asctime)s: [%(levelname)s]: %(message)s', level = logging.DEBUG)
|
||||||
|
|
||||||
config_name = os.environ.get('PYWB_CONFIG')
|
config_name = os.environ.get('PYWB_CONFIG_MODULE')
|
||||||
|
|
||||||
if not config_name:
|
if not config_name:
|
||||||
config_name = 'pywb.pywb_init'
|
config_name = 'pywb.pywb_init'
|
||||||
logging.info('PYWB_CONFIG not specified, loading default settings from module "{0}"'.format(config_name))
|
logging.info('Loading from default config module "{0}"'.format(config_name))
|
||||||
logging.info('')
|
logging.info('')
|
||||||
|
|
||||||
module = importlib.import_module(config_name)
|
module = importlib.import_module(config_name)
|
||||||
|
|
||||||
config_file = DEFAULT_CONFIG_FILE
|
app = create_wb_app(module.pywb_config())
|
||||||
|
|
||||||
app = create_wb_app(module.pywb_config(config_file))
|
|
||||||
logging.info('')
|
logging.info('')
|
||||||
logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
|
logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
|
||||||
return app
|
return app
|
||||||
@ -107,8 +105,6 @@ def main():
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
if __name__ == "__main__" or utils.enable_doctests():
|
if __name__ == "__main__" or utils.enable_doctests():
|
||||||
import pywb_init
|
pass
|
||||||
# Test sample settings
|
|
||||||
application = create_wb_app(pywb_init.pywb_config('../' + DEFAULT_CONFIG_FILE))
|
|
||||||
else:
|
else:
|
||||||
application = main()
|
application = main()
|
||||||
|
9
run.sh
9
run.sh
@ -2,8 +2,13 @@
|
|||||||
|
|
||||||
mypath=$(cd `dirname $0` && pwd)
|
mypath=$(cd `dirname $0` && pwd)
|
||||||
|
|
||||||
# Setup init module
|
# Set a different config file
|
||||||
#export 'PYWB_CONFIG=globalwb'
|
#export 'PYWB_CONFIG=myconfig.yaml'
|
||||||
|
|
||||||
|
# Set alternate init module
|
||||||
|
# The module's pywb_config() function is called to load the settings
|
||||||
|
# ex: my_pywb.pywb_config()
|
||||||
|
#export 'PYWB_CONFIG_MODULE=my_pywb'
|
||||||
|
|
||||||
app="pywb.wbapp"
|
app="pywb.wbapp"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user