2014-02-05 10:17:06 -08:00
|
|
|
# pywb config file
|
|
|
|
# ========================================
|
|
|
|
#
|
|
|
|
# Settings for each collection
|
|
|
|
|
|
|
|
collections:
|
|
|
|
# <name>: <cdx_path>
|
|
|
|
# collection will be accessed via /<name>
|
|
|
|
# <cdx_path> is a string or list of:
|
|
|
|
# - string or list of one or more local .cdx files
|
|
|
|
# - string or list of one or more local dirs with .cdx files
|
|
|
|
# - a string value indicating remote http cdx server
|
|
|
|
pywb: ./sample_archive/cdx/
|
|
|
|
|
2014-02-06 17:28:08 -08:00
|
|
|
# ex with filtering: filter CDX lines by filename starting with 'dupe'
|
2014-06-14 18:26:19 -07:00
|
|
|
pywb-filt:
|
|
|
|
index_paths: './sample_archive/cdx/'
|
|
|
|
filters: ['filename:dupe*']
|
2014-12-23 11:09:19 -08:00
|
|
|
|
2014-07-10 21:54:23 -07:00
|
|
|
pywb-filt-2:
|
|
|
|
index_paths: './sample_archive/cdx/'
|
|
|
|
filters: ['!filename:dupe*']
|
|
|
|
|
2014-06-14 18:26:19 -07:00
|
|
|
pywb-nonframe:
|
|
|
|
index_paths: './sample_archive/cdx/'
|
|
|
|
framed_replay: false
|
2014-02-06 17:28:08 -08:00
|
|
|
|
2014-05-16 21:21:14 -07:00
|
|
|
# collection of non-surt CDX
|
2014-12-23 11:09:19 -08:00
|
|
|
pywb-nosurt:
|
2014-06-14 18:26:19 -07:00
|
|
|
index_paths: './sample_archive/non-surt-cdx/'
|
|
|
|
surt_ordered: false
|
2014-05-16 21:16:50 -07:00
|
|
|
|
2014-07-20 18:25:47 -07:00
|
|
|
# live collection
|
|
|
|
live: $liveweb
|
|
|
|
|
|
|
|
# coll with fallback
|
|
|
|
pywb-fallback:
|
|
|
|
index_paths: ./sample_archive/cdx/
|
|
|
|
fallback: live
|
|
|
|
|
2014-12-23 14:34:59 -08:00
|
|
|
pywb-norange:
|
2014-12-23 11:09:19 -08:00
|
|
|
index_paths: ./sample_archive/cdx/
|
2015-02-17 17:47:30 -08:00
|
|
|
enable_ranges: false
|
|
|
|
|
|
|
|
pywb-non-exact:
|
|
|
|
index_paths: ./sample_archive/cdx/
|
|
|
|
redir_to_exact: false
|
|
|
|
|
2015-03-19 11:20:40 -07:00
|
|
|
pywb-cdxj:
|
|
|
|
index_paths: ./sample_archive/cdxj/
|
2015-02-17 17:47:30 -08:00
|
|
|
|
2014-02-06 17:28:08 -08:00
|
|
|
|
2014-02-05 10:17:06 -08:00
|
|
|
# indicate if cdx files are sorted by SURT keys -- eg: com,example)/
|
|
|
|
# SURT keys are recommended for future indices, but non-SURT cdxs
|
|
|
|
# are also supported
|
|
|
|
#
|
|
|
|
# * Set to true if cdxs start with surts: com,example)/
|
|
|
|
# * Set to false if cdxs start with urls: example.com)/
|
|
|
|
surt_ordered: true
|
|
|
|
|
|
|
|
# list of path prefixes pywb uses to 'resolve' WARC and ARC filenames
|
|
|
|
# in the cdx to their absolute path
|
|
|
|
#
|
|
|
|
# if path is:
|
|
|
|
# * local dir, use path as prefix
|
|
|
|
# * local file, lookup prefix in tab-delimited sorted index
|
|
|
|
# * http:// path, use path as remote prefix
|
|
|
|
# * redis:// path, use redis to lookup full path for w:<warc> as key
|
|
|
|
|
2014-02-11 14:10:40 -08:00
|
|
|
archive_paths: ['./invalid/path/to/ignore/', './sample_archive/warcs/']
|
2014-02-05 10:17:06 -08:00
|
|
|
|
|
|
|
# ==== Optional UI: HTML/Jinja2 Templates ====
|
|
|
|
|
|
|
|
# template for <head> insert into replayed html content
|
2015-03-16 18:48:09 -07:00
|
|
|
head_insert_html: templates/head_insert.html
|
2014-02-05 10:17:06 -08:00
|
|
|
|
|
|
|
# template for 'calendar' query,
|
|
|
|
# eg, a listing of captures in response to a ../*/<url>
|
|
|
|
#
|
|
|
|
# may be a simple listing or a more complex 'calendar' UI
|
|
|
|
# if omitted, will list raw cdx in plain text
|
2015-03-16 18:48:09 -07:00
|
|
|
query_html: templates/query.html
|
2014-02-05 10:17:06 -08:00
|
|
|
|
|
|
|
# template for search page, which is displayed when no search url is entered
|
|
|
|
# in a collection
|
2015-03-16 18:48:09 -07:00
|
|
|
search_html: templates/search.html
|
2014-02-05 10:17:06 -08:00
|
|
|
|
|
|
|
# template for home page.
|
|
|
|
# if no other route is set, this will be rendered at /, /index.htm and /index.html
|
2015-03-16 18:48:09 -07:00
|
|
|
home_html: templates/index.html
|
2014-02-05 10:17:06 -08:00
|
|
|
|
|
|
|
|
|
|
|
# error page template for formatting the error message and details
|
|
|
|
# if omitted, a text response is returned
|
2015-03-16 18:48:09 -07:00
|
|
|
error_html: templates/error.html
|
2014-02-05 10:17:06 -08:00
|
|
|
|
2015-01-24 12:32:50 -08:00
|
|
|
|
|
|
|
# template for 404 not found error, may be customized per collection
|
2015-03-16 18:48:09 -07:00
|
|
|
not_found_html: templates/not_found.html
|
2015-01-24 12:32:50 -08:00
|
|
|
|
2014-02-05 10:17:06 -08:00
|
|
|
# ==== Other Paths ====
|
|
|
|
|
2014-02-08 20:07:16 -08:00
|
|
|
# Rewrite urls with absolute paths instead of relative
|
|
|
|
absoulte_paths: true
|
2014-02-07 19:32:58 -08:00
|
|
|
|
|
|
|
# List of route names:
|
|
|
|
# <route>: <package or file path>
|
|
|
|
static_routes:
|
2014-03-14 18:17:22 -07:00
|
|
|
static/test/route: pywb/static/
|
2015-03-23 16:15:37 -07:00
|
|
|
static/__pywb: pywb/static/
|
2014-02-05 10:17:06 -08:00
|
|
|
|
2014-02-05 13:08:10 -08:00
|
|
|
# Enable simple http proxy mode
|
|
|
|
enable_http_proxy: true
|
|
|
|
|
2014-07-21 16:42:14 -07:00
|
|
|
# Additional proxy options (defaults)
|
|
|
|
proxy_options:
|
2016-02-23 18:09:09 -08:00
|
|
|
use_default_coll: pywb
|
2014-07-21 16:42:14 -07:00
|
|
|
|
2014-09-06 17:03:04 -07:00
|
|
|
cookie_resolver: false
|
|
|
|
|
2015-08-15 19:14:59 -07:00
|
|
|
use_client_rewrite: true
|
|
|
|
use_wombat: true
|
2014-07-21 16:42:14 -07:00
|
|
|
|
2015-11-04 15:36:44 -08:00
|
|
|
|
|
|
|
# enable coll info JSON
|
|
|
|
enable_coll_info: true
|
|
|
|
|
2014-02-05 10:17:06 -08:00
|
|
|
# enable cdx server api for querying cdx directly (experimental)
|
2014-05-16 22:17:51 -07:00
|
|
|
#enable_cdx_api: True
|
|
|
|
# or specify suffix
|
|
|
|
enable_cdx_api: -cdx
|
2014-02-11 14:10:40 -08:00
|
|
|
|
2014-03-03 10:35:57 -08:00
|
|
|
# test different port
|
|
|
|
port: 9000
|
|
|
|
|
2014-02-11 14:10:40 -08:00
|
|
|
# optional reporter callback func
|
|
|
|
# if set, called with request and cdx object
|
2014-02-28 01:39:04 +00:00
|
|
|
reporter: !!python/object/new:tests.fixture.PrintReporter []
|
2014-02-18 14:47:48 -08:00
|
|
|
|
|
|
|
# custom rules for domain specific matching
|
|
|
|
#domain_specific_rules: rules.yaml
|
2014-02-19 20:20:31 -08:00
|
|
|
|
2014-06-14 18:26:19 -07:00
|
|
|
# Use lxml parser, if available
|
2014-06-27 19:03:06 -07:00
|
|
|
# use_lxml_parser: true
|
2014-06-14 18:26:19 -07:00
|
|
|
|
|
|
|
# Replay content in an iframe
|
|
|
|
framed_replay: true
|
|
|
|
|
2014-03-14 10:46:20 -07:00
|
|
|
# ==== New / Experimental Settings ====
|
|
|
|
# Not yet production ready -- used primarily for testing
|
|
|
|
|
2014-02-19 20:20:31 -08:00
|
|
|
#perms_checker: !!python/object/new:pywb.cdx.perms.AllowAllPerms []
|
2014-04-04 10:09:26 -07:00
|
|
|
perms_policy: !!python/name:tests.perms_fixture.perms_policy
|
2014-03-14 10:46:20 -07:00
|
|
|
|
|
|
|
# not testing memento here
|
|
|
|
enable_memento: False
|
2014-05-16 22:17:51 -07:00
|
|
|
|
|
|
|
|
|
|
|
# Debug Handlers
|
|
|
|
debug_echo_env: True
|
|
|
|
|
|
|
|
debug_echo_req: True
|