mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
the value for collinfo.json template. Default template returns an entry for each handler route, including the route path (id), title (name) and memento timegate and timemap paths, to be used with an aggregator. Using a custom 'info_json' template can specify a different collinfo template, alternative to #69 (local aggregation) Closes #146
163 lines
4.2 KiB
YAML
163 lines
4.2 KiB
YAML
# pywb config file
|
|
# ========================================
|
|
#
|
|
# Settings for each collection
|
|
|
|
collections:
|
|
# <name>: <cdx_path>
|
|
# collection will be accessed via /<name>
|
|
# <cdx_path> is a string or list of:
|
|
# - string or list of one or more local .cdx files
|
|
# - string or list of one or more local dirs with .cdx files
|
|
# - a string value indicating remote http cdx server
|
|
pywb: ./sample_archive/cdx/
|
|
|
|
# ex with filtering: filter CDX lines by filename starting with 'dupe'
|
|
pywb-filt:
|
|
index_paths: './sample_archive/cdx/'
|
|
filters: ['filename:dupe*']
|
|
|
|
pywb-filt-2:
|
|
index_paths: './sample_archive/cdx/'
|
|
filters: ['!filename:dupe*']
|
|
|
|
pywb-nonframe:
|
|
index_paths: './sample_archive/cdx/'
|
|
framed_replay: false
|
|
|
|
# collection of non-surt CDX
|
|
pywb-nosurt:
|
|
index_paths: './sample_archive/non-surt-cdx/'
|
|
surt_ordered: false
|
|
|
|
# live collection
|
|
live: $liveweb
|
|
|
|
# coll with fallback
|
|
pywb-fallback:
|
|
index_paths: ./sample_archive/cdx/
|
|
fallback: live
|
|
|
|
pywb-norange:
|
|
index_paths: ./sample_archive/cdx/
|
|
enable_ranges: false
|
|
|
|
pywb-non-exact:
|
|
index_paths: ./sample_archive/cdx/
|
|
redir_to_exact: false
|
|
|
|
pywb-cdxj:
|
|
index_paths: ./sample_archive/cdxj/
|
|
|
|
|
|
# indicate if cdx files are sorted by SURT keys -- eg: com,example)/
|
|
# SURT keys are recommended for future indices, but non-SURT cdxs
|
|
# are also supported
|
|
#
|
|
# * Set to true if cdxs start with surts: com,example)/
|
|
# * Set to false if cdxs start with urls: example.com)/
|
|
surt_ordered: true
|
|
|
|
# list of path prefixes for pywb to look in to 'resolve' WARC and ARC filenames
|
|
# in the cdx to their absolute path
|
|
#
|
|
# if path is:
|
|
# * local dir, use path as prefix
|
|
# * local file, lookup prefix in tab-delimited sorted index
|
|
# * http:// path, use path as remote prefix
|
|
# * redis:// path, use redis to lookup full path for w:<warc> as key
|
|
|
|
archive_paths: ['./invalid/path/to/ignore/', './sample_archive/warcs/']
|
|
|
|
# ==== Optional UI: HTML/Jinja2 Templates ====
|
|
|
|
# template for <head> insert into replayed html content
|
|
head_insert_html: templates/head_insert.html
|
|
|
|
# template for 'calendar' query,
|
|
# eg, a listing of captures in response to a ../*/<url>
|
|
#
|
|
# may be a simple listing or a more complex 'calendar' UI
|
|
# if omitted, will list raw cdx in plain text
|
|
query_html: templates/query.html
|
|
|
|
# template for search page, which is displayed when no search url is entered
|
|
# in a collection
|
|
search_html: templates/search.html
|
|
|
|
# template for home page.
|
|
# if no other route is set, this will be rendered at /, /index.htm and /index.html
|
|
home_html: templates/index.html
|
|
|
|
|
|
# error page template for formatting error message and details
|
|
# if omitted, a text response is returned
|
|
error_html: templates/error.html
|
|
|
|
|
|
# template for 404 not found error, may be customized per collection
|
|
not_found_html: templates/not_found.html
|
|
|
|
# ==== Other Paths ====
|
|
|
|
# Rewrite urls with absolute paths instead of relative
|
|
absoulte_paths: true
|
|
|
|
# List of route names:
|
|
# <route>: <package or file path>
|
|
static_routes:
|
|
static/test/route: pywb/static/
|
|
static/__pywb: pywb/static/
|
|
|
|
# Enable simple http proxy mode
|
|
enable_http_proxy: true
|
|
|
|
# Additional proxy options (defaults)
|
|
proxy_options:
|
|
use_default_coll: true
|
|
|
|
cookie_resolver: false
|
|
|
|
use_client_rewrite: true
|
|
use_wombat: true
|
|
|
|
|
|
# enable coll info JSON
|
|
enable_coll_info: true
|
|
|
|
# enable cdx server api for querying cdx directly (experimental)
|
|
#enable_cdx_api: True
|
|
# or specify suffix
|
|
enable_cdx_api: -cdx
|
|
|
|
# test different port
|
|
port: 9000
|
|
|
|
# optional reporter callback func
|
|
# if set, called with request and cdx object
|
|
reporter: !!python/object/new:tests.fixture.PrintReporter []
|
|
|
|
# custom rules for domain specific matching
|
|
#domain_specific_rules: rules.yaml
|
|
|
|
# Use lxml parser, if available
|
|
# use_lxml_parser: true
|
|
|
|
# Replay content in an iframe
|
|
framed_replay: true
|
|
|
|
# ==== New / Experimental Settings ====
|
|
# Not yet production ready -- used primarily for testing
|
|
|
|
#perms_checker: !!python/object/new:pywb.cdx.perms.AllowAllPerms []
|
|
perms_policy: !!python/name:tests.perms_fixture.perms_policy
|
|
|
|
# not testing memento here
|
|
enable_memento: False
|
|
|
|
|
|
# Debug Handlers
|
|
debug_echo_env: True
|
|
|
|
debug_echo_req: True
|