---
# pywb config file
# ========================================
#
# NOTE(review): this file was previously collapsed onto two physical lines
# that both began with '#', so a YAML parser saw nothing but comments.
# The nesting below is reconstructed from the key names and comments --
# confirm it against the consumer's expected schema.

# Settings for each collection
collections:
  # NAME: CDX_PATH
  # The collection will be accessed via /NAME
  # CDX_PATH is a string or list of:
  #  - string or list of one or more local .cdx files
  #  - string or list of one or more local dirs with .cdx files
  #  - a string value indicating a remote http cdx server
  pywb: ./sample_archive/cdx/

  # ex with filtering: filter CDX lines by filename starting with 'dupe'
  pywb-filt:
    index_paths: './sample_archive/cdx/'
    filters: ['filename:dupe*']

  pywb-filt-2:
    index_paths: './sample_archive/cdx/'
    filters: ['!filename:dupe*']

  pywb-nonframe:
    index_paths: './sample_archive/cdx/'
    framed_replay: false

  # collection of non-surt CDX
  pywb-nosurt:
    index_paths: './sample_archive/non-surt-cdx/'
    surt_ordered: false

  # live collection
  live: $liveweb

  # coll with fallback
  pywb-fallback:
    index_paths: ./sample_archive/cdx/
    fallback: live

  pywb-norange:
    index_paths: ./sample_archive/cdx/
    enable_ranges: false

  pywb-non-exact:
    index_paths: ./sample_archive/cdx/
    redir_to_exact: false

  pywb-cdxj:
    index_paths: ./sample_archive/cdxj/

# Indicate if cdx files are sorted by SURT keys -- eg: com,example)/
# SURT keys are recommended for future indices, but non-SURT cdxs
# are also supported
#
#   * Set to true if cdxs start with surts: com,example)/
#   * Set to false if cdxs start with urls: example.com)/
surt_ordered: true

# List of path prefixes pywb uses to 'resolve' WARC and ARC filenames
# in the cdx to their absolute path
#
# if path is:
#   * local dir, use path as prefix
#   * local file, lookup prefix in tab-delimited sorted index
#   * http:// path, use path as remote prefix
#   * redis:// path, use redis to look up the full path, with w:FILENAME
#     as the key
archive_paths:
  - './invalid/path/to/ignore/'
  - './sample_archive/warcs/'

# ==== Optional UI: HTML/Jinja2 Templates ====

# template for insert into replayed html content
head_insert_html: templates/head_insert.html

# template for the 'calendar' query,
# eg, a listing of captures in response to a ../*/URL
#
# may be a simple listing or a more complex 'calendar' UI
# if omitted, will list raw cdx in plain text
query_html: templates/query.html

# template for search page, which is displayed when no search url is entered
# in a collection
search_html: templates/search.html

# template for home page.
# if no other route is set, this will be rendered at /, /index.htm
# and /index.html
home_html: templates/index.html

# error page template for formatting the error message and details
# if omitted, a text response is returned
error_html: templates/error.html

# template for 404 not found error, may be customized per collection
not_found_html: templates/not_found.html

# ==== Other Paths ====

# Rewrite urls with absolute paths instead of relative
# NOTE(review): the key below looks like a misspelling of 'absolute_paths'.
# It is left unchanged because the key the consumer actually reads is not
# visible from this file -- confirm before renaming.
absoulte_paths: true

# List of route names:
# ROUTE: PACKAGE_OR_FILE_PATH
static_routes:
  static/test/route: pywb/static/
  static/__pywb: pywb/static/

# Enable simple http proxy mode
enable_http_proxy: true

# Additional proxy options (defaults)
proxy_options:
  use_default_coll: pywb
  cookie_resolver: false

  use_client_rewrite: true
  use_wombat: true

# enable coll info JSON
enable_coll_info: true

# enable cdx server api for querying cdx directly (experimental)
# enable_cdx_api: true
# or specify suffix
enable_cdx_api: '-cdx'

# test different port
port: 9000

# optional reporter callback func
# if set, called with request and cdx object
# NOTE(review): the !!python/... tags in this file are rejected by safe
# YAML loaders; they require a loader that may construct arbitrary Python
# objects, so this file must only ever come from a trusted source.
reporter: !!python/object/new:tests.fixture.PrintReporter []

# custom rules for domain specific matching
# domain_specific_rules: rules.yaml

# Use lxml parser, if available
# use_lxml_parser: true

# Replay content in an iframe
framed_replay: true

# ==== New / Experimental Settings ====
# Not yet production ready -- used primarily for testing

# perms_checker: !!python/object/new:pywb.cdx.perms.AllowAllPerms []
perms_policy: !!python/name:tests.perms_fixture.perms_policy

# not testing memento here
enable_memento: false

# Debug Handlers
debug_echo_env: true
debug_echo_req: true