2014-01-29 00:03:24 -08:00
|
|
|
# pywb config file
|
|
|
|
# ========================================
|
|
|
|
#
|
2014-02-03 09:24:40 -08:00
|
|
|
# Settings for each collection
|
|
|
|
|
JS Object Proxy Override System (#224)
* Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop
Changes
- cli.py: add import os for os.chdir(self.r.directory)
- frontendapp.py: added initial support for cors requests.
- static_handler.py: add import for NotFoundException
- wbrequestresponse.py: added the initial implementation for cors requests, webrecorder needs this for recording!
- default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing
- html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing
- regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy
- wombat.js: added JS Proxy support
- remove print
* wombat proxy: simplify mixin using 'first_buff'
* js local scope rewrite/proxy work:
- add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default)
- new proxy toggleable with 'js_local_scope_rewrite: true'
- work on integrating john's proxy work
- getAllOwnProps() to generate list of functions that need to be rebound
- remove non-proxy related changes for now, remove angular special cases (for now)
* local scope proxy work:
- add back __WB_pmw() prefix for postMessage
- don't override postMessage() in proxy obj
- MessageEvent resolve proxy to original window obj
* js obj proxy: use local_init() to load local vars from proxy obj
* wombat: js object proxy improvements:
- use same object '_WB_wombat_obj_proxy' on window and document objects
- reuse default_proxy_get() for get operation from window or document
- resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that
- override MessageEvent.source to return window proxy object
* obj proxy work:
- window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception
- window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing)
- add override_prop_to_proxy() to add override to return proxy obj for attribute
- add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy
server side rewrite: generalize local proxy insert, add list for local let overrides
* js obj proxy work:
- add default '__WB_pmw' to self if undefined (for service workers)
- document.origin override
- proxy obj: improved defineProperty override to work with safari
- proxy obj: catch any exception in dummy obj setter
* client-side rewriting:
- proxy obj: catch exception (such as cross-domain access) in own props init
- proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse
- rewrite style: add 'cursor' attr for css url rewriting
* content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped)
* client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link
* client-side document override improvements:
- fix document.domain, document.referrer, forms add document.origin overrides to use only the document object
- init_doc_overrides() called as part of proxy init
- move non-document overrides to main init
rewrite: add rewrite for "Function('return this')" pattern to use proxy obj
* js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_proxy' (defaults to False)
live-rewrite-server: defaults to enabled js obj proxy
metadata: get_metadata() loads metadata.yaml for config settings (for dynamic collections),
or collection config for static collections
warcserver: get_coll_config() returns config for static collection
tests: use custom test dir instead of default 'collections' dir
tests: add basic test for js obj proxy
update to warcio>=1.4.0
* karma tests: update to safari >10
* client-side rewrite:
- ensure wombat.js is ES5 compatible (don't use let)
- check if Proxy obj exists before attempting to init
* js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported
content_rewriter: add overridable get_rewriter()
content_rewriter: fix elif -> if in should_rw_content()
tests: update js proxy obj test with different user agents (supported and unsupported)
karma: reset test to safari 9
* compatibility: remove shorthand notation from wombat.js
* js obj proxy: override MutationObserver.observe() to retrieve original object from proxy
wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
|
|
|
|
2014-02-03 09:24:40 -08:00
|
|
|
collections:
|
|
|
|
# <name>: <cdx_path>
|
|
|
|
# collection will be accessed via /<name>
|
|
|
|
# <cdx_path> is a string or list of:
|
|
|
|
# - string or list of one or more local .cdx files
|
|
|
|
# - string or list of one or more local dirs with .cdx files
|
|
|
|
# - a string value indicating remote http cdx server
|
|
|
|
pywb: ./sample_archive/cdx/
|
|
|
|
|
2014-02-06 17:28:08 -08:00
|
|
|
# ex with filtering: filter CDX lines by filename starting with 'dupe'
|
|
|
|
#pywb-filt: {'index_paths': './sample_archive/cdx/', 'filters': ['filename:dupe*']}
|
|
|
|
|
2014-02-03 09:24:40 -08:00
|
|
|
# indicate if cdx files are sorted by SURT keys -- eg: com,example)/
|
|
|
|
# SURT keys are recommended for future indices, but non-SURT cdxs
|
|
|
|
# are also supported
|
2014-01-29 00:03:24 -08:00
|
|
|
#
|
2014-02-03 09:24:40 -08:00
|
|
|
# * Set to true if cdxs start with surts: com,example)/
|
|
|
|
# * Set to false if cdx start with urls: example.com)/
|
2014-02-07 19:32:58 -08:00
|
|
|
#
|
|
|
|
# default:
|
|
|
|
# surt_ordered: true
|
2014-01-28 22:03:01 -08:00
|
|
|
|
2014-02-03 09:24:40 -08:00
|
|
|
# list of path prefixes pywb uses to 'resolve' WARC and ARC filenames
|
|
|
|
# in the cdx to their absolute path
|
|
|
|
#
|
|
|
|
# if path is:
|
|
|
|
# * local dir, use path as prefix
|
|
|
|
# * local file, lookup prefix in tab-delimited sorted index
|
|
|
|
# * http:// path, use path as remote prefix
|
|
|
|
# * redis:// path, use redis to lookup full path for w:<warc> as key
|
2014-02-01 00:43:24 -08:00
|
|
|
|
2014-02-03 09:24:40 -08:00
|
|
|
archive_paths: ./sample_archive/warcs/
|
2014-02-01 00:43:24 -08:00
|
|
|
|
2014-09-06 17:03:04 -07:00
|
|
|
# ==== Proxy Mode ====
|
|
|
|
# Enable simple http proxy mode
|
|
|
|
enable_http_proxy: true
|
|
|
|
|
|
|
|
# Additional proxy options (defaults)
|
|
|
|
# proxy_options:
|
|
|
|
# enable HTTPS proxy support (requires openssl library)
|
|
|
|
# enable_https_proxy: false
|
|
|
|
#
|
|
|
|
# use cookies to switch collections and capture times
|
|
|
|
# if not enabled, requires use of proxy auth
|
|
|
|
# cookie_resolver: true
|
|
|
|
#
|
|
|
|
# default collection to start out in proxy mode
|
|
|
|
# if not set, will ask the first time
|
|
|
|
# use_default_coll: pywb
|
|
|
|
|
|
|
|
# use wildcard certificates when creating certs in proxy mode
|
|
|
|
# helps lower number of certs created, but may not be compatible
|
|
|
|
# with older libraries
|
2014-09-07 11:58:03 -07:00
|
|
|
# use_wildcard_certs: true
|
2014-09-06 17:03:04 -07:00
|
|
|
|
|
|
|
# if true, will not add any banner to proxy mode replay
|
2014-09-07 11:58:03 -07:00
|
|
|
# unaltered_replay: false
|
2014-09-06 17:03:04 -07:00
|
|
|
|
2014-09-07 11:58:03 -07:00
|
|
|
# Default settings for CA used by proxy mode:
|
2014-09-06 17:03:04 -07:00
|
|
|
# root_ca_file: ./ca/pywb-ca.pem
|
|
|
|
# root_ca_name: pywb https proxy replay CA
|
|
|
|
# certs_dir: ./ca/certs
|
2014-02-07 19:32:58 -08:00
|
|
|
|
|
|
|
# ==== UI: HTML/Jinja2 Templates ====
|
2014-02-01 00:43:24 -08:00
|
|
|
|
2014-09-06 17:03:04 -07:00
|
|
|
# The following are default settings -- uncomment to change
|
|
|
|
# Set to '' to disable the ui
|
|
|
|
|
2014-02-05 10:10:33 -08:00
|
|
|
# template for <head> insert into replayed html content
|
2014-02-07 19:32:58 -08:00
|
|
|
#head_insert_html: ui/head_insert.html
|
2014-10-17 08:28:06 -07:00
|
|
|
#
|
|
|
|
#
|
|
|
|
# template for just the banner modifications
|
|
|
|
# set to False to disable completely
|
|
|
|
#banner_html: banner.html
|
2014-02-01 00:43:24 -08:00
|
|
|
|
2014-02-05 10:10:33 -08:00
|
|
|
# template for 'calendar' query,
|
2014-02-03 09:24:40 -08:00
|
|
|
# eg, a listing of captures in response to a ../*/<url>
|
|
|
|
#
|
|
|
|
# may be a simple listing or a more complex 'calendar' UI
|
2014-02-05 10:10:33 -08:00
|
|
|
# if omitted, will list raw cdx in plain text
|
2014-02-07 19:32:58 -08:00
|
|
|
#query_html: ui/query.html
|
2014-02-01 00:43:24 -08:00
|
|
|
|
2014-02-05 10:10:33 -08:00
|
|
|
# template for search page, which is displayed when no search url is entered
|
|
|
|
# in a collection
|
2014-02-07 19:32:58 -08:00
|
|
|
#search_html: ui/search.html
|
2014-02-01 00:43:24 -08:00
|
|
|
|
2014-02-05 10:10:33 -08:00
|
|
|
# template for home page.
|
|
|
|
# if no other route is set, this will be rendered at /, /index.htm and /index.html
|
2014-02-07 19:32:58 -08:00
|
|
|
#home_html: ui/index.html
|
2014-02-05 10:10:33 -08:00
|
|
|
|
|
|
|
|
|
|
|
# error page template for formatting the error message and details
|
|
|
|
# if omitted, a text response is returned
|
2014-02-07 19:32:58 -08:00
|
|
|
#error_html: ui/error.html
|
2014-02-05 10:10:33 -08:00
|
|
|
|
|
|
|
# ==== Other Paths ====
|
|
|
|
|
2014-02-08 20:07:16 -08:00
|
|
|
# Rewrite urls with absolute paths instead of relative
|
|
|
|
#absolute_paths: true
|
2014-01-28 22:03:01 -08:00
|
|
|
|
2014-02-07 19:32:58 -08:00
|
|
|
# List of route names:
|
|
|
|
# <route>: <package or file path>
|
2015-03-23 16:15:37 -07:00
|
|
|
# default route static/__pywb for pywb bundled static files
|
2015-03-19 11:20:40 -07:00
|
|
|
#static_routes:
|
2015-03-23 16:15:37 -07:00
|
|
|
# static/__pywb: pywb/static/
|
2014-01-28 22:03:01 -08:00
|
|
|
|
2014-02-05 10:10:33 -08:00
|
|
|
# enable cdx server api for querying cdx directly (experimental)
|
2014-02-18 14:47:48 -08:00
|
|
|
enable_cdx_api: true
|
|
|
|
|
|
|
|
# custom rules for domain specific matching
|
|
|
|
# set to false to disable
|
|
|
|
#domain_specific_rules: rules.yaml
|
2014-02-19 20:20:31 -08:00
|
|
|
|
2014-03-14 10:46:20 -07:00
|
|
|
# Memento support, enable
|
|
|
|
enable_memento: true
|
2014-03-17 13:17:02 -07:00
|
|
|
|
2014-06-14 18:26:19 -07:00
|
|
|
# Replay content in an iframe
|
|
|
|
framed_replay: true
|
2014-07-26 13:24:53 -07:00
|
|
|
|
2014-09-06 17:03:04 -07:00
|
|
|
# debugging utility -- echo request data
|
|
|
|
# debug_echo_env: false
|