1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Proxy Options (#317)

* proxy mode options: #316
- add 'use_banner' option, if false, will disable standard banner.html from being added
- add 'use_head_insert' option, if false, will disable injecting head_insert.html in proxy mode
both options default to true

* docs: add docs for new proxy options

* also add 'override_route' option and docs for extending proxy routing
This commit is contained in:
Ilya Kreymer 2018-04-20 10:04:34 -07:00 committed by GitHub
parent 804734525c
commit 5349d0518c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 102 additions and 6 deletions

View File

@ -411,6 +411,21 @@ To enable proxy mode, the collection can be specified by running: ``wayback --pr
For HTTP proxy access, this is all that is needed to use the proxy. If pywb is running on port 8080 on localhost, the following curl command should provide proxy access: ``curl -x "localhost:8080" http://example.com/``
Disabling Proxy Banner
^^^^^^^^^^^^^^^^^^^^^^
By default, pywb inserts a default banner into the proxy mode replay to make it clear to users that they are viewing replayed content.
The default banner can be disabled by adding ``use_banner: false`` to the proxy config (this option is checked in the ``banner.html`` template).
However, pywb may still insert additional rewriting code into the head to improve replay (using the ``head_insert.html`` template).
To disable all modifications to the page in proxy mode, add ``use_head_insert: false`` to the config.
Both options default to true, e.g.::
proxy:
use_banner: true
use_head_insert: true
Proxy Recording
^^^^^^^^^^^^^^^
@ -460,6 +475,11 @@ The following are all the available proxy options -- only ``coll`` is required::
ca_name: pywb HTTPS Proxy CA
ca_file_cache: ./proxy-certs/pywb-ca.pem
recording: false
use_banner: true
use_head_insert: true
(A final option, ``override_route``, allows setting a custom prefix to which proxy requests will be routed.
If set, this option overrides the proxy collection and all other settings, and it is intended for use when extending pywb with custom routing.)
The HTTP/S functionality is provided by the separate :mod:`wsgiprox` utility which provides HTTP/S proxy
for any WSGI application.

View File

@ -407,7 +407,12 @@ class FrontEndApp(object):
else:
logging.info('Proxy enabled for collection "{0}"'.format(proxy_coll))
prefix = '/{0}/bn_/'.format(proxy_coll)
if proxy_config.get('override_route'):
prefix = proxy_config.get('override_route')
elif proxy_config.get('use_head_insert', True):
prefix = '/{0}/bn_/'.format(proxy_coll)
else:
prefix = '/{0}/id_/'.format(proxy_coll)
self.handler = WSGIProxMiddleware(self.handle_request, prefix,
proxy_host=proxy_config.get('host', 'pywb.proxy'),

View File

@ -1,4 +1,5 @@
{#
  Default banner include.
  The banner assets are emitted when env.pywb_proxy_magic is not set, or when
  config.proxy.use_banner is truthy; an undefined use_banner falls back to
  true via the default() filter, so the banner stays on unless it is
  explicitly disabled.
  NOTE(review): env.pywb_proxy_magic presumably marks a proxy-mode request --
  confirm against the code that sets it.
-#}
{% if not env.pywb_proxy_magic or config.proxy.use_banner | default(true) %}
<!-- default banner, create through js -->
<script src='{{ host_prefix }}/{{ static_path }}/default_banner.js'> </script>
<link rel='stylesheet' href='{{ host_prefix }}/{{ static_path }}/default_banner.css'/>
{% endif %}

View File

@ -19,7 +19,9 @@ def scheme(request):
# ============================================================================
class BaseTestProxy(TempDirTests, BaseTestClass):
@classmethod
def setup_class(cls, coll='pywb', config_file='config_test.yaml', recording=False):
def setup_class(cls, coll='pywb', config_file='config_test.yaml', recording=False,
extra_opts={}):
super(BaseTestProxy, cls).setup_class()
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
@ -31,6 +33,8 @@ class BaseTestProxy(TempDirTests, BaseTestClass):
'recording': recording,
}
opts.update(extra_opts)
cls.app = FrontEndApp(config_file=config_file,
custom_config={'proxy': opts})
@ -57,9 +61,11 @@ class TestProxy(BaseTestProxy):
proxies=self.proxies,
verify=self.root_ca_file)
assert 'WB Insert' in res.text
assert 'Example Domain' in res.text
# wb insert
assert 'WB Insert' in res.text
# no wombat.js
assert 'wombat.js' not in res.text
@ -82,6 +88,9 @@ class TestProxy(BaseTestProxy):
# no wombat.js
assert 'wombat.js' not in res.text
# banner
assert 'default_banner.js' in res.text
# no redirect check
assert 'window == window.top' not in res.text
@ -130,10 +139,71 @@ class TestRecordingProxy(CollsDirMixin, BaseTestProxy):
def test_proxy_record_keep_percent(self, scheme):
self.app.handler.prefix_resolver.fixed_prefix = '/test/record/bn_/'
res = requests.get('{0}://example.com/%2A%2Ffoobar'.format(scheme),
res = requests.get('{0}://example.com/path/%2A%2Ftest'.format(scheme),
proxies=self.proxies,
verify=self.root_ca_file)
# ensure %-encoded url stays as is
assert '"{0}://example.com/%2A%2Ffoobar"'.format(scheme) in res.text
assert '"{0}://example.com/path/%2A%2Ftest"'.format(scheme) in res.text
# ============================================================================
class TestProxyNoBanner(BaseTestProxy):
    """Proxy replay checks with the ``use_banner`` proxy option set to False."""

    @classmethod
    def setup_class(cls):
        """Run the base proxy setup, passing ``use_banner: False`` as an extra option."""
        proxy_overrides = {'use_banner': False}
        super(TestProxyNoBanner, cls).setup_class(extra_opts=proxy_overrides)

    def test_proxy_replay(self, scheme):
        """The head insert is still present, but the default banner is not."""
        target_url = '{0}://example.com/'.format(scheme)
        response = requests.get(target_url,
                                proxies=self.proxies,
                                verify=self.root_ca_file)
        page = response.text
        headers = response.headers

        # content
        assert 'Example Domain' in page

        # head insert
        assert 'WB Insert' in page

        # in order: no banner, no wombat.js, no redirect check
        for marker in ('default_banner.js', 'wombat.js', 'window == window.top'):
            assert marker not in page

        expected_link = '<http://example.com>; rel="memento"; datetime="Mon, 27 Jan 2014 17:12:51 GMT"; collection="pywb"'
        assert headers['Link'] == expected_link
        assert headers['Memento-Datetime'] == 'Mon, 27 Jan 2014 17:12:51 GMT'
# ============================================================================
class TestProxyNoHeadInsert(BaseTestProxy):
    """Proxy replay checks with the ``use_head_insert`` proxy option set to False."""

    @classmethod
    def setup_class(cls):
        """Run the base proxy setup, passing ``use_head_insert: False`` as an extra option."""
        proxy_overrides = {'use_head_insert': False}
        super(TestProxyNoHeadInsert, cls).setup_class(extra_opts=proxy_overrides)

    def test_proxy_replay(self, scheme):
        """The archived content is served without the head insert or the banner."""
        target_url = '{0}://example.com/'.format(scheme)
        response = requests.get(target_url,
                                proxies=self.proxies,
                                verify=self.root_ca_file)
        page = response.text
        headers = response.headers

        # content
        assert 'Example Domain' in page

        # in order: no head insert, no banner, no wombat.js, no redirect check
        for marker in ('WB Insert', 'default_banner.js', 'wombat.js', 'window == window.top'):
            assert marker not in page

        expected_link = '<http://example.com>; rel="memento"; datetime="Mon, 27 Jan 2014 17:12:51 GMT"; collection="pywb"'
        assert headers['Link'] == expected_link
        assert headers['Memento-Datetime'] == 'Mon, 27 Jan 2014 17:12:51 GMT'