1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

proxy: use FrontEndApp.proxy_route_request() to determine proxy route

Extensions can override this function to provide custom proxy routing
Update docs
This commit is contained in:
Ilya Kreymer 2018-04-20 15:20:56 -07:00
parent 5349d0518c
commit de3ec0e1bc
3 changed files with 20 additions and 12 deletions

View File

@ -478,11 +478,11 @@ The following are all the available proxy options -- only ``coll`` is required::
use_banner: true
use_head_insert: true
(A final option, ``override_route``, allows setting a custom prefix to which the proxy request will be routed.
If set, this option overrides the proxy collection and all other settings, and it is intended for use when extending pywb with custom routing.)
The HTTP/S functionality is provided by the separate :mod:`wsgiprox` utility which provides HTTP/S proxy routing
to any WSGI application.
The HTTP/S functionality is provided by the separate :mod:`wsgiprox` utility, which provides HTTP/S proxy support
for any WSGI application.
Using ``wsgiprox``, pywb sets ``FrontEndApp.proxy_route_request()`` as the proxy resolver, and this function returns the full collection path that pywb uses to route each proxy request.
Extensions to pywb can override ``proxy_route_request()`` to provide custom handling, such as setting the collection dynamically or based on external data sources.
See the `wsgiprox README <https://github.com/webrecorder/wsgiprox/blob/master/README.rst>`_ for additional details on how it works.

View File

@ -380,6 +380,7 @@ class FrontEndApp(object):
def init_proxy(self, config):
proxy_config = config.get('proxy')
self.proxy_prefix = None
if not proxy_config:
return
@ -407,17 +408,24 @@ class FrontEndApp(object):
else:
logging.info('Proxy enabled for collection "{0}"'.format(proxy_coll))
if proxy_config.get('override_route'):
prefix = proxy_config.get('override_route')
elif proxy_config.get('use_head_insert', True):
prefix = '/{0}/bn_/'.format(proxy_coll)
if proxy_config.get('use_head_insert', True):
self.proxy_prefix = '/{0}/bn_/'.format(proxy_coll)
else:
prefix = '/{0}/id_/'.format(proxy_coll)
self.proxy_prefix = '/{0}/id_/'.format(proxy_coll)
self.handler = WSGIProxMiddleware(self.handle_request, prefix,
self.handler = WSGIProxMiddleware(self.handle_request,
self.proxy_route_request,
proxy_host=proxy_config.get('host', 'pywb.proxy'),
proxy_options=proxy_config)
def proxy_route_request(self, url, environ):
    """Resolve the full url that a proxy request will be routed to.

    The 'environ' PATH_INFO and REQUEST_URI will be modified based on
    the url returned here.

    The default behavior prepends 'proxy_prefix' to the requested url,
    so the request points at the proxy collection.

    :param url: the url requested through the proxy
    :param environ: the request environ (not used by this default
        implementation)
    :return: 'proxy_prefix' followed by ``url``
    """
    routed_url = self.proxy_prefix + url
    return routed_url
# ============================================================================
class MetadataCache(object):

View File

@ -127,7 +127,7 @@ class TestRecordingProxy(CollsDirMixin, BaseTestProxy):
def test_proxy_replay_recorded(self, scheme):
manager(['reindex', 'test'])
self.app.handler.prefix_resolver.fixed_prefix = '/test/bn_/'
self.app.proxy_prefix = '/test/bn_/'
res = requests.get('{0}://httpbin.org/'.format(scheme),
proxies=self.proxies,
@ -137,7 +137,7 @@ class TestRecordingProxy(CollsDirMixin, BaseTestProxy):
assert 'httpbin(1)' in res.text
def test_proxy_record_keep_percent(self, scheme):
self.app.handler.prefix_resolver.fixed_prefix = '/test/record/bn_/'
self.app.proxy_prefix = '/test/record/bn_/'
res = requests.get('{0}://example.com/path/%2A%2Ftest'.format(scheme),
proxies=self.proxies,