diff --git a/docs/manual/configuring.rst b/docs/manual/configuring.rst index 9296fcf6..42d98852 100644 --- a/docs/manual/configuring.rst +++ b/docs/manual/configuring.rst @@ -478,11 +478,11 @@ The following are all the available proxy options -- only ``coll`` is required:: use_banner: true use_head_insert: true -(A final option, ``override_route``, allows setting a custom prefix to which to which the proxy requested will be routed. -If set, this option overrides the proxy collection and all other settings, and it is intended for use when extending pywb with custom routing.) +The HTTP/S functionality is provided by the separate :mod:`wsgiprox` utility which provides HTTP/S proxy routing +to any WSGI application. -The HTTP/S functionality is provided by the separate :mod:`wsgiprox` utility which provides HTTP/S proxy -for any WSGI application. +Using ``wsgiprox``, pywb sets ``FrontEndApp.proxy_route_request()`` as the proxy resolver, and this function returns the full collection path that pywb uses to route each proxy request. +Extensions to pywb can override ``proxy_route_request()`` to provide custom handling, such as setting the collection dynamically or based on external data sources. See the `wsgiprox README <https://github.com/webrecorder/wsgiprox/blob/master/README.rst>`_ for additional details on how it works. 
diff --git a/pywb/apps/frontendapp.py b/pywb/apps/frontendapp.py index 9cc932f5..06ce556d 100644 --- a/pywb/apps/frontendapp.py +++ b/pywb/apps/frontendapp.py @@ -380,6 +380,7 @@ class FrontEndApp(object): def init_proxy(self, config): proxy_config = config.get('proxy') + self.proxy_prefix = None if not proxy_config: return @@ -407,17 +408,24 @@ class FrontEndApp(object): else: logging.info('Proxy enabled for collection "{0}"'.format(proxy_coll)) - if proxy_config.get('override_route'): - prefix = proxy_config.get('override_route') - elif proxy_config.get('use_head_insert', True): - prefix = '/{0}/bn_/'.format(proxy_coll) + if proxy_config.get('use_head_insert', True): + self.proxy_prefix = '/{0}/bn_/'.format(proxy_coll) else: - prefix = '/{0}/id_/'.format(proxy_coll) + self.proxy_prefix = '/{0}/id_/'.format(proxy_coll) - self.handler = WSGIProxMiddleware(self.handle_request, prefix, + self.handler = WSGIProxMiddleware(self.handle_request, + self.proxy_route_request, proxy_host=proxy_config.get('host', 'pywb.proxy'), proxy_options=proxy_config) + def proxy_route_request(self, url, environ): + """ Return the full url that this proxy request will be routed to + The 'environ' PATH_INFO and REQUEST_URI will be modified based on the returned url + + Default is to use the 'proxy_prefix' to point to the proxy collection + """ + return self.proxy_prefix + url + # ============================================================================ class MetadataCache(object): diff --git a/tests/test_proxy.py b/tests/test_proxy.py index da5299d2..0fee6b1a 100644 --- a/tests/test_proxy.py +++ b/tests/test_proxy.py @@ -127,7 +127,7 @@ class TestRecordingProxy(CollsDirMixin, BaseTestProxy): def test_proxy_replay_recorded(self, scheme): manager(['reindex', 'test']) - self.app.handler.prefix_resolver.fixed_prefix = '/test/bn_/' + self.app.proxy_prefix = '/test/bn_/' res = requests.get('{0}://httpbin.org/'.format(scheme), proxies=self.proxies, @@ -137,7 +137,7 @@ class 
TestRecordingProxy(CollsDirMixin, BaseTestProxy): assert 'httpbin(1)' in res.text def test_proxy_record_keep_percent(self, scheme): - self.app.handler.prefix_resolver.fixed_prefix = '/test/record/bn_/' + self.app.proxy_prefix = '/test/record/bn_/' res = requests.get('{0}://example.com/path/%2A%2Ftest'.format(scheme), proxies=self.proxies,