From 7c573453634d7abcc682ebca5e6de1abfa5ec6e7 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 21 Jul 2014 16:42:14 -0700 Subject: [PATCH] proxy: add 'unaltered_replay' option to proxy_options to replay all content unaltered (no rewriting html, no banner, no wombat) use 'proxy_options' instead of 'routing_options', add additional tests for proxy mode --- config.yaml | 8 +++++--- pywb/framework/proxy.py | 30 ++++++++++++++++++++---------- pywb/static/wb.js | 2 +- pywb/webapp/pywb_init.py | 2 +- tests/test_config.yaml | 6 ++++++ tests/test_config_memento.yaml | 4 ++++ tests/test_integration.py | 10 +++++++++- 7 files changed, 46 insertions(+), 16 deletions(-) diff --git a/config.yaml b/config.yaml index 91051b81..937b4545 100644 --- a/config.yaml +++ b/config.yaml @@ -91,9 +91,11 @@ static_routes: # Enable simple http proxy mode enable_http_proxy: true -# additional options for routing -routing_options: - proxy_coll_select: false +# Additional proxy options (defaults) +#proxy_options: +# use_default_coll: true +# +# unaltered_replay: false # enable cdx server api for querying cdx directly (experimental) enable_cdx_api: true diff --git a/pywb/framework/proxy.py b/pywb/framework/proxy.py index bdf5753f..62bc06b0 100644 --- a/pywb/framework/proxy.py +++ b/pywb/framework/proxy.py @@ -43,21 +43,23 @@ class ProxyRouter(object): See: http://www.mementoweb.org/guide/rfc/#Pattern1.3 for more details. """ + def __init__(self, routes, **kwargs): self.routes = routes self.hostpaths = kwargs.get('hostpaths') self.error_view = kwargs.get('error_view') - routing_options = kwargs.get('routing_options') - if not routing_options: - routing_options = {} + proxy_options = kwargs.get('config', {}) + if proxy_options: + proxy_options = proxy_options.get('proxy_options', {}) - self.auth_msg = routing_options.get('auth_msg', + self.auth_msg = proxy_options.get('auth_msg', 'Please enter name of a collection to use for proxy mode') - self.proxy_coll_select = routing_options.get('proxy_coll_select', - False) + self.use_default_coll = proxy_options.get('use_default_coll', True) + + self.unaltered = proxy_options.get('unaltered_replay', False) def __call__(self, env): url = env['REL_REQUEST_URI'] @@ -76,11 +78,12 @@ class ProxyRouter(object): if proxy_auth: proxy_coll = self.read_basic_auth_coll(proxy_auth) - proxy_coll = '/' + proxy_coll + '/' if not proxy_coll: return self.proxy_auth_coll_response() + proxy_coll = '/' + proxy_coll + '/' + for r in self.routes: matcher, c = r.is_handling(proxy_coll) if matcher: @@ -91,12 +94,16 @@ class ProxyRouter(object): if not route: return self.proxy_auth_coll_response() - elif self.proxy_coll_select: - return self.proxy_auth_coll_response() - else: + # if 'use_default_coll' or only one collection, use that + # for proxy mode + elif self.use_default_coll or len(self.routes) == 1: route = self.routes[0] coll = self.routes[0].regex.pattern + # otherwise, require proxy auth 407 to select collection + else: + return self.proxy_auth_coll_response() + wbrequest = route.request_class(env, request_uri=url, wb_url_str=url, @@ -110,6 +117,9 @@ class ProxyRouter(object): if matcher: route.apply_filters(wbrequest, matcher) + if self.unaltered: + wbrequest.wb_url.mod = 'id_' + return route.handler(wbrequest) # Proxy Auto-Config (PAC) script for the proxy diff --git a/pywb/static/wb.js b/pywb/static/wb.js index 81d40f42..0244cde8 100644 --- a/pywb/static/wb.js +++ b/pywb/static/wb.js @@ -22,7 +22,7 @@ _wb_js = (function() { var labels = {LOADING_MSG: "Loading...", REPLAY_MSG: "This is an archived page from ", - LIVE_MSG: "This is a live page just fetched on "}; + LIVE_MSG: "This is a live page loaded on "}; function init_banner() { diff --git a/pywb/webapp/pywb_init.py b/pywb/webapp/pywb_init.py index fb77d211..e17a9485 100644 --- a/pywb/webapp/pywb_init.py +++ b/pywb/webapp/pywb_init.py @@ -265,5 +265,5 @@ def create_wb_router(passed_config={}): error_view=J2TemplateView.create_template(config.get('error_html'), 'Error Page'), - routing_options=config.get('routing_options') + config=config ) diff --git a/tests/test_config.yaml b/tests/test_config.yaml index c1e562f8..468a3131 100644 --- a/tests/test_config.yaml +++ b/tests/test_config.yaml @@ -106,6 +106,12 @@ static_routes: # Enable simple http proxy mode enable_http_proxy: true +# Additional proxy options (defaults) +proxy_options: + use_default_coll: true + + unaltered_replay: false + # enable cdx server api for querying cdx directly (experimental) #enable_cdx_api: True # or specify suffix diff --git a/tests/test_config_memento.yaml b/tests/test_config_memento.yaml index 003a3145..e8d0eb21 100644 --- a/tests/test_config_memento.yaml +++ b/tests/test_config_memento.yaml @@ -14,6 +14,10 @@ enable_memento: true # Enable simple http proxy mode enable_http_proxy: true +# test unaltered replay for proxy as well +proxy_options: + unaltered_replay: true + # enable cdx server api for timemap enable_cdx_api: true diff --git a/tests/test_integration.py b/tests/test_integration.py index 4532b0d6..94ce45cf 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -354,7 +354,7 @@ class TestWb: assert resp.status_int == 407 - def test_proxy_replay_auth_invalid(self): + def test_proxy_replay_auth_invalid_1(self): headers = [('Proxy-Authorization', 'abc' + base64.b64encode('no-such-coll'))] resp = self.testapp.get('/x-ignore-this-x', headers = headers, extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''), @@ -362,6 +362,14 @@ class TestWb: assert resp.status_int == 407 + def test_proxy_replay_auth_invalid_2(self): + headers = [('Proxy-Authorization', 'basic')] + resp = self.testapp.get('/x-ignore-this-x', headers = headers, + extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''), + status=407) + + assert resp.status_int == 407 + def test_proxy_pac(self): resp = self.testapp.get('/proxy.pac', extra_environ = dict(SERVER_NAME='pywb-proxy', SERVER_PORT='8080')) assert resp.content_type == 'application/x-ns-proxy-autoconfig'