mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
proxy: add 'unaltered_replay' option to 'proxy_options' to replay
all content unaltered (no HTML rewriting, no banner, no wombat); use 'proxy_options' instead of 'routing_options'; add additional tests for proxy mode
This commit is contained in:
parent
950673908d
commit
7c57345363
@ -91,9 +91,11 @@ static_routes:
|
||||
# Enable simple http proxy mode
|
||||
enable_http_proxy: true
|
||||
|
||||
# additional options for routing
|
||||
routing_options:
|
||||
proxy_coll_select: false
|
||||
# Additional proxy options (defaults)
|
||||
#proxy_options:
|
||||
# use_default_coll: true
|
||||
#
|
||||
# unaltered_replay: false
|
||||
|
||||
# enable cdx server api for querying cdx directly (experimental)
|
||||
enable_cdx_api: true
|
||||
|
@ -43,21 +43,23 @@ class ProxyRouter(object):
|
||||
See: http://www.mementoweb.org/guide/rfc/#Pattern1.3
|
||||
for more details.
|
||||
"""
|
||||
|
||||
def __init__(self, routes, **kwargs):
|
||||
self.routes = routes
|
||||
self.hostpaths = kwargs.get('hostpaths')
|
||||
|
||||
self.error_view = kwargs.get('error_view')
|
||||
|
||||
routing_options = kwargs.get('routing_options')
|
||||
if not routing_options:
|
||||
routing_options = {}
|
||||
proxy_options = kwargs.get('config', {})
|
||||
if proxy_options:
|
||||
proxy_options = proxy_options.get('proxy_options', {})
|
||||
|
||||
self.auth_msg = routing_options.get('auth_msg',
|
||||
self.auth_msg = proxy_options.get('auth_msg',
|
||||
'Please enter name of a collection to use for proxy mode')
|
||||
|
||||
self.proxy_coll_select = routing_options.get('proxy_coll_select',
|
||||
False)
|
||||
self.use_default_coll = proxy_options.get('use_default_coll', True)
|
||||
|
||||
self.unaltered = proxy_options.get('unaltered_replay', False)
|
||||
|
||||
def __call__(self, env):
|
||||
url = env['REL_REQUEST_URI']
|
||||
@ -76,11 +78,12 @@ class ProxyRouter(object):
|
||||
|
||||
if proxy_auth:
|
||||
proxy_coll = self.read_basic_auth_coll(proxy_auth)
|
||||
proxy_coll = '/' + proxy_coll + '/'
|
||||
|
||||
if not proxy_coll:
|
||||
return self.proxy_auth_coll_response()
|
||||
|
||||
proxy_coll = '/' + proxy_coll + '/'
|
||||
|
||||
for r in self.routes:
|
||||
matcher, c = r.is_handling(proxy_coll)
|
||||
if matcher:
|
||||
@ -91,12 +94,16 @@ class ProxyRouter(object):
|
||||
if not route:
|
||||
return self.proxy_auth_coll_response()
|
||||
|
||||
elif self.proxy_coll_select:
|
||||
return self.proxy_auth_coll_response()
|
||||
else:
|
||||
# if 'use_default_coll' or only one collection, use that
|
||||
# for proxy mode
|
||||
elif self.use_default_coll or len(self.routes) == 1:
|
||||
route = self.routes[0]
|
||||
coll = self.routes[0].regex.pattern
|
||||
|
||||
# otherwise, require proxy auth 407 to select collection
|
||||
else:
|
||||
return self.proxy_auth_coll_response()
|
||||
|
||||
wbrequest = route.request_class(env,
|
||||
request_uri=url,
|
||||
wb_url_str=url,
|
||||
@ -110,6 +117,9 @@ class ProxyRouter(object):
|
||||
if matcher:
|
||||
route.apply_filters(wbrequest, matcher)
|
||||
|
||||
if self.unaltered:
|
||||
wbrequest.wb_url.mod = 'id_'
|
||||
|
||||
return route.handler(wbrequest)
|
||||
|
||||
# Proxy Auto-Config (PAC) script for the proxy
|
||||
|
@ -22,7 +22,7 @@ _wb_js = (function() {
|
||||
|
||||
var labels = {LOADING_MSG: "Loading...",
|
||||
REPLAY_MSG: "This is an <b>archived</b> page from ",
|
||||
LIVE_MSG: "This is a <b>live</b> page just fetched on "};
|
||||
LIVE_MSG: "This is a <b>live</b> page loaded on "};
|
||||
|
||||
|
||||
function init_banner() {
|
||||
|
@ -265,5 +265,5 @@ def create_wb_router(passed_config={}):
|
||||
error_view=J2TemplateView.create_template(config.get('error_html'),
|
||||
'Error Page'),
|
||||
|
||||
routing_options=config.get('routing_options')
|
||||
config=config
|
||||
)
|
||||
|
@ -106,6 +106,12 @@ static_routes:
|
||||
# Enable simple http proxy mode
|
||||
enable_http_proxy: true
|
||||
|
||||
# Additional proxy options (defaults)
|
||||
proxy_options:
|
||||
use_default_coll: true
|
||||
|
||||
unaltered_replay: false
|
||||
|
||||
# enable cdx server api for querying cdx directly (experimental)
|
||||
#enable_cdx_api: True
|
||||
# or specify suffix
|
||||
|
@ -14,6 +14,10 @@ enable_memento: true
|
||||
# Enable simple http proxy mode
|
||||
enable_http_proxy: true
|
||||
|
||||
# test unaltered replay for proxy as well
|
||||
proxy_options:
|
||||
unaltered_replay: true
|
||||
|
||||
# enable cdx server api for timemap
|
||||
enable_cdx_api: true
|
||||
|
||||
|
@ -354,7 +354,7 @@ class TestWb:
|
||||
|
||||
assert resp.status_int == 407
|
||||
|
||||
def test_proxy_replay_auth_invalid(self):
|
||||
def test_proxy_replay_auth_invalid_1(self):
|
||||
headers = [('Proxy-Authorization', 'abc' + base64.b64encode('no-such-coll'))]
|
||||
resp = self.testapp.get('/x-ignore-this-x', headers = headers,
|
||||
extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''),
|
||||
@ -362,6 +362,14 @@ class TestWb:
|
||||
|
||||
assert resp.status_int == 407
|
||||
|
||||
def test_proxy_replay_auth_invalid_2(self):
|
||||
headers = [('Proxy-Authorization', 'basic')]
|
||||
resp = self.testapp.get('/x-ignore-this-x', headers = headers,
|
||||
extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''),
|
||||
status=407)
|
||||
|
||||
assert resp.status_int == 407
|
||||
|
||||
def test_proxy_pac(self):
|
||||
resp = self.testapp.get('/proxy.pac', extra_environ = dict(SERVER_NAME='pywb-proxy', SERVER_PORT='8080'))
|
||||
assert resp.content_type == 'application/x-ns-proxy-autoconfig'
|
||||
|
Loading…
x
Reference in New Issue
Block a user