1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

proxy: add 'unaltered_replay' option to proxy_options to replay all
content unaltered (no HTML rewriting, no banner, no wombat).

Use 'proxy_options' instead of 'routing_options', and add additional
tests for proxy mode.
This commit is contained in:
Ilya Kreymer 2014-07-21 16:42:14 -07:00
parent 950673908d
commit 7c57345363
7 changed files with 46 additions and 16 deletions

View File

@ -91,9 +91,11 @@ static_routes:
# Enable simple http proxy mode # Enable simple http proxy mode
enable_http_proxy: true enable_http_proxy: true
# additional options for routing # Additional proxy options (defaults)
routing_options: #proxy_options:
proxy_coll_select: false # use_default_coll: true
#
# unaltered_replay: false
# enable cdx server api for querying cdx directly (experimental) # enable cdx server api for querying cdx directly (experimental)
enable_cdx_api: true enable_cdx_api: true

View File

@ -43,21 +43,23 @@ class ProxyRouter(object):
See: http://www.mementoweb.org/guide/rfc/#Pattern1.3 See: http://www.mementoweb.org/guide/rfc/#Pattern1.3
for more details. for more details.
""" """
def __init__(self, routes, **kwargs): def __init__(self, routes, **kwargs):
self.routes = routes self.routes = routes
self.hostpaths = kwargs.get('hostpaths') self.hostpaths = kwargs.get('hostpaths')
self.error_view = kwargs.get('error_view') self.error_view = kwargs.get('error_view')
routing_options = kwargs.get('routing_options') proxy_options = kwargs.get('config', {})
if not routing_options: if proxy_options:
routing_options = {} proxy_options = proxy_options.get('proxy_options', {})
self.auth_msg = routing_options.get('auth_msg', self.auth_msg = proxy_options.get('auth_msg',
'Please enter name of a collection to use for proxy mode') 'Please enter name of a collection to use for proxy mode')
self.proxy_coll_select = routing_options.get('proxy_coll_select', self.use_default_coll = proxy_options.get('use_default_coll', True)
False)
self.unaltered = proxy_options.get('unaltered_replay', False)
def __call__(self, env): def __call__(self, env):
url = env['REL_REQUEST_URI'] url = env['REL_REQUEST_URI']
@ -76,11 +78,12 @@ class ProxyRouter(object):
if proxy_auth: if proxy_auth:
proxy_coll = self.read_basic_auth_coll(proxy_auth) proxy_coll = self.read_basic_auth_coll(proxy_auth)
proxy_coll = '/' + proxy_coll + '/'
if not proxy_coll: if not proxy_coll:
return self.proxy_auth_coll_response() return self.proxy_auth_coll_response()
proxy_coll = '/' + proxy_coll + '/'
for r in self.routes: for r in self.routes:
matcher, c = r.is_handling(proxy_coll) matcher, c = r.is_handling(proxy_coll)
if matcher: if matcher:
@ -91,12 +94,16 @@ class ProxyRouter(object):
if not route: if not route:
return self.proxy_auth_coll_response() return self.proxy_auth_coll_response()
elif self.proxy_coll_select: # if 'use_default_coll' or only one collection, use that
return self.proxy_auth_coll_response() # for proxy mode
else: elif self.use_default_coll or len(self.routes) == 1:
route = self.routes[0] route = self.routes[0]
coll = self.routes[0].regex.pattern coll = self.routes[0].regex.pattern
# otherwise, require proxy auth 407 to select collection
else:
return self.proxy_auth_coll_response()
wbrequest = route.request_class(env, wbrequest = route.request_class(env,
request_uri=url, request_uri=url,
wb_url_str=url, wb_url_str=url,
@ -110,6 +117,9 @@ class ProxyRouter(object):
if matcher: if matcher:
route.apply_filters(wbrequest, matcher) route.apply_filters(wbrequest, matcher)
if self.unaltered:
wbrequest.wb_url.mod = 'id_'
return route.handler(wbrequest) return route.handler(wbrequest)
# Proxy Auto-Config (PAC) script for the proxy # Proxy Auto-Config (PAC) script for the proxy

View File

@ -22,7 +22,7 @@ _wb_js = (function() {
var labels = {LOADING_MSG: "Loading...", var labels = {LOADING_MSG: "Loading...",
REPLAY_MSG: "This is an <b>archived</b> page from ", REPLAY_MSG: "This is an <b>archived</b> page from ",
LIVE_MSG: "This is a <b>live</b> page just fetched on "}; LIVE_MSG: "This is a <b>live</b> page loaded on "};
function init_banner() { function init_banner() {

View File

@ -265,5 +265,5 @@ def create_wb_router(passed_config={}):
error_view=J2TemplateView.create_template(config.get('error_html'), error_view=J2TemplateView.create_template(config.get('error_html'),
'Error Page'), 'Error Page'),
routing_options=config.get('routing_options') config=config
) )

View File

@ -106,6 +106,12 @@ static_routes:
# Enable simple http proxy mode # Enable simple http proxy mode
enable_http_proxy: true enable_http_proxy: true
# Additional proxy options (defaults)
proxy_options:
use_default_coll: true
unaltered_replay: false
# enable cdx server api for querying cdx directly (experimental) # enable cdx server api for querying cdx directly (experimental)
#enable_cdx_api: True #enable_cdx_api: True
# or specify suffix # or specify suffix

View File

@ -14,6 +14,10 @@ enable_memento: true
# Enable simple http proxy mode # Enable simple http proxy mode
enable_http_proxy: true enable_http_proxy: true
# test unaltered replay for proxy as well
proxy_options:
unaltered_replay: true
# enable cdx server api for timemap # enable cdx server api for timemap
enable_cdx_api: true enable_cdx_api: true

View File

@ -354,7 +354,7 @@ class TestWb:
assert resp.status_int == 407 assert resp.status_int == 407
def test_proxy_replay_auth_invalid(self): def test_proxy_replay_auth_invalid_1(self):
headers = [('Proxy-Authorization', 'abc' + base64.b64encode('no-such-coll'))] headers = [('Proxy-Authorization', 'abc' + base64.b64encode('no-such-coll'))]
resp = self.testapp.get('/x-ignore-this-x', headers = headers, resp = self.testapp.get('/x-ignore-this-x', headers = headers,
extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''), extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''),
@ -362,6 +362,14 @@ class TestWb:
assert resp.status_int == 407 assert resp.status_int == 407
def test_proxy_replay_auth_invalid_2(self):
headers = [('Proxy-Authorization', 'basic')]
resp = self.testapp.get('/x-ignore-this-x', headers = headers,
extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''),
status=407)
assert resp.status_int == 407
def test_proxy_pac(self): def test_proxy_pac(self):
resp = self.testapp.get('/proxy.pac', extra_environ = dict(SERVER_NAME='pywb-proxy', SERVER_PORT='8080')) resp = self.testapp.get('/proxy.pac', extra_environ = dict(SERVER_NAME='pywb-proxy', SERVER_PORT='8080'))
assert resp.content_type == 'application/x-ns-proxy-autoconfig' assert resp.content_type == 'application/x-ns-proxy-autoconfig'