1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

proxy: add 'unaltered_replay' option to proxy_options to replay

all content unaltered (no HTML rewriting, no banner, no wombat);
use 'proxy_options' instead of 'routing_options'; add additional
tests for proxy mode
This commit is contained in:
Ilya Kreymer 2014-07-21 16:42:14 -07:00
parent 950673908d
commit 7c57345363
7 changed files with 46 additions and 16 deletions

View File

@ -91,9 +91,11 @@ static_routes:
# Enable simple http proxy mode
enable_http_proxy: true
# additional options for routing
routing_options:
proxy_coll_select: false
# Additional proxy options (defaults)
#proxy_options:
# use_default_coll: true
#
# unaltered_replay: false
# enable cdx server api for querying cdx directly (experimental)
enable_cdx_api: true

View File

@ -43,21 +43,23 @@ class ProxyRouter(object):
See: http://www.mementoweb.org/guide/rfc/#Pattern1.3
for more details.
"""
def __init__(self, routes, **kwargs):
self.routes = routes
self.hostpaths = kwargs.get('hostpaths')
self.error_view = kwargs.get('error_view')
routing_options = kwargs.get('routing_options')
if not routing_options:
routing_options = {}
proxy_options = kwargs.get('config', {})
if proxy_options:
proxy_options = proxy_options.get('proxy_options', {})
self.auth_msg = routing_options.get('auth_msg',
self.auth_msg = proxy_options.get('auth_msg',
'Please enter name of a collection to use for proxy mode')
self.proxy_coll_select = routing_options.get('proxy_coll_select',
False)
self.use_default_coll = proxy_options.get('use_default_coll', True)
self.unaltered = proxy_options.get('unaltered_replay', False)
def __call__(self, env):
url = env['REL_REQUEST_URI']
@ -76,11 +78,12 @@ class ProxyRouter(object):
if proxy_auth:
proxy_coll = self.read_basic_auth_coll(proxy_auth)
proxy_coll = '/' + proxy_coll + '/'
if not proxy_coll:
return self.proxy_auth_coll_response()
proxy_coll = '/' + proxy_coll + '/'
for r in self.routes:
matcher, c = r.is_handling(proxy_coll)
if matcher:
@ -91,12 +94,16 @@ class ProxyRouter(object):
if not route:
return self.proxy_auth_coll_response()
elif self.proxy_coll_select:
return self.proxy_auth_coll_response()
else:
# if 'use_default_coll' or only one collection, use that
# for proxy mode
elif self.use_default_coll or len(self.routes) == 1:
route = self.routes[0]
coll = self.routes[0].regex.pattern
# otherwise, require proxy auth 407 to select collection
else:
return self.proxy_auth_coll_response()
wbrequest = route.request_class(env,
request_uri=url,
wb_url_str=url,
@ -110,6 +117,9 @@ class ProxyRouter(object):
if matcher:
route.apply_filters(wbrequest, matcher)
if self.unaltered:
wbrequest.wb_url.mod = 'id_'
return route.handler(wbrequest)
# Proxy Auto-Config (PAC) script for the proxy

View File

@ -22,7 +22,7 @@ _wb_js = (function() {
var labels = {LOADING_MSG: "Loading...",
REPLAY_MSG: "This is an <b>archived</b> page from ",
LIVE_MSG: "This is a <b>live</b> page just fetched on "};
LIVE_MSG: "This is a <b>live</b> page loaded on "};
function init_banner() {

View File

@ -265,5 +265,5 @@ def create_wb_router(passed_config={}):
error_view=J2TemplateView.create_template(config.get('error_html'),
'Error Page'),
routing_options=config.get('routing_options')
config=config
)

View File

@ -106,6 +106,12 @@ static_routes:
# Enable simple http proxy mode
enable_http_proxy: true
# Additional proxy options (defaults)
proxy_options:
use_default_coll: true
unaltered_replay: false
# enable cdx server api for querying cdx directly (experimental)
#enable_cdx_api: True
# or specify suffix

View File

@ -14,6 +14,10 @@ enable_memento: true
# Enable simple http proxy mode
enable_http_proxy: true
# test unaltered replay for proxy as well
proxy_options:
unaltered_replay: true
# enable cdx server api for timemap
enable_cdx_api: true

View File

@ -354,7 +354,7 @@ class TestWb:
assert resp.status_int == 407
def test_proxy_replay_auth_invalid(self):
def test_proxy_replay_auth_invalid_1(self):
headers = [('Proxy-Authorization', 'abc' + base64.b64encode('no-such-coll'))]
resp = self.testapp.get('/x-ignore-this-x', headers = headers,
extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''),
@ -362,6 +362,14 @@ class TestWb:
assert resp.status_int == 407
def test_proxy_replay_auth_invalid_2(self):
headers = [('Proxy-Authorization', 'basic')]
resp = self.testapp.get('/x-ignore-this-x', headers = headers,
extra_environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = ''),
status=407)
assert resp.status_int == 407
def test_proxy_pac(self):
resp = self.testapp.get('/proxy.pac', extra_environ = dict(SERVER_NAME='pywb-proxy', SERVER_PORT='8080'))
assert resp.content_type == 'application/x-ns-proxy-autoconfig'