mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
route selection via proxy auth!
refactor route request parsing to happen in the actual router class instead of in the route; in proxy mode, add support for picking a route via proxy-auth; improve test for 'top' rewriting
This commit is contained in:
parent
daffc7ff5d
commit
1317b2b10f
@ -29,16 +29,49 @@ class ArchivalRouter(object):
|
||||
self.error_view = kwargs.get('error_view')
|
||||
|
||||
def __call__(self, env):
|
||||
request_uri = env['REL_REQUEST_URI']
|
||||
|
||||
for route in self.routes:
|
||||
result = route(env, self.abs_path)
|
||||
if result:
|
||||
return result
|
||||
matcher, coll = route.is_handling(request_uri)
|
||||
if matcher:
|
||||
wbrequest = self.parse_request(route, env, matcher,
|
||||
coll, request_uri,
|
||||
use_abs_prefix=self.abs_path)
|
||||
|
||||
return route.handler(wbrequest)
|
||||
|
||||
# Default Home Page
|
||||
if env['REL_REQUEST_URI'] in ['/', '/index.html', '/index.htm']:
|
||||
if request_uri in ['/', '/index.html', '/index.htm']:
|
||||
return self.render_home_page(env)
|
||||
|
||||
return self.fallback(env, self.routes) if self.fallback else None
|
||||
return self.fallback(env, self) if self.fallback else None
|
||||
|
||||
def parse_request(self, route, env, matcher, coll, request_uri,
                  use_abs_prefix=False):
    """Create the request object for a route that matched ``request_uri``.

    :param route: the matching route; supplies ``request_class``,
        ``handler`` and ``apply_filters``
    :param env: WSGI environ, must contain ``SCRIPT_NAME``
    :param matcher: regex match object returned by ``route.is_handling``
    :param coll: collection name extracted from the match
    :param request_uri: request uri the match was made against
    :param use_abs_prefix: passed through to the request class
    :return: the request object, after the route's filters were applied
    """
    coll_prefix = matcher.group(0)
    script_name = env['SCRIPT_NAME']

    if not coll_prefix:
        # nothing was matched in front of the url, so everything after
        # the leading '/' of the request_uri is the wb_url
        rel_prefix = script_name + '/'
        wb_url_str = request_uri[1:]
    else:
        rel_prefix = script_name + '/' + coll_prefix + '/'
        # skip the leading '/', the matched prefix and the '/' after it
        wb_url_str = request_uri[len(coll_prefix) + 2:]

    wbrequest = route.request_class(
        env,
        request_uri=request_uri,
        wb_url_str=wb_url_str,
        rel_prefix=rel_prefix,
        coll=coll,
        use_abs_prefix=use_abs_prefix,
        wburl_class=route.handler.get_wburl_type(),
        urlrewriter_class=UrlRewriter)

    # give the route a chance to attach its additional filters
    route.apply_filters(wbrequest, matcher)
    return wbrequest
|
||||
|
||||
|
||||
def render_home_page(self, env):
|
||||
# render the homepage!
|
||||
@ -73,45 +106,15 @@ class Route(object):
|
||||
self.coll_group = coll_group
|
||||
self._custom_init(config)
|
||||
|
||||
def __call__(self, env, use_abs_prefix):
|
||||
wbrequest = self.parse_request(env, use_abs_prefix)
|
||||
return self.handler(wbrequest) if wbrequest else None
|
||||
|
||||
def parse_request(self, env, use_abs_prefix, request_uri=None):
|
||||
if not request_uri:
|
||||
request_uri = env['REL_REQUEST_URI']
|
||||
|
||||
def is_handling(self, request_uri):
|
||||
matcher = self.regex.match(request_uri[1:])
|
||||
if not matcher:
|
||||
return None
|
||||
|
||||
matched_str = matcher.group(0)
|
||||
if matched_str:
|
||||
rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/'
|
||||
# remove the '/' + rel_prefix part of uri
|
||||
wb_url_str = request_uri[len(matched_str) + 2:]
|
||||
else:
|
||||
rel_prefix = env['SCRIPT_NAME'] + '/'
|
||||
# the request_uri is the wb_url, since no coll
|
||||
wb_url_str = request_uri[1:]
|
||||
return None, None
|
||||
|
||||
coll = matcher.group(self.coll_group)
|
||||
return matcher, coll
|
||||
|
||||
wbrequest = self.request_class(env,
|
||||
request_uri=request_uri,
|
||||
wb_url_str=wb_url_str,
|
||||
rel_prefix=rel_prefix,
|
||||
coll=coll,
|
||||
use_abs_prefix=use_abs_prefix,
|
||||
wburl_class=self.handler.get_wburl_type(),
|
||||
urlrewriter_class=UrlRewriter)
|
||||
|
||||
# Allow for applying of additional filters
|
||||
self._apply_filters(wbrequest, matcher)
|
||||
|
||||
return wbrequest
|
||||
|
||||
def _apply_filters(self, wbrequest, matcher):
|
||||
def apply_filters(self, wbrequest, matcher):
|
||||
for filter in self.filters:
|
||||
last_grp = len(matcher.groups())
|
||||
filter_str = filter.format(matcher.group(last_grp))
|
||||
@ -136,9 +139,11 @@ class ReferRedirect:
|
||||
else:
|
||||
self.match_prefixs = [match_prefixs]
|
||||
|
||||
def __call__(self, env, routes):
|
||||
def __call__(self, env, the_router):
|
||||
referrer = env.get('HTTP_REFERER')
|
||||
|
||||
routes = the_router.routes
|
||||
|
||||
# ensure there is a referrer
|
||||
if referrer is None:
|
||||
return None
|
||||
@ -166,17 +171,15 @@ class ReferRedirect:
|
||||
ref_request = None
|
||||
|
||||
for route in routes:
|
||||
ref_request = route.parse_request(env, False, request_uri=path)
|
||||
if ref_request:
|
||||
matcher, coll = route.is_handling(path)
|
||||
if matcher:
|
||||
ref_request = the_router.parse_request(route, env,
|
||||
matcher, coll, path)
|
||||
ref_route = route
|
||||
break
|
||||
|
||||
# must have matched one of the routes
|
||||
if not ref_request:
|
||||
return None
|
||||
|
||||
# must have a rewriter
|
||||
if not ref_request.urlrewriter:
|
||||
# must have matched one of the routes with a urlrewriter
|
||||
if not ref_request or not ref_request.urlrewriter:
|
||||
return None
|
||||
|
||||
rewriter = ref_request.urlrewriter
|
||||
|
@ -1,8 +1,11 @@
|
||||
from wbrequestresponse import WbResponse, WbRequest
|
||||
from archivalrouter import ArchivalRouter
|
||||
|
||||
import urlparse
|
||||
import base64
|
||||
|
||||
from pywb.rewrite.url_rewriter import HttpsUrlRewriter
|
||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -15,10 +18,7 @@ class ProxyArchivalRouter(ArchivalRouter):
|
||||
"""
|
||||
def __init__(self, routes, **kwargs):
|
||||
super(ProxyArchivalRouter, self).__init__(routes, **kwargs)
|
||||
request_class = routes[0].request_class
|
||||
self.proxy = ProxyRouter(routes[0].handler,
|
||||
request_class=request_class,
|
||||
**kwargs)
|
||||
self.proxy = ProxyRouter(routes, **kwargs)
|
||||
|
||||
def __call__(self, env):
|
||||
response = self.proxy(env)
|
||||
@ -43,12 +43,14 @@ class ProxyRouter(object):
|
||||
See: http://www.mementoweb.org/guide/rfc/#Pattern1.3
|
||||
for more details.
|
||||
"""
|
||||
def __init__(self, handler, **kwargs):
|
||||
self.handler = handler
|
||||
def __init__(self, routes, **kwargs):
|
||||
self.routes = routes
|
||||
self.hostpaths = kwargs.get('hostpaths')
|
||||
|
||||
self.error_view = kwargs.get('error_view')
|
||||
self.request_class = kwargs.get('request_class')
|
||||
|
||||
self.auth_msg = kwargs.get('auth_msg',
|
||||
'Please enter name of a collection to use for proxy mode')
|
||||
|
||||
def __call__(self, env):
|
||||
url = env['REL_REQUEST_URI']
|
||||
@ -59,16 +61,50 @@ class ProxyRouter(object):
|
||||
if not url.startswith('http://'):
|
||||
return None
|
||||
|
||||
wbrequest = self.request_class(env,
|
||||
proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')
|
||||
|
||||
route = None
|
||||
coll = None
|
||||
matcher = None
|
||||
|
||||
if proxy_auth:
|
||||
proxy_coll = self.read_basic_auth_coll(proxy_auth)
|
||||
proxy_coll = '/' + proxy_coll + '/'
|
||||
|
||||
if not proxy_coll:
|
||||
return self.proxy_auth_coll_response()
|
||||
|
||||
for r in self.routes:
|
||||
matcher, c = r.is_handling(proxy_coll)
|
||||
print r.regex.pattern
|
||||
if matcher:
|
||||
route = r
|
||||
coll = c
|
||||
break
|
||||
|
||||
if not route:
|
||||
return self.proxy_auth_coll_response()
|
||||
|
||||
print 'COLL ', coll
|
||||
|
||||
else:
|
||||
route = self.routes[0]
|
||||
coll = self.routes[0].regex.pattern
|
||||
|
||||
wbrequest = route.request_class(env,
|
||||
request_uri=url,
|
||||
wb_url_str=url,
|
||||
coll=coll,
|
||||
host_prefix=self.hostpaths[0],
|
||||
wburl_class=self.handler.get_wburl_type(),
|
||||
wburl_class=route.handler.get_wburl_type(),
|
||||
urlrewriter_class=HttpsUrlRewriter,
|
||||
use_abs_prefix=False,
|
||||
is_proxy=True)
|
||||
|
||||
return self.handler(wbrequest)
|
||||
if matcher:
|
||||
route.apply_filters(wbrequest, matcher)
|
||||
|
||||
return route.handler(wbrequest)
|
||||
|
||||
# Proxy Auto-Config (PAC) script for the proxy
|
||||
def make_pac_response(self, env):
|
||||
@ -97,3 +133,27 @@ class ProxyRouter(object):
|
||||
content_type = 'application/x-ns-proxy-autoconfig'
|
||||
|
||||
return WbResponse.text_response(buff, content_type=content_type)
|
||||
|
||||
def proxy_auth_coll_response(self):
    """Return a 407 response carrying a Basic ``Proxy-Authenticate`` header.

    ``self.auth_msg`` is used both as the realm of the challenge and as
    the plain-text body of the response.
    """
    message = self.auth_msg
    challenge = 'Basic realm="{0}"'.format(message)

    status_headers = StatusAndHeaders(
        '407 Proxy Authentication',
        [('Content-Type', 'text/plain'),
         ('Proxy-Authenticate', challenge)])

    return WbResponse(status_headers, value=[message])
|
||||
|
||||
@staticmethod
def read_basic_auth_coll(value):
    """Extract the user name from a Basic ``Proxy-Authorization`` value.

    The header value is expected to look like ``Basic <base64>``, where
    the decoded payload is ``user[:password]``. Only the part before the
    first ``:`` is returned.

    :param value: raw header value sent by the client (untrusted)
    :return: the user name, or ``''`` if the value is not a well-formed
        Basic credential (wrong scheme, wrong number of parts, payload
        that is not valid base64 or not decodable text)
    """
    parts = value.split(' ')
    if len(parts) != 2 or parts[0].lower() != 'basic':
        return ''

    try:
        user_pass = base64.b64decode(parts[1])
    except (TypeError, ValueError):
        # the header comes from the client: a malformed payload must be
        # treated like missing credentials, not crash the request
        # (TypeError on Python 2, binascii.Error/ValueError on Python 3)
        return ''

    if not isinstance(user_pass, str):
        # b64decode returned bytes rather than str; convert to text so
        # the split on ':' below works
        try:
            user_pass = user_pass.decode('utf-8')
        except UnicodeDecodeError:
            return ''

    return user_pass.split(':')[0]
|
||||
|
@ -1,7 +1,7 @@
|
||||
"""
|
||||
# Test WbRequest parsed via a Route
|
||||
# route with relative path, print resulting wbrequest
|
||||
>>> print_req(Route('web', WbUrlHandler())({'REL_REQUEST_URI': '/web/test.example.com', 'SCRIPT_NAME': ''}, False))
|
||||
>>> _test_route_req(Route('web', WbUrlHandler()), {'REL_REQUEST_URI': '/web/test.example.com', 'SCRIPT_NAME': ''})
|
||||
{'coll': 'web',
|
||||
'request_uri': '/web/test.example.com',
|
||||
'wb_prefix': '/web/',
|
||||
@ -9,21 +9,21 @@
|
||||
|
||||
|
||||
# route with absolute path, running at script /my_pywb, print resulting wbrequest
|
||||
>>> print_req(Route('web', WbUrlHandler())({'REL_REQUEST_URI': '/web/2013im_/test.example.com', 'SCRIPT_NAME': '/my_pywb', 'HTTP_HOST': 'localhost:8081', 'wsgi.url_scheme': 'https'}, True))
|
||||
>>> _test_route_req(Route('web', WbUrlHandler()), {'REL_REQUEST_URI': '/web/2013im_/test.example.com', 'SCRIPT_NAME': '/my_pywb', 'HTTP_HOST': 'localhost:8081', 'wsgi.url_scheme': 'https'}, True)
|
||||
{'coll': 'web',
|
||||
'request_uri': '/web/2013im_/test.example.com',
|
||||
'wb_prefix': 'https://localhost:8081/my_pywb/web/',
|
||||
'wb_url': ('replay', '2013', 'im_', 'http://test.example.com', '2013im_/http://test.example.com')}
|
||||
|
||||
# route with no collection
|
||||
>>> print_req(Route('', BaseHandler())({'REL_REQUEST_URI': 'http://example.com', 'SCRIPT_NAME': '/pywb'}, False))
|
||||
>>> _test_route_req(Route('', BaseHandler()), {'REL_REQUEST_URI': 'http://example.com', 'SCRIPT_NAME': '/pywb'})
|
||||
{'coll': '',
|
||||
'request_uri': 'http://example.com',
|
||||
'wb_prefix': '/pywb/',
|
||||
'wb_url': None}
|
||||
|
||||
# not matching route -- skipped
|
||||
>>> Route('web', BaseHandler())({'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''}, False)
|
||||
>>> _test_route_req(Route('web', BaseHandler()), {'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''})
|
||||
|
||||
|
||||
# Referer Redirect Test
|
||||
@ -84,11 +84,18 @@ False
|
||||
|
||||
"""
|
||||
|
||||
from pywb.framework.archivalrouter import Route, ReferRedirect
|
||||
from pywb.framework.archivalrouter import Route, ReferRedirect, ArchivalRouter
|
||||
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
||||
import pprint
|
||||
|
||||
def print_req(req):
|
||||
def _test_route_req(route, env, abs_path=False):
|
||||
matcher, coll = route.is_handling(env['REL_REQUEST_URI'])
|
||||
if not matcher:
|
||||
return
|
||||
|
||||
the_router = ArchivalRouter([route], abs_path=abs_path)
|
||||
req = the_router.parse_request(route, env, matcher, coll, env['REL_REQUEST_URI'], abs_path)
|
||||
|
||||
varlist = vars(req)
|
||||
the_dict = dict((k, varlist[k]) for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll'))
|
||||
pprint.pprint(the_dict)
|
||||
@ -102,9 +109,11 @@ def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'col
|
||||
|
||||
routes = [Route(coll, WbUrlHandler())]
|
||||
|
||||
the_router = ArchivalRouter(routes)
|
||||
|
||||
redir = ReferRedirect(match_host)
|
||||
#req = WbRequest.from_uri(request_uri, env)
|
||||
rep = redir(env, routes)
|
||||
rep = redir(env, the_router)
|
||||
if not rep:
|
||||
return False
|
||||
|
||||
|
@ -83,7 +83,8 @@ def test_example_domain_specific_3():
|
||||
assert '/* Bootloader.configurePage' in buff
|
||||
|
||||
def test_wombat_top():
|
||||
status_headers, buff = get_rewritten('https://assets-cdn.github.com/assets/github-0f06d0f46fe7bcfbf31f2380f23aec15ba21b8ec.js', urlrewriter)
|
||||
#status_headers, buff = get_rewritten('https://assets-cdn.github.com/assets/github-0f06d0f46fe7bcfbf31f2380f23aec15ba21b8ec.js', urlrewriter)
|
||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/toptest.js', urlrewriter)
|
||||
|
||||
assert 'WB_wombat_top!==window' in buff
|
||||
|
||||
|
1
sample_archive/text_content/toptest.js
Normal file
1
sample_archive/text_content/toptest.js
Normal file
@ -0,0 +1 @@
|
||||
!function(){top!==window&&(alert("For security reasons, framing is not allowed."),top.location.replace(document.location))}
|
@ -17,6 +17,10 @@ collections:
|
||||
index_paths: './sample_archive/cdx/'
|
||||
filters: ['filename:dupe*']
|
||||
|
||||
pywb-filt-2:
|
||||
index_paths: './sample_archive/cdx/'
|
||||
filters: ['!filename:dupe*']
|
||||
|
||||
pywb-nonframe:
|
||||
index_paths: './sample_archive/cdx/'
|
||||
framed_replay: false
|
||||
|
@ -1,5 +1,6 @@
|
||||
from pytest import raises
|
||||
import webtest
|
||||
import base64
|
||||
from pywb.webapp.pywb_init import create_wb_router
|
||||
from pywb.framework.wsgi_wrappers import init_app
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
@ -317,6 +318,42 @@ class TestWb:
|
||||
assert 'Sun, Jan 26 2014 20:11:27' in resp.body
|
||||
assert 'wb.js' in resp.body
|
||||
|
||||
def test_proxy_replay_auth_filtered(self):
    """Proxy replay with the 'pywb-filt-2' collection picked via proxy auth."""
    credentials = base64.b64encode('pywb-filt-2:')
    environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = '')

    resp = self.testapp.get('/x-ignore-this-x',
                            headers = [('Proxy-Authorization', 'Basic ' + credentials)],
                            extra_environ = environ)

    self._assert_basic_html(resp)

    body = resp.body
    assert 'Sun, Jan 26 2014 20:06:24' in body
    assert 'wb.js' in body
|
||||
|
||||
def test_proxy_replay_auth(self):
    """Proxy replay with the 'pywb' collection picked via proxy auth."""
    credentials = base64.b64encode('pywb')
    environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = '')

    resp = self.testapp.get('/x-ignore-this-x',
                            headers = [('Proxy-Authorization', 'Basic ' + credentials)],
                            extra_environ = environ)

    self._assert_basic_html(resp)

    body = resp.body
    assert 'Mon, Jan 27 2014 17:12:38' in body
    assert 'wb.js' in body
|
||||
|
||||
def test_proxy_replay_auth_no_coll(self):
    """Proxy auth naming a collection that is not configured yields a 407."""
    credentials = base64.b64encode('no-such-coll')
    environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = '')

    resp = self.testapp.get('/x-ignore-this-x',
                            headers = [('Proxy-Authorization', 'Basic ' + credentials)],
                            extra_environ = environ,
                            status=407)

    assert resp.status_int == 407
|
||||
|
||||
def test_proxy_replay_auth_invalid(self):
    """A Proxy-Authorization value that is not 'Basic <payload>' yields a 407."""
    bad_value = 'abc' + base64.b64encode('no-such-coll')
    environ = dict(REQUEST_URI = 'http://www.iana.org/', SCRIPT_NAME = '')

    resp = self.testapp.get('/x-ignore-this-x',
                            headers = [('Proxy-Authorization', bad_value)],
                            extra_environ = environ,
                            status=407)

    assert resp.status_int == 407
|
||||
|
||||
def test_proxy_pac(self):
|
||||
resp = self.testapp.get('/proxy.pac', extra_environ = dict(SERVER_NAME='pywb-proxy', SERVER_PORT='8080'))
|
||||
assert resp.content_type == 'application/x-ns-proxy-autoconfig'
|
||||
|
Loading…
x
Reference in New Issue
Block a user