mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
proxy improvements: refactor the collection selector into BaseCollResolver,
supporting either proxy-auth or cookie-based selection (in progress).
https proxy: support POST requests; properly read the HTTP headers and wrap the remainder in wsgi.input.
https proxy: properly update the WSGI environ for the wrapped request.
wbrequestresponse: add 'Content-Length: 0' to redir_response.
This commit is contained in:
parent
b6fb0e510e
commit
6234d795dc
@ -12,6 +12,8 @@ from pywb.rewrite.url_rewriter import HttpsUrlRewriter
|
||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||
from pywb.utils.wbexception import BadRequestException
|
||||
|
||||
from pywb.utils.bufferedreaders import BufferedReader
|
||||
|
||||
from certauth import CertificateAuthority
|
||||
|
||||
|
||||
@ -51,8 +53,10 @@ class ProxyRouter(object):
|
||||
for more details.
|
||||
"""
|
||||
|
||||
PAC_PATH = '/proxy.pac'
|
||||
BLOCK_SIZE = 4096
|
||||
|
||||
def __init__(self, routes, **kwargs):
|
||||
self.routes = routes
|
||||
self.hostpaths = kwargs.get('hostpaths')
|
||||
|
||||
self.error_view = kwargs.get('error_view')
|
||||
@ -61,13 +65,14 @@ class ProxyRouter(object):
|
||||
if proxy_options:
|
||||
proxy_options = proxy_options.get('proxy_options', {})
|
||||
|
||||
self.auth_msg = proxy_options.get('auth_msg',
|
||||
'Please enter name of a collection to use for proxy mode')
|
||||
|
||||
self.use_default_coll = proxy_options.get('use_default_coll', True)
|
||||
self.resolver = ProxyAuthResolver(routes, proxy_options)
|
||||
#self.resolver = CookieResolver(routes, proxy_options)
|
||||
|
||||
self.unaltered = proxy_options.get('unaltered_replay', False)
|
||||
|
||||
self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH)
|
||||
|
||||
|
||||
if proxy_options.get('enable_https_proxy'):
|
||||
ca_file = proxy_options.get('root_ca_file')
|
||||
|
||||
@ -85,48 +90,23 @@ class ProxyRouter(object):
|
||||
def __call__(self, env):
|
||||
is_https = (env['REQUEST_METHOD'] == 'CONNECT')
|
||||
|
||||
# for non-https requests, check pac path and non-proxy urls
|
||||
if not is_https:
|
||||
url = env['REL_REQUEST_URI']
|
||||
|
||||
if url.endswith('/proxy.pac'):
|
||||
if url == self.proxy_pac_path:
|
||||
return self.make_pac_response(env)
|
||||
|
||||
if not url.startswith(('http://', 'https://')):
|
||||
return None
|
||||
|
||||
proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')
|
||||
env['pywb.proxy_scheme'] = 'https' if is_https else 'http'
|
||||
|
||||
route = None
|
||||
coll = None
|
||||
matcher = None
|
||||
|
||||
if proxy_auth:
|
||||
proxy_coll = self.read_basic_auth_coll(proxy_auth)
|
||||
|
||||
if not proxy_coll:
|
||||
return self.proxy_auth_coll_response()
|
||||
|
||||
proxy_coll = '/' + proxy_coll + '/'
|
||||
|
||||
for r in self.routes:
|
||||
matcher, c = r.is_handling(proxy_coll)
|
||||
if matcher:
|
||||
route = r
|
||||
coll = c
|
||||
break
|
||||
|
||||
if not route:
|
||||
return self.proxy_auth_coll_response()
|
||||
|
||||
# if 'use_default_coll' or only one collection, use that
|
||||
# for proxy mode
|
||||
elif self.use_default_coll or len(self.routes) == 1:
|
||||
route = self.routes[0]
|
||||
coll = self.routes[0].regex.pattern
|
||||
|
||||
# otherwise, require proxy auth 407 to select collection
|
||||
else:
|
||||
return self.proxy_auth_coll_response()
|
||||
# check resolver, for pre connect resolve
|
||||
if self.resolver.pre_connect:
|
||||
route, coll, matcher, response = self.resolver.resolve(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
# do connect, then get updated url
|
||||
if is_https:
|
||||
@ -136,6 +116,12 @@ class ProxyRouter(object):
|
||||
|
||||
url = env['REL_REQUEST_URI']
|
||||
|
||||
# check resolver, post connect
|
||||
if not self.resolver.pre_connect:
|
||||
route, coll, matcher, response = self.resolver.resolve(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
wbrequest = route.request_class(env,
|
||||
request_uri=url,
|
||||
wb_url_str=url,
|
||||
@ -189,20 +175,18 @@ class ProxyRouter(object):
|
||||
sock.send('Server: pywb proxy\r\n')
|
||||
sock.send('\r\n')
|
||||
|
||||
hostname = env['REL_REQUEST_URI'].split(':')[0]
|
||||
hostname, port = env['REL_REQUEST_URI'].split(':')
|
||||
created, certfile = self.ca.get_cert_for_host(hostname)
|
||||
|
||||
ssl_sock = ssl.wrap_socket(sock,
|
||||
server_side=True,
|
||||
certfile=certfile)
|
||||
#ssl_version=ssl.PROTOCOL_SSLv23)
|
||||
certfile=certfile,
|
||||
ciphers="ALL",
|
||||
ssl_version=ssl.PROTOCOL_SSLv23)
|
||||
|
||||
env['pywb.proxy_ssl_sock'] = ssl_sock
|
||||
|
||||
#todo: better reading of all headers
|
||||
buff = ssl_sock.recv(4096)
|
||||
|
||||
buffreader = BytesIO(buff)
|
||||
buffreader = BufferedReader(ssl_sock, block_size=self.BLOCK_SIZE)
|
||||
|
||||
statusline = buffreader.readline()
|
||||
statusparts = statusline.split(' ')
|
||||
@ -217,23 +201,44 @@ class ProxyRouter(object):
|
||||
|
||||
env['SERVER_PROTOCOL'] = statusparts[2].strip()
|
||||
|
||||
env['SERVER_NAME'] = hostname
|
||||
env['SERVER_PORT'] = port
|
||||
|
||||
queryparts = env['REL_REQUEST_URI'].split('?', 1)
|
||||
env['PATH_INFO'] = queryparts[0]
|
||||
env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
|
||||
|
||||
env['wsgi.input'] = socket._fileobject(ssl_sock, mode='r')
|
||||
env['wsgi.url_scheme'] = 'https'
|
||||
|
||||
while True:
|
||||
line = buffreader.readline()
|
||||
if line:
|
||||
line = line.rstrip()
|
||||
|
||||
if not line:
|
||||
break
|
||||
|
||||
parts = line.split(':')
|
||||
parts = line.split(':', 1)
|
||||
if len(parts) < 2:
|
||||
continue
|
||||
|
||||
name = 'HTTP_' + parts[0].replace('-', '_').upper()
|
||||
env[name] = parts[1]
|
||||
name = parts[0].strip()
|
||||
value = parts[1].strip()
|
||||
|
||||
name = name.replace('-', '_').upper()
|
||||
|
||||
if not name in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
|
||||
name = 'HTTP_' + name
|
||||
|
||||
env[name] = value
|
||||
|
||||
remain = buffreader.rem_length()
|
||||
if remain > 0:
|
||||
remainder = buffreader.read(self.BLOCK_SIZE)
|
||||
input_ = socket._fileobject(ssl_sock, mode='r')
|
||||
env['wsgi.input'] = BufferedReader(input_,
|
||||
block_size=self.BLOCK_SIZE,
|
||||
starting_data=remainder)
|
||||
|
||||
# Proxy Auto-Config (PAC) script for the proxy
|
||||
def make_pac_response(self, env):
|
||||
@ -263,7 +268,73 @@ class ProxyRouter(object):
|
||||
|
||||
return WbResponse.text_response(buff, content_type=content_type)
|
||||
|
||||
def proxy_auth_coll_response(self):
|
||||
|
||||
#=================================================================
|
||||
class BaseCollResolver(object):
    """Shared logic for choosing which route (collection) serves a
    proxy-mode request.

    Subclasses are expected to supply two hooks which this class calls
    but does not define:

    * ``get_proxy_coll(env)`` -- name of the collection requested by the
      client, ``None`` if nothing was requested, or ``''`` if the request
      could not be parsed.
    * ``select_coll_response(env)`` -- response asking the client to
      choose a collection.
    """

    def __init__(self, routes, config):
        """Store the candidate routes and read resolver options.

        :param routes: sequence of route objects; each must provide
            ``is_handling(coll_path)`` and a ``path`` attribute.
        :param config: dict of options; ``pre_connect`` (default False)
            and ``use_default_coll`` (default True) are read here.
        """
        self.use_default_coll = config.get('use_default_coll', True)
        self.pre_connect = config.get('pre_connect', False)
        self.routes = routes

    def resolve(self, env):
        """Pick the route for this request.

        :param env: request environ, passed through to the subclass hooks.
        :return: tuple ``(route, coll, matcher, response)``. When a route
            was chosen, ``response`` is None; otherwise the first three
            items are None and ``response`` is the collection selection
            response.
        """
        requested = self.get_proxy_coll(env)

        # an empty string means the selection could not be parsed
        if requested == '':
            return None, None, None, self.select_coll_response(env)

        default_coll = self.use_default_coll

        # nothing requested: fall back to a default named in the config
        if requested is None and isinstance(default_coll, str):
            requested = default_coll

        if requested:
            coll_path = '/' + requested + '/'

            for candidate in self.routes:
                matcher, matched_coll = candidate.is_handling(coll_path)
                if matcher:
                    return candidate, matched_coll, matcher, None

            # a collection was named, but no route handles it
            return None, None, None, self.select_coll_response(env)

        # no collection named: use the first route if defaults are
        # enabled or if there is only one route to choose from
        if default_coll == True or len(self.routes) == 1:
            first_route = self.routes[0]
            return first_route, first_route.path, None, None

        # otherwise, the client has to select a collection
        return None, None, None, self.select_coll_response(env)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class ProxyAuthResolver(BaseCollResolver):
|
||||
DEFAULT_MSG = 'Please enter name of a collection to use with proxy mode'
|
||||
|
||||
def __init__(self, routes, config):
|
||||
config['pre_connect'] = True
|
||||
super(ProxyAuthResolver, self).__init__(routes, config)
|
||||
self.auth_msg = config.get('auth_msg', self.DEFAULT_MSG)
|
||||
|
||||
def get_proxy_coll(self, env):
|
||||
proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')
|
||||
|
||||
if not proxy_auth:
|
||||
return None
|
||||
|
||||
proxy_coll = self.read_basic_auth_coll(proxy_auth)
|
||||
return proxy_coll
|
||||
|
||||
def select_coll_response(self, env):
|
||||
proxy_msg = 'Basic realm="{0}"'.format(self.auth_msg)
|
||||
|
||||
headers = [('Content-Type', 'text/plain'),
|
||||
@ -286,3 +357,128 @@ class ProxyRouter(object):
|
||||
|
||||
user_pass = base64.b64decode(parts[1])
|
||||
return user_pass.split(':')[0]
|
||||
|
||||
|
||||
#=================================================================
|
||||
class CookieResolver(BaseCollResolver):
    """Collection resolver that keeps the selected collection in a cookie.

    ``pre_connect`` is forced to False for this resolver. Requests whose
    url contains ``.<magic_name>`` are handled as internal "magic" pages
    (see ``handle_magic_page``) which set the collection cookie and
    redirect, rather than being resolved to a route.
    """

    def __init__(self, routes, config):
        """Read cookie resolver options from ``config``.

        :param routes: sequence of route objects, passed to the base class.
        :param config: dict of options; ``magic_name`` (default
            'pywb-proxy.com'), ``cookie_name`` (default '__pywb_coll') and
            ``proxy_select_view`` (default None) are read here. Note that
            ``config['pre_connect']`` is overwritten with False.
        """
        config['pre_connect'] = False
        super(CookieResolver, self).__init__(routes, config)
        self.magic_name = config.get('magic_name', 'pywb-proxy.com')
        self.cookie_name = config.get('cookie_name', '__pywb_coll')
        self.proxy_select_view = config.get('proxy_select_view')

    def get_proxy_coll(self, env):
        """Return the collection named in the client's cookie, or None."""
        return self.extract_client_cookie(env, self.cookie_name)

    def select_coll_response(self, env):
        """Redirect to the 'auto' magic page, carrying the requested url."""
        return self.make_magic_response('auto',
                                        env['REL_REQUEST_URI'],
                                        env)

    def resolve(self, env):
        """Handle magic-host urls directly; otherwise resolve as usual.

        :return: tuple ``(route, coll, matcher, response)``; for a magic
            page only ``response`` is set.
        """
        url = env['REL_REQUEST_URI']

        if ('.' + self.magic_name) in url:
            return None, None, None, self.handle_magic_page(url, env)

        return super(CookieResolver, self).resolve(env)

    def handle_magic_page(self, url, env):
        """Build the response for a url on the magic host.

        The part of ``url`` after the leading '/' of its path (plus any
        query) is treated as the url the client originally asked for.
        The host decides what happens:

        * starts with 'auto': if the collection cookie is present,
          redirect to the ``<coll>.sethost`` page, otherwise redirect to
          the 'select' page.
        * contains '.set.': the first host label is the collection; set
          the cookie for the magic domain and redirect to the
          ``<coll>.sethost`` page.
        * contains '.sethost.': the first host label is the collection
          and the text after ``.<magic_name>.`` is the target domain; set
          the cookie for that domain and redirect to the original url.
        * anything else: render ``proxy_select_view`` if one is
          configured, or fall back to a plain text response.
        """
        parts = urlparse.urlsplit(url)

        path_url = parts.path[1:]
        if parts.query:
            path_url += '?' + parts.query

        if parts.netloc.startswith('auto'):
            coll = self.extract_client_cookie(env, self.cookie_name)

            if coll:
                return self.make_sethost_cookie_response(coll, path_url, env)
            else:
                return self.make_magic_response('select', path_url, env)

        elif '.set.' in parts.netloc:
            coll = parts.netloc.split('.', 1)[0]
            headers = self.make_cookie_headers(coll, self.magic_name)

            return self.make_sethost_cookie_response(coll, path_url, env,
                                                     headers=headers)

        elif '.sethost.' in parts.netloc:
            host_parts = parts.netloc.split('.', 1)
            coll = host_parts[0]

            # NOTE(review): assumes the host contains '.<magic_name>.';
            # a failed find() (-1) is not handled here -- confirm callers
            inx = parts.netloc.find('.' + self.magic_name + '.')
            # skip the magic name plus the dots on either side of it
            domain = parts.netloc[inx + len(self.magic_name) + 2:]

            headers = self.make_cookie_headers(coll, domain)

            full_url = env['pywb.proxy_scheme'] + '://' + domain
            full_url += '/' + path_url
            return WbResponse.redir_response(full_url, headers=headers)

        elif self.proxy_select_view:
            # '%s' is left in place for the view to fill in per collection
            route_temp = env['pywb.proxy_scheme'] + '://%s.set.'
            route_temp += self.magic_name + '/' + path_url

            return (self.proxy_select_view.
                    render_response(routes=self.routes,
                                    route_temp=route_temp,
                                    url=path_url))
        else:
            return WbResponse.text_response('select text for ' + path_url)

    def make_cookie_headers(self, coll, domain):
        """Return a header list with a Set-Cookie storing ``coll`` for
        ``.<domain>``."""
        cookie_val = '{0}={1}; Path=/; Domain=.{2}; HttpOnly'
        cookie_val = cookie_val.format(self.cookie_name, coll, domain)
        headers = [('Set-Cookie', cookie_val)]
        return headers

    def make_sethost_cookie_response(self, coll, path_url, env, headers=None):
        """Redirect to the ``<coll>.sethost`` magic page for the host of
        ``path_url``.

        The host of ``path_url`` becomes the suffix of the magic host and
        its path (without the leading '/') and query become the redirect
        path.
        """
        path_parts = urlparse.urlsplit(path_url)

        new_url = path_parts.path[1:]
        if path_parts.query:
            new_url += '?' + path_parts.query

        return self.make_magic_response(coll + '.sethost', new_url, env,
                                        suffix=path_parts.netloc,
                                        headers=headers)

    def make_magic_response(self, prefix, url, env,
                            suffix=None, headers=None):
        """Redirect to ``<scheme>://<prefix>.<magic_name>[.<suffix>]/<url>``.

        The scheme is taken from ``env['pywb.proxy_scheme']``.
        """
        full_url = env['pywb.proxy_scheme'] + '://' + prefix + '.'
        full_url += self.magic_name
        if suffix:
            full_url += '.' + suffix
        full_url += '/' + url
        return WbResponse.redir_response(full_url, headers=headers)

    @staticmethod
    def extract_client_cookie(env, cookie_name):
        """Return the value of cookie ``cookie_name`` from the request.

        The ``HTTP_COOKIE`` header is split into ``name=value`` pairs and
        the name is compared exactly, so a cookie whose name or value
        merely contains ``cookie_name`` is not mistaken for it, and a
        value containing '=' is returned whole.

        :param env: request environ; only ``HTTP_COOKIE`` is read.
        :param cookie_name: exact name of the cookie to look up.
        :return: the stripped value of the first matching cookie, or None
            if the header is missing or has no such ``name=value`` pair.
        """
        cookie_header = env.get('HTTP_COOKIE')
        if not cookie_header:
            return None

        for pair in cookie_header.split(';'):
            name, sep, value = pair.partition('=')
            # pairs without '=' carry no value and are skipped
            if sep and name.strip() == cookie_name:
                return value.strip()

        return None
|
||||
|
||||
|
@ -46,7 +46,7 @@
|
||||
{'body': ['Test', 'Another'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '404', headers = [('Content-Type', 'text/plain')])}
|
||||
|
||||
>>> WbResponse.redir_response('http://example.com/otherfile')
|
||||
{'body': [], 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [('Location', 'http://example.com/otherfile')])}
|
||||
{'body': [], 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [('Location', 'http://example.com/otherfile'), ('Content-Length', '0')])}
|
||||
|
||||
"""
|
||||
|
||||
|
@ -125,7 +125,7 @@ class WbRequest(object):
|
||||
if not self.wb_url:
|
||||
return
|
||||
|
||||
mime = self.env.get('CONTENT_TYPE')
|
||||
mime = self.env.get('CONTENT_TYPE').split(';')[0]
|
||||
length = self.env.get('CONTENT_LENGTH')
|
||||
stream = self.env['wsgi.input']
|
||||
|
||||
@ -167,9 +167,12 @@ class WbResponse(object):
|
||||
return WbResponse(status_headers, value=[text])
|
||||
|
||||
@staticmethod
|
||||
def redir_response(location, status='302 Redirect'):
|
||||
return WbResponse(StatusAndHeaders(status,
|
||||
[('Location', location)]))
|
||||
def redir_response(location, status='302 Redirect', headers=None):
|
||||
redir_headers = [('Location', location), ('Content-Length', '0')]
|
||||
if headers:
|
||||
redir_headers += headers
|
||||
|
||||
return WbResponse(StatusAndHeaders(status, redir_headers))
|
||||
|
||||
def __call__(self, env, start_response):
|
||||
|
||||
|
@ -64,7 +64,7 @@ class WSGIApp(object):
|
||||
|
||||
env['pywb.proxy_statusline'] = statusline
|
||||
|
||||
ssl_sock.write('HTTP/1.0 ' + statusline + '\r\n')
|
||||
ssl_sock.write('HTTP/1.1 ' + statusline + '\r\n')
|
||||
for name, value in headers:
|
||||
ssl_sock.write(name + ': ' + value + '\r\n')
|
||||
|
||||
|
@ -215,13 +215,17 @@ def create_wb_router(passed_config={}):
|
||||
if hasattr(route.handler, 'resolve_refs'):
|
||||
route.handler.resolve_refs(handler_dict)
|
||||
|
||||
|
||||
# Check for new proxy mode!
|
||||
if config.get('enable_http_proxy', False):
|
||||
router = ProxyArchivalRouter
|
||||
else:
|
||||
router = ArchivalRouter
|
||||
|
||||
if config.get('proxy_select_html'):
|
||||
temp = J2TemplateView.create_template(config.get('proxy_select_html'),
|
||||
'Proxy Coll Selector')
|
||||
config.get('proxy_options')['proxy_select_view'] = temp
|
||||
|
||||
# Finally, create wb router
|
||||
return router(
|
||||
routes,
|
||||
|
Loading…
x
Reference in New Issue
Block a user