mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-26 07:49:24 +01:00
proxy resolver: the cookie resolver now uses session cookies
proxy static handler: static content is now handled via the proxy to support both http and https; custom env settings use the 'pywb.proxy' prefix
This commit is contained in:
parent
9c96026904
commit
607ea1ccf0
@ -14,7 +14,7 @@ from pywb.utils.bufferedreaders import BufferedReader
|
|||||||
|
|
||||||
from certauth import CertificateAuthority
|
from certauth import CertificateAuthority
|
||||||
|
|
||||||
from proxy_resolvers import ProxyAuthResolver
|
from proxy_resolvers import ProxyAuthResolver, CookieResolver
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -68,6 +68,8 @@ class ProxyRouter(object):
|
|||||||
self.resolver = ProxyAuthResolver(routes, proxy_options)
|
self.resolver = ProxyAuthResolver(routes, proxy_options)
|
||||||
#self.resolver = CookieResolver(routes, proxy_options)
|
#self.resolver = CookieResolver(routes, proxy_options)
|
||||||
|
|
||||||
|
self.magic_name = proxy_options.get('magic_name', 'pywb-proxy.com')
|
||||||
|
|
||||||
self.unaltered = proxy_options.get('unaltered_replay', False)
|
self.unaltered = proxy_options.get('unaltered_replay', False)
|
||||||
|
|
||||||
self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH)
|
self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH)
|
||||||
@ -100,7 +102,12 @@ class ProxyRouter(object):
|
|||||||
if not url.startswith(('http://', 'https://')):
|
if not url.startswith(('http://', 'https://')):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
env['pywb.proxy_scheme'] = 'https' if is_https else 'http'
|
env['pywb.proxy_scheme'] = 'http'
|
||||||
|
|
||||||
|
route = None
|
||||||
|
coll = None
|
||||||
|
matcher = None
|
||||||
|
response = None
|
||||||
|
|
||||||
# check resolver, for pre connect resolve
|
# check resolver, for pre connect resolve
|
||||||
if self.resolver.pre_connect:
|
if self.resolver.pre_connect:
|
||||||
@ -115,6 +122,21 @@ class ProxyRouter(object):
|
|||||||
return response
|
return response
|
||||||
|
|
||||||
url = env['REL_REQUEST_URI']
|
url = env['REL_REQUEST_URI']
|
||||||
|
else:
|
||||||
|
parts = urlparse.urlsplit(env['REL_REQUEST_URI'])
|
||||||
|
hostport = parts.netloc.split(':', 1)
|
||||||
|
env['pywb.proxy_host'] = hostport[0]
|
||||||
|
env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
|
||||||
|
env['pywb.proxy_req_uri'] = parts.path
|
||||||
|
if parts.query:
|
||||||
|
env['pywb.proxy_req_uri'] += '?' + parts.query
|
||||||
|
|
||||||
|
# static
|
||||||
|
static_prefix = 'static.' + self.magic_name
|
||||||
|
|
||||||
|
if env['pywb.proxy_host'] == static_prefix:
|
||||||
|
env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']
|
||||||
|
return None
|
||||||
|
|
||||||
# check resolver, post connect
|
# check resolver, post connect
|
||||||
if not self.resolver.pre_connect:
|
if not self.resolver.pre_connect:
|
||||||
@ -122,11 +144,14 @@ class ProxyRouter(object):
|
|||||||
if response:
|
if response:
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
host_prefix = env['pywb.proxy_scheme'] + '://' + static_prefix
|
||||||
|
|
||||||
wbrequest = route.request_class(env,
|
wbrequest = route.request_class(env,
|
||||||
request_uri=url,
|
request_uri=url,
|
||||||
wb_url_str=url,
|
wb_url_str=url,
|
||||||
coll=coll,
|
coll=coll,
|
||||||
host_prefix=self.hostpaths[0],
|
# host_prefix=self.hostpaths[0],
|
||||||
|
host_prefix=host_prefix,
|
||||||
wburl_class=route.handler.get_wburl_type(),
|
wburl_class=route.handler.get_wburl_type(),
|
||||||
urlrewriter_class=HttpsUrlRewriter,
|
urlrewriter_class=HttpsUrlRewriter,
|
||||||
use_abs_prefix=False,
|
use_abs_prefix=False,
|
||||||
@ -136,7 +161,8 @@ class ProxyRouter(object):
|
|||||||
route.apply_filters(wbrequest, matcher)
|
route.apply_filters(wbrequest, matcher)
|
||||||
|
|
||||||
if self.unaltered:
|
if self.unaltered:
|
||||||
wbrequest.wb_url.mod = 'id_'
|
#wbrequest.wb_url.mod = 'id_'
|
||||||
|
wbrequest.wb_url.mod = 'bn_'
|
||||||
|
|
||||||
return route.handler(wbrequest)
|
return route.handler(wbrequest)
|
||||||
|
|
||||||
@ -201,14 +227,16 @@ class ProxyRouter(object):
|
|||||||
|
|
||||||
env['SERVER_PROTOCOL'] = statusparts[2].strip()
|
env['SERVER_PROTOCOL'] = statusparts[2].strip()
|
||||||
|
|
||||||
env['SERVER_NAME'] = hostname
|
env['pywb.proxy_scheme'] = 'https'
|
||||||
env['SERVER_PORT'] = port
|
|
||||||
|
env['pywb.proxy_host'] = hostname
|
||||||
|
env['pywb.proxy_port'] = port
|
||||||
|
env['pywb.proxy_req_uri'] = statusparts[1]
|
||||||
|
|
||||||
queryparts = env['REL_REQUEST_URI'].split('?', 1)
|
queryparts = env['REL_REQUEST_URI'].split('?', 1)
|
||||||
env['PATH_INFO'] = queryparts[0]
|
env['PATH_INFO'] = queryparts[0]
|
||||||
env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
|
env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
|
||||||
|
|
||||||
env['wsgi.url_scheme'] = 'https'
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
line = buffreader.readline()
|
line = buffreader.readline()
|
||||||
|
@ -2,6 +2,25 @@ from wbrequestresponse import WbResponse, WbRequest
|
|||||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||||
import urlparse
|
import urlparse
|
||||||
import base64
|
import base64
|
||||||
|
import os
|
||||||
|
|
||||||
|
try:
|
||||||
|
import uwsgi
|
||||||
|
uwsgi_cache = True
|
||||||
|
except ImportError:
|
||||||
|
uwsgi_cache = False
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class UwsgiCache(object):
|
||||||
|
def __setitem__(self, item, value):
|
||||||
|
uwsgi.cache_update(item, value)
|
||||||
|
|
||||||
|
def __getitem__(self, item):
|
||||||
|
return uwsgi.cache_get(item)
|
||||||
|
|
||||||
|
def __contains__(self, item):
|
||||||
|
return uwsgi.cache_exists(item)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -104,9 +123,15 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
|||||||
self.cookie_name = config.get('cookie_name', '__pywb_coll')
|
self.cookie_name = config.get('cookie_name', '__pywb_coll')
|
||||||
self.proxy_select_view = config.get('proxy_select_view')
|
self.proxy_select_view = config.get('proxy_select_view')
|
||||||
|
|
||||||
|
if uwsgi_cache:
|
||||||
|
print 'UWSGI CACHE'
|
||||||
|
self.cache = UwsgiCache()
|
||||||
|
else:
|
||||||
|
self.cache = {}
|
||||||
|
|
||||||
def get_proxy_coll(self, env):
|
def get_proxy_coll(self, env):
|
||||||
cookie = self.extract_client_cookie(env, self.cookie_name)
|
coll, sesh_id = self.get_coll(env)
|
||||||
return cookie
|
return coll
|
||||||
|
|
||||||
def select_coll_response(self, env):
|
def select_coll_response(self, env):
|
||||||
return self.make_magic_response('auto',
|
return self.make_magic_response('auto',
|
||||||
@ -114,14 +139,15 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
|||||||
env)
|
env)
|
||||||
|
|
||||||
def resolve(self, env):
|
def resolve(self, env):
|
||||||
url = env['REL_REQUEST_URI']
|
server_name = env['pywb.proxy_host']
|
||||||
|
|
||||||
if ('.' + self.magic_name) in url:
|
if ('.' + self.magic_name) in server_name:
|
||||||
return None, None, None, self.handle_magic_page(url, env)
|
return None, None, None, self.handle_magic_page(env)
|
||||||
|
|
||||||
return super(CookieResolver, self).resolve(env)
|
return super(CookieResolver, self).resolve(env)
|
||||||
|
|
||||||
def handle_magic_page(self, url, env):
|
def handle_magic_page(self, env):
|
||||||
|
url = env['REL_REQUEST_URI']
|
||||||
parts = urlparse.urlsplit(url)
|
parts = urlparse.urlsplit(url)
|
||||||
|
|
||||||
path_url = parts.path[1:]
|
path_url = parts.path[1:]
|
||||||
@ -129,58 +155,77 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
|||||||
path_url += '?' + parts.query
|
path_url += '?' + parts.query
|
||||||
|
|
||||||
if parts.netloc.startswith('auto'):
|
if parts.netloc.startswith('auto'):
|
||||||
coll = self.extract_client_cookie(env, self.cookie_name)
|
coll, sesh_id = self.get_coll(env)
|
||||||
|
|
||||||
if coll:
|
if coll:
|
||||||
return self.make_sethost_cookie_response(coll, path_url, env)
|
return self.make_sethost_cookie_response(sesh_id, path_url, env)
|
||||||
else:
|
else:
|
||||||
return self.make_magic_response('select', path_url, env)
|
return self.make_magic_response('select', path_url, env)
|
||||||
|
|
||||||
elif '.set.' in parts.netloc:
|
elif '.set.' in parts.netloc:
|
||||||
coll = parts.netloc.split('.', 1)[0]
|
old_sesh_id = self.extract_client_cookie(env, self.cookie_name)
|
||||||
headers = self.make_cookie_headers(coll, self.magic_name)
|
sesh_id = self.create_renew_sesh_id(old_sesh_id)
|
||||||
|
|
||||||
return self.make_sethost_cookie_response(coll, path_url, env,
|
if sesh_id != old_sesh_id:
|
||||||
|
headers = self.make_cookie_headers(sesh_id, self.magic_name)
|
||||||
|
else:
|
||||||
|
headers = None
|
||||||
|
|
||||||
|
value, name, _ = parts.netloc.split('.', 2)
|
||||||
|
|
||||||
|
# set sesh value
|
||||||
|
self.cache[sesh_id] = value
|
||||||
|
|
||||||
|
return self.make_sethost_cookie_response(sesh_id, path_url, env,
|
||||||
headers=headers)
|
headers=headers)
|
||||||
|
|
||||||
elif '.sethost.' in parts.netloc:
|
elif '.sethost.' in parts.netloc:
|
||||||
host_parts = parts.netloc.split('.', 1)
|
host_parts = parts.netloc.split('.', 1)
|
||||||
coll = host_parts[0]
|
sesh_id = host_parts[0]
|
||||||
|
|
||||||
inx = parts.netloc.find('.' + self.magic_name + '.')
|
inx = parts.netloc.find('.' + self.magic_name + '.')
|
||||||
domain = parts.netloc[inx + len(self.magic_name) + 2:]
|
domain = parts.netloc[inx + len(self.magic_name) + 2:]
|
||||||
|
|
||||||
headers = self.make_cookie_headers(coll, domain)
|
headers = self.make_cookie_headers(sesh_id, domain)
|
||||||
|
|
||||||
full_url = env['pywb.proxy_scheme'] + '://' + domain
|
full_url = env['pywb.proxy_scheme'] + '://' + domain
|
||||||
full_url += '/' + path_url
|
full_url += '/' + path_url
|
||||||
return WbResponse.redir_response(full_url, headers=headers)
|
return WbResponse.redir_response(full_url, headers=headers)
|
||||||
|
|
||||||
elif self.proxy_select_view:
|
elif 'select.' in parts.netloc:
|
||||||
route_temp = env['pywb.proxy_scheme'] + '://%s.set.'
|
if not self.proxy_select_view:
|
||||||
|
return WbResponse.text_response('select text for ' + path_url)
|
||||||
|
|
||||||
|
coll, sesh_id = self.get_coll(env)
|
||||||
|
|
||||||
|
route_temp = env['pywb.proxy_scheme'] + '://%s.coll.set.'
|
||||||
route_temp += self.magic_name + '/' + path_url
|
route_temp += self.magic_name + '/' + path_url
|
||||||
|
|
||||||
return (self.proxy_select_view.
|
return (self.proxy_select_view.
|
||||||
render_response(routes=self.routes,
|
render_response(routes=self.routes,
|
||||||
route_temp=route_temp,
|
route_temp=route_temp,
|
||||||
|
coll=coll,
|
||||||
url=path_url))
|
url=path_url))
|
||||||
else:
|
|
||||||
return WbResponse.text_response('select text for ' + path_url)
|
|
||||||
|
|
||||||
def make_cookie_headers(self, coll, domain):
|
#else:
|
||||||
|
# msg = 'Invalid Magic Path: ' + url
|
||||||
|
# print msg
|
||||||
|
# return WbResponse.text_response(msg, status='404 Not Found')
|
||||||
|
|
||||||
|
def make_cookie_headers(self, sesh_id, domain):
|
||||||
cookie_val = '{0}={1}; Path=/; Domain=.{2}; HttpOnly'
|
cookie_val = '{0}={1}; Path=/; Domain=.{2}; HttpOnly'
|
||||||
cookie_val = cookie_val.format(self.cookie_name, coll, domain)
|
cookie_val = cookie_val.format(self.cookie_name, sesh_id, domain)
|
||||||
headers = [('Set-Cookie', cookie_val)]
|
headers = [('Set-Cookie', cookie_val)]
|
||||||
return headers
|
return headers
|
||||||
|
|
||||||
def make_sethost_cookie_response(self, coll, path_url, env, headers=None):
|
def make_sethost_cookie_response(self, sesh_id, path_url, env, headers=None):
|
||||||
path_parts = urlparse.urlsplit(path_url)
|
path_parts = urlparse.urlsplit(path_url)
|
||||||
|
|
||||||
new_url = path_parts.path[1:]
|
new_url = path_parts.path[1:]
|
||||||
if path_parts.query:
|
if path_parts.query:
|
||||||
new_url += '?' + path_parts.query
|
new_url += '?' + path_parts.query
|
||||||
|
|
||||||
return self.make_magic_response(coll + '.sethost', new_url, env,
|
return self.make_magic_response(sesh_id + '.sethost', new_url, env,
|
||||||
suffix=path_parts.netloc,
|
suffix=path_parts.netloc,
|
||||||
headers=headers)
|
headers=headers)
|
||||||
|
|
||||||
@ -194,6 +239,23 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
|||||||
full_url += '/' + url
|
full_url += '/' + url
|
||||||
return WbResponse.redir_response(full_url, headers=headers)
|
return WbResponse.redir_response(full_url, headers=headers)
|
||||||
|
|
||||||
|
def get_coll(self, env):
|
||||||
|
sesh_id = self.extract_client_cookie(env, self.cookie_name)
|
||||||
|
|
||||||
|
coll = None
|
||||||
|
if sesh_id:
|
||||||
|
coll = self.cache[sesh_id]
|
||||||
|
|
||||||
|
return coll, sesh_id
|
||||||
|
|
||||||
|
def create_renew_sesh_id(self, sesh_id, force=False):
|
||||||
|
#if sesh_id in self.cache and not force:
|
||||||
|
if sesh_id and (sesh_id in self.cache) and not force:
|
||||||
|
return sesh_id
|
||||||
|
|
||||||
|
sesh_id = base64.b32encode(os.urandom(5)).lower()
|
||||||
|
return sesh_id
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def extract_client_cookie(env, cookie_name):
|
def extract_client_cookie(env, cookie_name):
|
||||||
cookie_header = env.get('HTTP_COOKIE')
|
cookie_header = env.get('HTTP_COOKIE')
|
||||||
|
@ -152,9 +152,13 @@ class WbResponse(object):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def text_stream(stream, status='200 OK', content_type='text/plain'):
|
def text_stream(stream, status='200 OK', content_type='text/plain',
|
||||||
status_headers = StatusAndHeaders(status,
|
headers=None):
|
||||||
[('Content-Type', content_type)])
|
def_headers = [('Content-Type', content_type)]
|
||||||
|
if headers:
|
||||||
|
def_headers += headers
|
||||||
|
|
||||||
|
status_headers = StatusAndHeaders(status, def_headers)
|
||||||
|
|
||||||
return WbResponse(status_headers, value=stream)
|
return WbResponse(status_headers, value=stream)
|
||||||
|
|
||||||
|
@ -77,8 +77,8 @@ class WSGIApp(object):
|
|||||||
ssl_sock.write('\r\n')
|
ssl_sock.write('\r\n')
|
||||||
|
|
||||||
for obj in resp_iter:
|
for obj in resp_iter:
|
||||||
ssl_sock.write(obj)
|
if obj:
|
||||||
|
ssl_sock.write(obj)
|
||||||
ssl_sock.close()
|
ssl_sock.close()
|
||||||
|
|
||||||
start_response(env['pywb.proxy_statusline'], [])
|
start_response(env['pywb.proxy_statusline'], [])
|
||||||
@ -125,22 +125,24 @@ class WSGIApp(object):
|
|||||||
else:
|
else:
|
||||||
err_url = None
|
err_url = None
|
||||||
|
|
||||||
|
err_msg = exc.message.encode('utf-8')
|
||||||
|
|
||||||
if print_trace:
|
if print_trace:
|
||||||
import traceback
|
import traceback
|
||||||
err_details = traceback.format_exc(exc)
|
err_details = traceback.format_exc(exc)
|
||||||
print err_details
|
print err_details
|
||||||
else:
|
else:
|
||||||
logging.info(str(exc))
|
logging.info(err_msg)
|
||||||
err_details = None
|
err_details = None
|
||||||
|
|
||||||
if error_view:
|
if error_view:
|
||||||
return error_view.render_response(exc_type=type(exc).__name__,
|
return error_view.render_response(exc_type=type(exc).__name__,
|
||||||
err_msg=str(exc),
|
err_msg=err_msg,
|
||||||
err_details=err_details,
|
err_details=err_details,
|
||||||
status=status,
|
status=status,
|
||||||
err_url=err_url)
|
err_url=err_url)
|
||||||
else:
|
else:
|
||||||
return WbResponse.text_response(status + ' Error: ' + str(exc),
|
return WbResponse.text_response(status + ' Error: ' + err_msg,
|
||||||
status=status)
|
status=status)
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
Loading…
x
Reference in New Issue
Block a user