1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

proxy resolver: cookie resolver uses session cookies

proxy static handler: handled via proxy to support http/https
use 'pywb.proxy' prefix for custom env settings
This commit is contained in:
Ilya Kreymer 2014-07-29 12:23:41 -07:00
parent 9c96026904
commit 607ea1ccf0
4 changed files with 132 additions and 36 deletions

View File

@ -14,7 +14,7 @@ from pywb.utils.bufferedreaders import BufferedReader
from certauth import CertificateAuthority
from proxy_resolvers import ProxyAuthResolver
from proxy_resolvers import ProxyAuthResolver, CookieResolver
#=================================================================
@ -68,6 +68,8 @@ class ProxyRouter(object):
self.resolver = ProxyAuthResolver(routes, proxy_options)
#self.resolver = CookieResolver(routes, proxy_options)
self.magic_name = proxy_options.get('magic_name', 'pywb-proxy.com')
self.unaltered = proxy_options.get('unaltered_replay', False)
self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH)
@ -100,7 +102,12 @@ class ProxyRouter(object):
if not url.startswith(('http://', 'https://')):
return None
env['pywb.proxy_scheme'] = 'https' if is_https else 'http'
env['pywb.proxy_scheme'] = 'http'
route = None
coll = None
matcher = None
response = None
# check resolver, for pre connect resolve
if self.resolver.pre_connect:
@ -115,6 +122,21 @@ class ProxyRouter(object):
return response
url = env['REL_REQUEST_URI']
else:
parts = urlparse.urlsplit(env['REL_REQUEST_URI'])
hostport = parts.netloc.split(':', 1)
env['pywb.proxy_host'] = hostport[0]
env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
env['pywb.proxy_req_uri'] = parts.path
if parts.query:
env['pywb.proxy_req_uri'] += '?' + parts.query
# static
static_prefix = 'static.' + self.magic_name
if env['pywb.proxy_host'] == static_prefix:
env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']
return None
# check resolver, post connect
if not self.resolver.pre_connect:
@ -122,11 +144,14 @@ class ProxyRouter(object):
if response:
return response
host_prefix = env['pywb.proxy_scheme'] + '://' + static_prefix
wbrequest = route.request_class(env,
request_uri=url,
wb_url_str=url,
coll=coll,
host_prefix=self.hostpaths[0],
# host_prefix=self.hostpaths[0],
host_prefix=host_prefix,
wburl_class=route.handler.get_wburl_type(),
urlrewriter_class=HttpsUrlRewriter,
use_abs_prefix=False,
@ -136,7 +161,8 @@ class ProxyRouter(object):
route.apply_filters(wbrequest, matcher)
if self.unaltered:
wbrequest.wb_url.mod = 'id_'
#wbrequest.wb_url.mod = 'id_'
wbrequest.wb_url.mod = 'bn_'
return route.handler(wbrequest)
@ -201,14 +227,16 @@ class ProxyRouter(object):
env['SERVER_PROTOCOL'] = statusparts[2].strip()
env['SERVER_NAME'] = hostname
env['SERVER_PORT'] = port
env['pywb.proxy_scheme'] = 'https'
env['pywb.proxy_host'] = hostname
env['pywb.proxy_port'] = port
env['pywb.proxy_req_uri'] = statusparts[1]
queryparts = env['REL_REQUEST_URI'].split('?', 1)
env['PATH_INFO'] = queryparts[0]
env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
env['wsgi.url_scheme'] = 'https'
while True:
line = buffreader.readline()

View File

@ -2,6 +2,25 @@ from wbrequestresponse import WbResponse, WbRequest
from pywb.utils.statusandheaders import StatusAndHeaders
import urlparse
import base64
import os
try:
import uwsgi
uwsgi_cache = True
except ImportError:
uwsgi_cache = False
#=================================================================
class UwsgiCache(object):
    """Minimal dict-style adapter over the uwsgi cache API.

    Only item assignment, item lookup and ``in`` tests are supported;
    each one forwards directly to the matching ``uwsgi.cache_*`` call.
    """

    def __contains__(self, item):
        # membership test -> uwsgi.cache_exists
        return uwsgi.cache_exists(item)

    def __getitem__(self, item):
        # lookup -> uwsgi.cache_get, result passed through unchanged
        # NOTE(review): presumably yields None (not KeyError) for a
        # missing key -- confirm against the uwsgi cache API
        return uwsgi.cache_get(item)

    def __setitem__(self, item, value):
        # assignment -> uwsgi.cache_update
        uwsgi.cache_update(item, value)
#=================================================================
@ -104,9 +123,15 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
self.cookie_name = config.get('cookie_name', '__pywb_coll')
self.proxy_select_view = config.get('proxy_select_view')
if uwsgi_cache:
print 'UWSGI CACHE'
self.cache = UwsgiCache()
else:
self.cache = {}
def get_proxy_coll(self, env):
cookie = self.extract_client_cookie(env, self.cookie_name)
return cookie
coll, sesh_id = self.get_coll(env)
return coll
def select_coll_response(self, env):
return self.make_magic_response('auto',
@ -114,14 +139,15 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
env)
def resolve(self, env):
url = env['REL_REQUEST_URI']
server_name = env['pywb.proxy_host']
if ('.' + self.magic_name) in url:
return None, None, None, self.handle_magic_page(url, env)
if ('.' + self.magic_name) in server_name:
return None, None, None, self.handle_magic_page(env)
return super(CookieResolver, self).resolve(env)
def handle_magic_page(self, url, env):
def handle_magic_page(self, env):
url = env['REL_REQUEST_URI']
parts = urlparse.urlsplit(url)
path_url = parts.path[1:]
@ -129,58 +155,77 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
path_url += '?' + parts.query
if parts.netloc.startswith('auto'):
coll = self.extract_client_cookie(env, self.cookie_name)
coll, sesh_id = self.get_coll(env)
if coll:
return self.make_sethost_cookie_response(coll, path_url, env)
return self.make_sethost_cookie_response(sesh_id, path_url, env)
else:
return self.make_magic_response('select', path_url, env)
elif '.set.' in parts.netloc:
coll = parts.netloc.split('.', 1)[0]
headers = self.make_cookie_headers(coll, self.magic_name)
old_sesh_id = self.extract_client_cookie(env, self.cookie_name)
sesh_id = self.create_renew_sesh_id(old_sesh_id)
return self.make_sethost_cookie_response(coll, path_url, env,
if sesh_id != old_sesh_id:
headers = self.make_cookie_headers(sesh_id, self.magic_name)
else:
headers = None
value, name, _ = parts.netloc.split('.', 2)
# set sesh value
self.cache[sesh_id] = value
return self.make_sethost_cookie_response(sesh_id, path_url, env,
headers=headers)
elif '.sethost.' in parts.netloc:
host_parts = parts.netloc.split('.', 1)
coll = host_parts[0]
sesh_id = host_parts[0]
inx = parts.netloc.find('.' + self.magic_name + '.')
domain = parts.netloc[inx + len(self.magic_name) + 2:]
headers = self.make_cookie_headers(coll, domain)
headers = self.make_cookie_headers(sesh_id, domain)
full_url = env['pywb.proxy_scheme'] + '://' + domain
full_url += '/' + path_url
return WbResponse.redir_response(full_url, headers=headers)
elif self.proxy_select_view:
route_temp = env['pywb.proxy_scheme'] + '://%s.set.'
elif 'select.' in parts.netloc:
if not self.proxy_select_view:
return WbResponse.text_response('select text for ' + path_url)
coll, sesh_id = self.get_coll(env)
route_temp = env['pywb.proxy_scheme'] + '://%s.coll.set.'
route_temp += self.magic_name + '/' + path_url
return (self.proxy_select_view.
render_response(routes=self.routes,
route_temp=route_temp,
coll=coll,
url=path_url))
else:
return WbResponse.text_response('select text for ' + path_url)
def make_cookie_headers(self, coll, domain):
#else:
# msg = 'Invalid Magic Path: ' + url
# print msg
# return WbResponse.text_response(msg, status='404 Not Found')
def make_cookie_headers(self, sesh_id, domain):
cookie_val = '{0}={1}; Path=/; Domain=.{2}; HttpOnly'
cookie_val = cookie_val.format(self.cookie_name, coll, domain)
cookie_val = cookie_val.format(self.cookie_name, sesh_id, domain)
headers = [('Set-Cookie', cookie_val)]
return headers
def make_sethost_cookie_response(self, coll, path_url, env, headers=None):
def make_sethost_cookie_response(self, sesh_id, path_url, env, headers=None):
path_parts = urlparse.urlsplit(path_url)
new_url = path_parts.path[1:]
if path_parts.query:
new_url += '?' + path_parts.query
return self.make_magic_response(coll + '.sethost', new_url, env,
return self.make_magic_response(sesh_id + '.sethost', new_url, env,
suffix=path_parts.netloc,
headers=headers)
@ -194,6 +239,23 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
full_url += '/' + url
return WbResponse.redir_response(full_url, headers=headers)
def get_coll(self, env):
    """Look up the collection bound to the client's session cookie.

    Reads the session id from the cookie named ``self.cookie_name`` and
    maps it to a collection through ``self.cache``.

    :param env: request environ passed on to ``extract_client_cookie``
    :return: ``(coll, sesh_id)`` tuple; ``coll`` is None when there is
             no session cookie or the session id is not in the cache
    """
    sesh_id = self.extract_client_cookie(env, self.cookie_name)

    coll = None
    if sesh_id:
        # A client may present a session id the cache does not hold
        # (e.g. a stale cookie with the plain dict fallback). Treat
        # that as "no collection selected" rather than letting a
        # KeyError escape to the caller.
        try:
            coll = self.cache[sesh_id]
        except KeyError:
            coll = None

    return coll, sesh_id
def create_renew_sesh_id(self, sesh_id, force=False):
    """Return a session id to use for the client.

    The given ``sesh_id`` is kept when it is non-empty, present in
    ``self.cache`` and ``force`` is not set; otherwise a fresh id is
    generated from 5 random bytes, base32-encoded and lowercased.
    """
    known = bool(sesh_id) and (sesh_id in self.cache)

    if force or not known:
        return base64.b32encode(os.urandom(5)).lower()

    return sesh_id
@staticmethod
def extract_client_cookie(env, cookie_name):
cookie_header = env.get('HTTP_COOKIE')

View File

@ -152,9 +152,13 @@ class WbResponse(object):
pass
@staticmethod
def text_stream(stream, status='200 OK', content_type='text/plain'):
status_headers = StatusAndHeaders(status,
[('Content-Type', content_type)])
def text_stream(stream, status='200 OK', content_type='text/plain',
                headers=None):
    """Build a WbResponse whose value is ``stream``.

    The response always carries a Content-Type header set to
    ``content_type``; any entries in ``headers`` are appended after it.
    """
    all_headers = [('Content-Type', content_type)]

    if headers:
        all_headers.extend(headers)

    return WbResponse(StatusAndHeaders(status, all_headers), value=stream)

View File

@ -77,8 +77,8 @@ class WSGIApp(object):
ssl_sock.write('\r\n')
for obj in resp_iter:
ssl_sock.write(obj)
if obj:
ssl_sock.write(obj)
ssl_sock.close()
start_response(env['pywb.proxy_statusline'], [])
@ -125,22 +125,24 @@ class WSGIApp(object):
else:
err_url = None
err_msg = exc.message.encode('utf-8')
if print_trace:
import traceback
err_details = traceback.format_exc(exc)
print err_details
else:
logging.info(str(exc))
logging.info(err_msg)
err_details = None
if error_view:
return error_view.render_response(exc_type=type(exc).__name__,
err_msg=str(exc),
err_msg=err_msg,
err_details=err_details,
status=status,
err_url=err_url)
else:
return WbResponse.text_response(status + ' Error: ' + str(exc),
return WbResponse.text_response(status + ' Error: ' + err_msg,
status=status)
#=================================================================