from wbrequestresponse import WbResponse
from pywb.utils.loaders import extract_client_cookie
from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.rewrite.wburl import WbUrl

from cache import create_cache

import urlparse
import base64
import os


#=================================================================
class BaseCollResolver(object):
    """Base logic for picking the route/collection of a proxy request.

    Subclasses are expected to provide two methods used by ``resolve``:

    * ``get_proxy_coll_ts(env)`` returning a ``(coll, ts)`` pair, where
      ``coll`` is ``None`` if no collection was supplied and ``''`` if
      the supplied value could not be parsed.
    * ``select_coll_response(env)`` returning the response to send when
      a collection still has to be chosen.
    """

    def __init__(self, routes, config):
        """Store the routes and read resolver options from ``config``.

        :param routes: sequence of route objects; ``resolve`` calls
            ``is_handling(path)`` on them and reads ``path`` from the
            first one.
        :param config: mapping providing the optional keys
            ``pre_connect`` (default ``False``) and
            ``use_default_coll`` (default ``True``).
        """
        self.routes = routes
        self.pre_connect = config.get('pre_connect', False)
        self.use_default_coll = config.get('use_default_coll', True)

    def resolve(self, env):
        """Resolve the route and collection for a request.

        :param env: request environ passed through to the subclass hooks.
        :return: 5-tuple ``(route, coll, matcher, ts, response)``.
            When a collection cannot be resolved, the first four items
            are ``None`` and ``response`` is the collection-selection
            response; otherwise ``response`` is ``None``.
        """
        route = None
        coll = None
        matcher = None
        ts = None

        proxy_coll, ts = self.get_proxy_coll_ts(env)

        # invalid parsing
        if proxy_coll == '':
            return None, None, None, None, self.select_coll_response(env)

        # a string-valued 'use_default_coll' names the fallback collection
        if proxy_coll is None and isinstance(self.use_default_coll, str):
            proxy_coll = self.use_default_coll

        if proxy_coll:
            proxy_coll = '/' + proxy_coll + '/'

            for r in self.routes:
                matcher, c = r.is_handling(proxy_coll)
                if matcher:
                    route = r
                    coll = c
                    break

            # if no match, return coll selection response
            if not route:
                return None, None, None, None, self.select_coll_response(env)

        # if 'use_default_coll' (or only one route), use the first route
        elif self.use_default_coll or len(self.routes) == 1:
            route = self.routes[0]
            coll = self.routes[0].path

        # otherwise, return the appropriate coll selection response
        else:
            return None, None, None, None, self.select_coll_response(env)

        return route, coll, matcher, ts, None


#=================================================================
class ProxyAuthResolver(BaseCollResolver):
    """Resolver reading the collection name from proxy basic auth.

    The user name of the ``Proxy-Authorization`` basic credentials is
    used as the collection name; no timestamp is ever supplied.
    """

    DEFAULT_MSG = 'Please enter name of a collection to use with proxy mode'

    def __init__(self, routes, config):
        """Force ``pre_connect`` on and read the optional ``auth_msg``.

        Note that ``config`` is modified in place.
        """
        config['pre_connect'] = True
        super(ProxyAuthResolver, self).__init__(routes, config)
        self.auth_msg = config.get('auth_msg', self.DEFAULT_MSG)

    def get_proxy_coll_ts(self, env):
        """Return ``(coll, None)`` from ``HTTP_PROXY_AUTHORIZATION``.

        ``coll`` is ``None`` when the header is absent and ``''`` when
        it is present but cannot be parsed.
        """
        proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')

        if not proxy_auth:
            return None, None

        proxy_coll = self.read_basic_auth_coll(proxy_auth)
        return proxy_coll, None

    def select_coll_response(self, env):
        """Return a 407 response asking for basic proxy credentials."""
        proxy_msg = 'Basic realm="{0}"'.format(self.auth_msg)

        headers = [('Content-Type', 'text/plain'),
                   ('Proxy-Authenticate', proxy_msg)]

        status_headers = StatusAndHeaders('407 Proxy Authentication', headers)

        value = self.auth_msg

        return WbResponse(status_headers, value=[value])

    @staticmethod
    def read_basic_auth_coll(value):
        """Extract the user name from a basic auth header value.

        :param value: header value of the form ``Basic <base64>``.
        :return: the text before the first ``:`` of the decoded
            credentials, or ``''`` if the scheme is not ``basic``, the
            value does not consist of exactly two space-separated
            parts, or the payload is not valid base64.
        """
        parts = value.split(' ')
        if parts[0].lower() != 'basic':
            return ''

        if len(parts) != 2:
            return ''

        try:
            user_pass = base64.b64decode(parts[1])
        except (TypeError, ValueError):
            # malformed base64 from the client is a parse failure,
            # not a server error
            return ''

        return user_pass.split(':')[0]


#=================================================================
class CookieResolver(BaseCollResolver):
    """Resolver tracking the selected collection via a session cookie.

    The session id comes from a client cookie; the collection and the
    optional timestamp for a session are kept in ``self.cache`` under
    the keys ``<sesh_id>:c`` and ``<sesh_id>:t``.  Requests whose proxy
    host contains ``.<magic_name>`` are handled as "magic" pages that
    set up or change the session (see ``handle_magic_page``).
    """

    SESH_COOKIE_NAME = '__pywb_proxy_sesh'

    def __init__(self, routes, config):
        """Force ``pre_connect`` off and read cookie resolver options.

        ``config`` is modified in place.  ``magic_name`` is required;
        ``cookie_name``, ``proxy_select_view`` and ``extra_headers``
        are optional.
        """
        config['pre_connect'] = False
        super(CookieResolver, self).__init__(routes, config)
        self.magic_name = config['magic_name']
        self.sethost_prefix = '-sethost.' + self.magic_name + '.'
        self.set_prefix = '-set.' + self.magic_name

        self.cookie_name = config.get('cookie_name', self.SESH_COOKIE_NAME)
        self.proxy_select_view = config.get('proxy_select_view')

        self.extra_headers = config.get('extra_headers')

        self.cache = create_cache()

    def get_proxy_coll_ts(self, env):
        """Return the ``(coll, ts)`` stored for the request's session."""
        coll, ts, _ = self.get_coll(env)
        return coll, ts

    def select_coll_response(self, env):
        """Redirect to the ``auto.<magic_name>`` host for this request."""
        return self.make_magic_response('auto',
                                        env['REL_REQUEST_URI'],
                                        env)

    def resolve(self, env):
        """Handle magic-host requests, otherwise resolve as the base class.

        If the proxy host contains ``.<magic_name>`` and
        ``handle_magic_page`` produces a response, that response is
        returned as the fifth tuple item with the rest set to ``None``.
        """
        server_name = env['pywb.proxy_host']

        if ('.' + self.magic_name) in server_name:
            response = self.handle_magic_page(env)
            if response:
                return None, None, None, None, response

        return super(CookieResolver, self).resolve(env)

    def handle_magic_page(self, env):
        """Dispatch a request made to one of the magic hosts.

        The branch is chosen from ``env['pywb.proxy_host']``; the first
        matching check wins:

        * starts with ``auto``: redirect on to the sethost step if the
          session already has a collection, else to the select page.
        * starts with ``query.``: store the timestamp of the requested
          url for the session and redirect to the url itself.
        * ends with ``-set.<magic_name>``: store the collection named by
          the host prefix for a (possibly new) session.
        * contains ``-sethost.<magic_name>.``: set the session cookie on
          the target domain and redirect to it.
        * contains ``select.``: render the collection selection view.

        :return: a response, or ``None`` if the request is a query url
            or the host matches none of the checks above.
        """
        request_url = env['REL_REQUEST_URI']
        parts = urlparse.urlsplit(request_url)
        server_name = env['pywb.proxy_host']

        # path (without leading slash) plus query string, if any
        path_url = parts.path[1:]
        if parts.query:
            path_url += '?' + parts.query

        if server_name.startswith('auto'):
            coll, _, sesh_id = self.get_coll(env)

            if coll:
                return self.make_sethost_cookie_response(sesh_id,
                                                         path_url,
                                                         env)
            return self.make_magic_response('select', path_url, env)

        elif server_name.startswith('query.'):
            wb_url = WbUrl(path_url)

            # only dealing with specific timestamp setting
            if wb_url.is_query():
                return None

            coll, _, sesh_id = self.get_coll(env)
            if not coll:
                return self.make_magic_response('select', path_url, env)

            self.set_ts(sesh_id, wb_url.timestamp)
            return self.make_redir_response(wb_url.url)

        elif server_name.endswith(self.set_prefix):
            old_sesh_id = extract_client_cookie(env, self.cookie_name)
            sesh_id = self.create_renew_sesh_id(old_sesh_id)

            # only send a cookie when a new session id was issued
            if sesh_id != old_sesh_id:
                headers = self.make_cookie_headers(sesh_id, self.magic_name)
            else:
                headers = None

            # the collection name is the host part before the set prefix
            coll = server_name[:-len(self.set_prefix)]

            # set sesh value
            self.set_coll(sesh_id, coll)

            return self.make_sethost_cookie_response(sesh_id,
                                                     path_url,
                                                     env,
                                                     headers=headers)

        elif self.sethost_prefix in server_name:
            # host has the form <sesh_id>-sethost.<magic_name>.<domain>
            inx = server_name.find(self.sethost_prefix)
            sesh_id = server_name[:inx]

            domain = server_name[inx + len(self.sethost_prefix):]

            headers = self.make_cookie_headers(sesh_id, domain)

            full_url = env['pywb.proxy_scheme'] + '://' + domain
            full_url += '/' + path_url
            return self.make_redir_response(full_url, headers=headers)

        elif 'select.' in server_name:
            coll = self.get_coll(env)[0]

            route_temp = '-set.' + self.magic_name + '/' + path_url

            return self.proxy_select_view.render_response(
                routes=self.routes,
                route_temp=route_temp,
                coll=coll,
                url=path_url)

        # unrecognized magic host: no response, caller falls back to
        # regular resolution
        return None

    def make_cookie_headers(self, sesh_id, domain):
        """Return a one-item header list setting the session cookie.

        The cookie is ``HttpOnly``, uses ``Path=/`` and is scoped to
        ``.<domain>``.
        """
        cookie_val = '{0}={1}; Path=/; Domain=.{2}; HttpOnly'
        cookie_val = cookie_val.format(self.cookie_name, sesh_id, domain)
        return [('Set-Cookie', cookie_val)]

    def make_sethost_cookie_response(self, sesh_id, path_url,
                                     env, headers=None):
        """Redirect to the ``<sesh_id>-sethost`` magic host.

        ``path_url`` is split into its host and its path/query; the
        host becomes the suffix of the magic host name and the
        path/query is kept as the redirect path.  ``http://`` is
        prepended first if ``path_url`` contains no ``://``.
        """
        if '://' not in path_url:
            path_url = 'http://' + path_url

        path_parts = urlparse.urlsplit(path_url)

        new_url = path_parts.path[1:]
        if path_parts.query:
            new_url += '?' + path_parts.query

        return self.make_magic_response(sesh_id + '-sethost', new_url, env,
                                        suffix=path_parts.netloc,
                                        headers=headers)

    def make_magic_response(self, prefix, url, env,
                            suffix=None, headers=None):
        """Redirect to ``<scheme>://<prefix>.<magic_name>[.<suffix>]/<url>``.

        The scheme is read from ``env['pywb.proxy_scheme']``.
        """
        full_url = env['pywb.proxy_scheme'] + '://' + prefix + '.'
        full_url += self.magic_name
        if suffix:
            full_url += '.' + suffix
        full_url += '/' + url
        return self.make_redir_response(full_url, headers=headers)

    def set_coll(self, sesh_id, coll):
        """Store ``coll`` as the collection of session ``sesh_id``."""
        self.cache[sesh_id + ':c'] = coll

    def set_ts(self, sesh_id, ts):
        """Store or clear the timestamp of session ``sesh_id``.

        A falsy ``ts`` removes any stored timestamp, which resets the
        session to the latest capture.  Clearing a timestamp that was
        never set is a no-op.
        """
        if ts:
            self.cache[sesh_id + ':t'] = ts
            return

        # this ensures that omitting timestamp will reset to latest
        # capture by deleting the cache entry
        try:
            del self.cache[sesh_id + ':t']
        except KeyError:
            # nothing stored for this session yet
            pass

    def get_coll(self, env):
        """Look up the session of the request and its stored values.

        :return: ``(coll, ts, sesh_id)``; ``coll`` and ``ts`` are
            ``None`` when there is no session cookie or when the cache
            raises ``KeyError`` for the corresponding entry.
        """
        sesh_id = extract_client_cookie(env, self.cookie_name)

        coll = None
        ts = None
        if sesh_id:
            try:
                coll = self.cache[sesh_id + ':c']
            except KeyError:
                # unknown session id (e.g. stale cookie): treat it as
                # "no collection selected" instead of failing
                pass

            try:
                ts = self.cache[sesh_id + ':t']
            except KeyError:
                pass

        return coll, ts, sesh_id

    def create_renew_sesh_id(self, sesh_id, force=False):
        """Return ``sesh_id`` if it is still known, else a new id.

        The given id is kept only if it is non-empty, its ``:c`` entry
        is in the cache and ``force`` is false.  A new id is the
        lower-cased base32 encoding of 5 random bytes.
        """
        if sesh_id and ((sesh_id + ':c') in self.cache) and not force:
            return sesh_id

        return base64.b32encode(os.urandom(5)).lower()

    def make_redir_response(self, url, headers=None):
        """Build a redirect to ``url`` with the configured extra headers.

        :param headers: optional list of ``(name, value)`` tuples; it is
            copied, so the caller's list is left untouched.
        """
        headers = list(headers) if headers else []

        if self.extra_headers:
            headers.extend(self.extra_headers.iteritems())

        return WbResponse.redir_response(url, headers=headers)