diff --git a/pywb/framework/certauth.py b/pywb/framework/certauth.py index 023754af..73b0d0e4 100644 --- a/pywb/framework/certauth.py +++ b/pywb/framework/certauth.py @@ -45,13 +45,15 @@ class CertificateAuthority(object): if not os.path.exists(certs_dir): os.mkdir(certs_dir) - def get_cert_for_host(self, host, overwrite=False): - host_filename = os.path.sep.join([self.certs_dir, '%s.pem' % host]) + def get_cert_for_host(self, host, overwrite=False, wildcard=False): + host_filename = os.path.join(self.certs_dir, host) + '.pem' if not overwrite and os.path.exists(host_filename): return False, host_filename - self.generate_host_cert(host, self.cert, self.key, host_filename) + self.generate_host_cert(host, self.cert, self.key, host_filename, + wildcard) + return True, host_filename @staticmethod @@ -107,7 +109,8 @@ class CertificateAuthority(object): return True, cert, key @staticmethod - def generate_host_cert(host, root_cert, root_key, host_filename): + def generate_host_cert(host, root_cert, root_key, host_filename, + wildcard=False): # Generate key key = crypto.PKey() key.generate_key(crypto.TYPE_RSA, 2048) @@ -123,6 +126,19 @@ class CertificateAuthority(object): cert.set_issuer(root_cert.get_subject()) cert.set_pubkey(req.get_pubkey()) + + if wildcard: + DNS = 'DNS:' + alt_hosts = [DNS + host, + DNS + '*.' + host] + + alt_hosts = ', '.join(alt_hosts) + + cert.add_extensions([ + crypto.X509Extension('subjectAltName', + False, + alt_hosts)]) + cert.sign(root_key, 'sha1') # Write cert + key @@ -163,6 +179,9 @@ def main(): parser.add_argument('-f', '--force', action='store_true') + parser.add_argument('-w', '--wildcard_cert', action='store_true', + help='add wildcard SAN to host: *., ') + result = parser.parse_args() overwrite = result.force @@ -170,12 +189,13 @@ def main(): # Create a new signed certificate using specified root if result.use_root: certs_dir = result.certs_dir + wildcard = result.wildcard ca = CertificateAuthority(ca_file=result.use_root, certs_dir=result.certs_dir, certname=result.name) created, host_filename = ca.get_cert_for_host(result.output_pem_file, - overwrite) + overwrite, wildcard) if created: print ('Created new cert "' + host_filename + diff --git a/pywb/framework/proxy.py b/pywb/framework/proxy.py index ba6d3266..693e7bd0 100644 --- a/pywb/framework/proxy.py +++ b/pywb/framework/proxy.py @@ -76,7 +76,6 @@ class ProxyRouter(object): else: self.resolver = ProxyAuthResolver(routes, proxy_options) - self.insert_banner = proxy_options.get('banner_only_replay', False) self.unaltered = proxy_options.get('unaltered_replay', False) self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH) @@ -115,10 +114,11 @@ class ProxyRouter(object): coll = None matcher = None response = None + ts = None # check resolver, for pre connect resolve if self.resolver.pre_connect: - route, coll, matcher, response = self.resolver.resolve(env) + route, coll, matcher, response, ts = self.resolver.resolve(env) if response: return response @@ -138,26 +138,36 @@ class ProxyRouter(object): if parts.query: env['pywb.proxy_req_uri'] += '?' + parts.query - # select prefix - env['pywb_proxy_select'] = 'select.' + self.magic_name + env['pywb_proxy_magic'] = self.magic_name + # route (static) and other resources to archival replay if env['pywb.proxy_host'] == self.magic_name: env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri'] return None # check resolver, post connect if not self.resolver.pre_connect: - route, coll, matcher, response = self.resolver.resolve(env) + route, coll, matcher, ts, response = self.resolver.resolve(env) if response: return response host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name + rel_prefix = '' + + # special case for proxy calendar + if (env['pywb.proxy_host'] == 'query.' + self.magic_name): + url = env['pywb.proxy_req_uri'][1:] + rel_prefix = '/' + + if ts is not None: + url = ts + '/' + url wbrequest = route.request_class(env, request_uri=url, wb_url_str=url, coll=coll, host_prefix=host_prefix, + rel_prefix=rel_prefix, wburl_class=route.handler.get_wburl_type(), urlrewriter_class=HttpsUrlRewriter, use_abs_prefix=False, @@ -166,10 +176,10 @@ class ProxyRouter(object): if matcher: route.apply_filters(wbrequest, matcher) - if self.insert_banner: - wbrequest.wb_url.mod = 'bn_' - elif self.unaltered: + if self.unaltered: wbrequest.wb_url.mod = 'id_' + elif is_https: + wbrequest.wb_url.mod = 'bn_' return route.handler(wbrequest) @@ -209,13 +219,23 @@ class ProxyRouter(object): sock.send('\r\n') hostname, port = env['REL_REQUEST_URI'].split(':') - created, certfile = self.ca.get_cert_for_host(hostname) + cert_host = hostname - ssl_sock = ssl.wrap_socket(sock, - server_side=True, - certfile=certfile, - ciphers="ALL", - ssl_version=ssl.PROTOCOL_SSLv23) + host_parts = hostname.split('.', 1) + if len(host_parts) == 2 and '.' in host_parts[1]: + cert_host = host_parts[1] + + created, certfile = self.ca.get_cert_for_host(cert_host, + wildcard=True) + + try: + ssl_sock = ssl.wrap_socket(sock, + server_side=True, + certfile=certfile, + ciphers="ALL", + ssl_version=ssl.PROTOCOL_SSLv23) + except Exception as se: + raise BadRequestException(se.message) env['pywb.proxy_ssl_sock'] = ssl_sock @@ -244,7 +264,6 @@ class ProxyRouter(object): env['PATH_INFO'] = queryparts[0] env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else '' - while True: line = buffreader.readline() if line: @@ -270,8 +289,7 @@ class ProxyRouter(object): remain = buffreader.rem_length() if remain > 0: remainder = buffreader.read(self.BLOCK_SIZE) - input_ = socket._fileobject(ssl_sock, mode='r') - env['wsgi.input'] = BufferedReader(input_, + env['wsgi.input'] = BufferedReader(ssl_sock, block_size=self.BLOCK_SIZE, starting_data=remainder) diff --git a/pywb/framework/proxy_resolvers.py b/pywb/framework/proxy_resolvers.py index 9062bafd..8fb65b73 100644 --- a/pywb/framework/proxy_resolvers.py +++ b/pywb/framework/proxy_resolvers.py @@ -1,5 +1,7 @@ from wbrequestresponse import WbResponse, WbRequest from pywb.utils.statusandheaders import StatusAndHeaders +from pywb.rewrite.wburl import WbUrl + import urlparse import base64 import os @@ -22,6 +24,9 @@ class UwsgiCache(object): def __contains__(self, item): return uwsgi.cache_exists(item) + def __delitem__(self, item): + uwsgi.cache_del(item) + #================================================================= class BaseCollResolver(object): @@ -34,12 +39,13 @@ class BaseCollResolver(object): route = None coll = None matcher = None + ts = None - proxy_coll = self.get_proxy_coll(env) + proxy_coll, ts = self.get_proxy_coll_ts(env) # invalid parsing if proxy_coll == '': - return None, None, None, self.select_coll_response(env) + return None, None, None, None, self.select_coll_response(env) if proxy_coll is None and isinstance(self.use_default_coll, str): proxy_coll = self.use_default_coll @@ -56,7 +62,7 @@ class BaseCollResolver(object): # if no match, return coll selection response if not route: - return None, None, None, self.select_coll_response(env) + return None, None, None, None, self.select_coll_response(env) # if 'use_default_coll' elif self.use_default_coll == True or len(self.routes) == 1: @@ -65,9 +71,9 @@ class BaseCollResolver(object): # otherwise, return the appropriate coll selection response else: - return None, None, None, self.select_coll_response(env) + return None, None, None, None, self.select_coll_response(env) - return route, coll, matcher, None + return route, coll, matcher, ts, None #================================================================= @@ -79,14 +85,14 @@ class ProxyAuthResolver(BaseCollResolver): super(ProxyAuthResolver, self).__init__(routes, config) self.auth_msg = config.get('auth_msg', self.DEFAULT_MSG) - def get_proxy_coll(self, env): + def get_proxy_coll_ts(self, env): proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION') if not proxy_auth: - return None + return None, None proxy_coll = self.read_basic_auth_coll(proxy_auth) - return proxy_coll + return proxy_coll, None def select_coll_response(self, env): proxy_msg = 'Basic realm="{0}"'.format(self.auth_msg) @@ -120,6 +126,9 @@ class CookieResolver(BaseCollResolver): # pragma: no cover config['pre_connect'] = False super(CookieResolver, self).__init__(routes, config) self.magic_name = config['magic_name'] + self.sethost_prefix = '-sethost.' + self.magic_name + '.' + self.set_prefix = '-set.' + self.magic_name + self.cookie_name = config.get('cookie_name', '__pywb_coll') self.proxy_select_view = config.get('proxy_select_view') @@ -128,9 +137,9 @@ class CookieResolver(BaseCollResolver): # pragma: no cover else: self.cache = {} - def get_proxy_coll(self, env): - coll, sesh_id = self.get_coll(env) - return coll + def get_proxy_coll_ts(self, env): + coll, ts, sesh_id = self.get_coll(env) + return coll, ts def select_coll_response(self, env): return self.make_magic_response('auto', @@ -141,27 +150,44 @@ class CookieResolver(BaseCollResolver): # pragma: no cover server_name = env['pywb.proxy_host'] if ('.' + self.magic_name) in server_name: - return None, None, None, self.handle_magic_page(env) + response = self.handle_magic_page(env) + if response: + return None, None, None, None, response return super(CookieResolver, self).resolve(env) def handle_magic_page(self, env): - url = env['REL_REQUEST_URI'] - parts = urlparse.urlsplit(url) + request_url = env['REL_REQUEST_URI'] + parts = urlparse.urlsplit(request_url) + server_name = env['pywb.proxy_host'] path_url = parts.path[1:] if parts.query: path_url += '?' + parts.query - if parts.netloc.startswith('auto'): - coll, sesh_id = self.get_coll(env) + if server_name.startswith('auto'): + coll, ts, sesh_id = self.get_coll(env) if coll: return self.make_sethost_cookie_response(sesh_id, path_url, env) else: return self.make_magic_response('select', path_url, env) - elif '.set.' in parts.netloc: + elif server_name.startswith('query.'): + wb_url = WbUrl(path_url) + + # only dealing with specific timestamp setting + if wb_url.is_query(): + return None + + coll, ts, sesh_id = self.get_coll(env) + if not coll: + return self.make_magic_response('select', path_url, env) + + self.set_ts(sesh_id, wb_url.timestamp) + return self.make_redir_response(wb_url.url) + + elif server_name.endswith(self.set_prefix): old_sesh_id = self.extract_client_cookie(env, self.cookie_name) sesh_id = self.create_renew_sesh_id(old_sesh_id) @@ -170,34 +196,33 @@ class CookieResolver(BaseCollResolver): # pragma: no cover else: headers = None - value, name, _ = parts.netloc.split('.', 2) + coll = server_name[:-len(self.set_prefix)] # set sesh value - self.cache[sesh_id] = value + self.set_coll(sesh_id, coll) return self.make_sethost_cookie_response(sesh_id, path_url, env, headers=headers) - elif '.sethost.' in parts.netloc: - host_parts = parts.netloc.split('.', 1) - sesh_id = host_parts[0] + elif self.sethost_prefix in server_name: + inx = server_name.find(self.sethost_prefix) + sesh_id = server_name[:inx] - inx = parts.netloc.find('.' + self.magic_name + '.') - domain = parts.netloc[inx + len(self.magic_name) + 2:] + domain = server_name[inx + len(self.sethost_prefix):] headers = self.make_cookie_headers(sesh_id, domain) full_url = env['pywb.proxy_scheme'] + '://' + domain full_url += '/' + path_url - return WbResponse.redir_response(full_url, headers=headers) + return self.make_redir_response(full_url, headers=headers) - elif 'select.' in parts.netloc: + elif 'select.' in server_name: if not self.proxy_select_view: return WbResponse.text_response('select text for ' + path_url) - coll, sesh_id = self.get_coll(env) + coll, ts, sesh_id = self.get_coll(env) - route_temp = env['pywb.proxy_scheme'] + '://%s.coll.set.' + route_temp = env['pywb.proxy_scheme'] + '://%s-set.' route_temp += self.magic_name + '/' + path_url return (self.proxy_select_view. @@ -217,14 +242,18 @@ class CookieResolver(BaseCollResolver): # pragma: no cover headers = [('Set-Cookie', cookie_val)] return headers - def make_sethost_cookie_response(self, sesh_id, path_url, env, headers=None): + def make_sethost_cookie_response(self, sesh_id, path_url, + env, headers=None): + if '://' not in path_url: + path_url = 'http://' + path_url + path_parts = urlparse.urlsplit(path_url) new_url = path_parts.path[1:] if path_parts.query: new_url += '?' + path_parts.query - return self.make_magic_response(sesh_id + '.sethost', new_url, env, + return self.make_magic_response(sesh_id + '-sethost', new_url, env, suffix=path_parts.netloc, headers=headers) @@ -236,25 +265,44 @@ class CookieResolver(BaseCollResolver): # pragma: no cover if suffix: full_url += '.' + suffix full_url += '/' + url - return WbResponse.redir_response(full_url, headers=headers) + return self.make_redir_response(full_url, headers=headers) + + def set_coll(self, sesh_id, coll): + self.cache[sesh_id + ':c'] = coll + + def set_ts(self, sesh_id, ts): + if ts: + self.cache[sesh_id + ':t'] = ts + # this ensures that omitting timestamp will reset to latest + # capture by deleting the cache entry + else: + del self.cache[sesh_id + ':t'] def get_coll(self, env): sesh_id = self.extract_client_cookie(env, self.cookie_name) coll = None + ts = None if sesh_id: - coll = self.cache[sesh_id] + coll = self.cache[sesh_id + ':c'] + try: + ts = self.cache[sesh_id + ':t'] + except KeyError: + pass - return coll, sesh_id + return coll, ts, sesh_id def create_renew_sesh_id(self, sesh_id, force=False): #if sesh_id in self.cache and not force: - if sesh_id and (sesh_id in self.cache) and not force: + if sesh_id and ((sesh_id + ':c') in self.cache) and not force: return sesh_id sesh_id = base64.b32encode(os.urandom(5)).lower() return sesh_id + def make_redir_response(self, url, headers=None): + return WbResponse.redir_response(url, headers=headers) + @staticmethod def extract_client_cookie(env, cookie_name): cookie_header = env.get('HTTP_COOKIE') diff --git a/pywb/framework/wsgi_wrappers.py b/pywb/framework/wsgi_wrappers.py index d1a4f772..3498c819 100644 --- a/pywb/framework/wsgi_wrappers.py +++ b/pywb/framework/wsgi_wrappers.py @@ -125,7 +125,11 @@ class WSGIApp(object): else: err_url = None - err_msg = exc.message.encode('utf-8') + try: + err_msg = exc.message.encode('utf-8') + except Exception: + err_msg = exc.message + err_url = '' if print_trace: import traceback diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index d5593a22..2679b4dc 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -144,7 +144,7 @@ class HttpsUrlRewriter(object): else: return url - def get_timestamp_url(self, timestamp, url): + def get_timestamp_url(self, timestamp, url=''): return url def get_abs_url(self, url=''): diff --git a/pywb/static/wb.js b/pywb/static/wb.js index fb2c3ac3..f4267b8e 100644 --- a/pywb/static/wb.js +++ b/pywb/static/wb.js @@ -70,9 +70,13 @@ function init_banner() { text += "" + capture_str + ""; - if (wbinfo.proxy_select && wbinfo.url) { - full_url = wbinfo.proxy_select + "/" + wbinfo.url; - text += '
Switch Collection'; + if (wbinfo.proxy_magic && wbinfo.url) { + var select_url = wbinfo.proxy_magic + "/" + wbinfo.url; + var query_url = wbinfo.proxy_magic + "/*/" + wbinfo.url; + text += '
' + text += 'From ' + wbinfo.coll + ' [Switch]'; + text += '  '; + text += 'View All Captures'; } banner.innerHTML = text; diff --git a/pywb/ui/error.html b/pywb/ui/error.html index 6453e987..b122fc38 100644 --- a/pywb/ui/error.html +++ b/pywb/ui/error.html @@ -10,9 +10,9 @@

{% endif %} -{% if env.pywb_proxy_select and err_url and status == '404 Not Found' %} +{% if env.pywb_proxy_magic and err_url and status == '404 Not Found' %}

-Try Different Collections +Try Different Collection

{% endif %} diff --git a/pywb/ui/head_insert.html b/pywb/ui/head_insert.html index 98330da9..f22ef55a 100644 --- a/pywb/ui/head_insert.html +++ b/pywb/ui/head_insert.html @@ -20,7 +20,8 @@ wbinfo.is_frame_mp = {{"true" if wbrequest.wb_url.mod == 'mp_' else "false"}}; wbinfo.canon_url = "{{ canon_url }}"; wbinfo.is_live = {{ "true" if cdx.is_live else "false" }}; - wbinfo.proxy_select = "{{ wbrequest.env.pywb_proxy_select }}"; + wbinfo.coll = "{{ wbrequest.coll }}"; + wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}"; diff --git a/pywb/webapp/pywb_init.py b/pywb/webapp/pywb_init.py index 7cd62a79..3b3a3cc6 100644 --- a/pywb/webapp/pywb_init.py +++ b/pywb/webapp/pywb_init.py @@ -78,7 +78,7 @@ def create_live_handler(config): #================================================================= def init_route_config(value, config): - if isinstance(value, str): + if isinstance(value, str) or isinstance(value, list): value = dict(index_paths=value) route_config = DictChain(value, config)