diff --git a/pywb/framework/certauth.py b/pywb/framework/certauth.py
index 023754af..73b0d0e4 100644
--- a/pywb/framework/certauth.py
+++ b/pywb/framework/certauth.py
@@ -45,13 +45,15 @@ class CertificateAuthority(object):
if not os.path.exists(certs_dir):
os.mkdir(certs_dir)
- def get_cert_for_host(self, host, overwrite=False):
- host_filename = os.path.sep.join([self.certs_dir, '%s.pem' % host])
+ def get_cert_for_host(self, host, overwrite=False, wildcard=False):
+ host_filename = os.path.join(self.certs_dir, host) + '.pem'
if not overwrite and os.path.exists(host_filename):
return False, host_filename
- self.generate_host_cert(host, self.cert, self.key, host_filename)
+ self.generate_host_cert(host, self.cert, self.key, host_filename,
+ wildcard)
+
return True, host_filename
@staticmethod
@@ -107,7 +109,8 @@ class CertificateAuthority(object):
return True, cert, key
@staticmethod
- def generate_host_cert(host, root_cert, root_key, host_filename):
+ def generate_host_cert(host, root_cert, root_key, host_filename,
+ wildcard=False):
# Generate key
key = crypto.PKey()
key.generate_key(crypto.TYPE_RSA, 2048)
@@ -123,6 +126,19 @@ class CertificateAuthority(object):
cert.set_issuer(root_cert.get_subject())
cert.set_pubkey(req.get_pubkey())
+
+ if wildcard:
+ DNS = 'DNS:'
+ alt_hosts = [DNS + host,
+ DNS + '*.' + host]
+
+ alt_hosts = ', '.join(alt_hosts)
+
+ cert.add_extensions([
+ crypto.X509Extension('subjectAltName',
+ False,
+ alt_hosts)])
+
cert.sign(root_key, 'sha1')
# Write cert + key
@@ -163,6 +179,9 @@ def main():
parser.add_argument('-f', '--force', action='store_true')
+ parser.add_argument('-w', '--wildcard_cert', action='store_true',
+ help='add wildcard SAN to host: *.<host>, <host>')
+
result = parser.parse_args()
overwrite = result.force
@@ -170,12 +189,13 @@ def main():
# Create a new signed certificate using specified root
if result.use_root:
certs_dir = result.certs_dir
+ # argparse stores the '--wildcard_cert' flag as result.wildcard_cert
+ wildcard = result.wildcard_cert
ca = CertificateAuthority(ca_file=result.use_root,
certs_dir=result.certs_dir,
certname=result.name)
created, host_filename = ca.get_cert_for_host(result.output_pem_file,
- overwrite)
+ overwrite, wildcard)
if created:
print ('Created new cert "' + host_filename +
diff --git a/pywb/framework/proxy.py b/pywb/framework/proxy.py
index ba6d3266..693e7bd0 100644
--- a/pywb/framework/proxy.py
+++ b/pywb/framework/proxy.py
@@ -76,7 +76,6 @@ class ProxyRouter(object):
else:
self.resolver = ProxyAuthResolver(routes, proxy_options)
- self.insert_banner = proxy_options.get('banner_only_replay', False)
self.unaltered = proxy_options.get('unaltered_replay', False)
self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH)
@@ -115,10 +114,11 @@ class ProxyRouter(object):
coll = None
matcher = None
response = None
+ ts = None
# check resolver, for pre connect resolve
if self.resolver.pre_connect:
- route, coll, matcher, response = self.resolver.resolve(env)
+ # resolve() returns (route, coll, matcher, ts, response)
+ route, coll, matcher, ts, response = self.resolver.resolve(env)
if response:
return response
@@ -138,26 +138,36 @@ class ProxyRouter(object):
if parts.query:
env['pywb.proxy_req_uri'] += '?' + parts.query
- # select prefix
- env['pywb_proxy_select'] = 'select.' + self.magic_name
+ env['pywb_proxy_magic'] = self.magic_name
+ # route (static) and other resources to archival replay
if env['pywb.proxy_host'] == self.magic_name:
env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']
return None
# check resolver, post connect
if not self.resolver.pre_connect:
- route, coll, matcher, response = self.resolver.resolve(env)
+ route, coll, matcher, ts, response = self.resolver.resolve(env)
if response:
return response
host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
+ rel_prefix = ''
+
+ # special case for proxy calendar
+ if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
+ url = env['pywb.proxy_req_uri'][1:]
+ rel_prefix = '/'
+
+ if ts is not None:
+ url = ts + '/' + url
wbrequest = route.request_class(env,
request_uri=url,
wb_url_str=url,
coll=coll,
host_prefix=host_prefix,
+ rel_prefix=rel_prefix,
wburl_class=route.handler.get_wburl_type(),
urlrewriter_class=HttpsUrlRewriter,
use_abs_prefix=False,
@@ -166,10 +176,10 @@ class ProxyRouter(object):
if matcher:
route.apply_filters(wbrequest, matcher)
- if self.insert_banner:
- wbrequest.wb_url.mod = 'bn_'
- elif self.unaltered:
+ if self.unaltered:
wbrequest.wb_url.mod = 'id_'
+ elif is_https:
+ wbrequest.wb_url.mod = 'bn_'
return route.handler(wbrequest)
@@ -209,13 +219,23 @@ class ProxyRouter(object):
sock.send('\r\n')
hostname, port = env['REL_REQUEST_URI'].split(':')
- created, certfile = self.ca.get_cert_for_host(hostname)
+ cert_host = hostname
- ssl_sock = ssl.wrap_socket(sock,
- server_side=True,
- certfile=certfile,
- ciphers="ALL",
- ssl_version=ssl.PROTOCOL_SSLv23)
+ host_parts = hostname.split('.', 1)
+ if len(host_parts) == 2 and '.' in host_parts[1]:
+ cert_host = host_parts[1]
+
+ created, certfile = self.ca.get_cert_for_host(cert_host,
+ wildcard=True)
+
+ try:
+ ssl_sock = ssl.wrap_socket(sock,
+ server_side=True,
+ certfile=certfile,
+ ciphers="ALL",
+ ssl_version=ssl.PROTOCOL_SSLv23)
+ except Exception as se:
+ # str(se) keeps the text of multi-argument errors, where .message is ''
+ raise BadRequestException(str(se))
env['pywb.proxy_ssl_sock'] = ssl_sock
@@ -244,7 +264,6 @@ class ProxyRouter(object):
env['PATH_INFO'] = queryparts[0]
env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
-
while True:
line = buffreader.readline()
if line:
@@ -270,8 +289,7 @@ class ProxyRouter(object):
remain = buffreader.rem_length()
if remain > 0:
remainder = buffreader.read(self.BLOCK_SIZE)
- input_ = socket._fileobject(ssl_sock, mode='r')
- env['wsgi.input'] = BufferedReader(input_,
+ env['wsgi.input'] = BufferedReader(ssl_sock,
block_size=self.BLOCK_SIZE,
starting_data=remainder)
diff --git a/pywb/framework/proxy_resolvers.py b/pywb/framework/proxy_resolvers.py
index 9062bafd..8fb65b73 100644
--- a/pywb/framework/proxy_resolvers.py
+++ b/pywb/framework/proxy_resolvers.py
@@ -1,5 +1,7 @@
from wbrequestresponse import WbResponse, WbRequest
from pywb.utils.statusandheaders import StatusAndHeaders
+from pywb.rewrite.wburl import WbUrl
+
import urlparse
import base64
import os
@@ -22,6 +24,9 @@ class UwsgiCache(object):
def __contains__(self, item):
return uwsgi.cache_exists(item)
+ def __delitem__(self, item):
+ uwsgi.cache_del(item)
+
#=================================================================
class BaseCollResolver(object):
@@ -34,12 +39,13 @@ class BaseCollResolver(object):
route = None
coll = None
matcher = None
+ ts = None
- proxy_coll = self.get_proxy_coll(env)
+ proxy_coll, ts = self.get_proxy_coll_ts(env)
# invalid parsing
if proxy_coll == '':
- return None, None, None, self.select_coll_response(env)
+ return None, None, None, None, self.select_coll_response(env)
if proxy_coll is None and isinstance(self.use_default_coll, str):
proxy_coll = self.use_default_coll
@@ -56,7 +62,7 @@ class BaseCollResolver(object):
# if no match, return coll selection response
if not route:
- return None, None, None, self.select_coll_response(env)
+ return None, None, None, None, self.select_coll_response(env)
# if 'use_default_coll'
elif self.use_default_coll == True or len(self.routes) == 1:
@@ -65,9 +71,9 @@ class BaseCollResolver(object):
# otherwise, return the appropriate coll selection response
else:
- return None, None, None, self.select_coll_response(env)
+ return None, None, None, None, self.select_coll_response(env)
- return route, coll, matcher, None
+ return route, coll, matcher, ts, None
#=================================================================
@@ -79,14 +85,14 @@ class ProxyAuthResolver(BaseCollResolver):
super(ProxyAuthResolver, self).__init__(routes, config)
self.auth_msg = config.get('auth_msg', self.DEFAULT_MSG)
- def get_proxy_coll(self, env):
+ def get_proxy_coll_ts(self, env):
proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')
if not proxy_auth:
- return None
+ return None, None
proxy_coll = self.read_basic_auth_coll(proxy_auth)
- return proxy_coll
+ return proxy_coll, None
def select_coll_response(self, env):
proxy_msg = 'Basic realm="{0}"'.format(self.auth_msg)
@@ -120,6 +126,9 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
config['pre_connect'] = False
super(CookieResolver, self).__init__(routes, config)
self.magic_name = config['magic_name']
+ self.sethost_prefix = '-sethost.' + self.magic_name + '.'
+ self.set_prefix = '-set.' + self.magic_name
+
self.cookie_name = config.get('cookie_name', '__pywb_coll')
self.proxy_select_view = config.get('proxy_select_view')
@@ -128,9 +137,9 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
else:
self.cache = {}
- def get_proxy_coll(self, env):
- coll, sesh_id = self.get_coll(env)
- return coll
+ def get_proxy_coll_ts(self, env):
+ coll, ts, sesh_id = self.get_coll(env)
+ return coll, ts
def select_coll_response(self, env):
return self.make_magic_response('auto',
@@ -141,27 +150,44 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
server_name = env['pywb.proxy_host']
if ('.' + self.magic_name) in server_name:
- return None, None, None, self.handle_magic_page(env)
+ response = self.handle_magic_page(env)
+ if response:
+ return None, None, None, None, response
return super(CookieResolver, self).resolve(env)
def handle_magic_page(self, env):
- url = env['REL_REQUEST_URI']
- parts = urlparse.urlsplit(url)
+ request_url = env['REL_REQUEST_URI']
+ parts = urlparse.urlsplit(request_url)
+ server_name = env['pywb.proxy_host']
path_url = parts.path[1:]
if parts.query:
path_url += '?' + parts.query
- if parts.netloc.startswith('auto'):
- coll, sesh_id = self.get_coll(env)
+ if server_name.startswith('auto'):
+ coll, ts, sesh_id = self.get_coll(env)
if coll:
return self.make_sethost_cookie_response(sesh_id, path_url, env)
else:
return self.make_magic_response('select', path_url, env)
- elif '.set.' in parts.netloc:
+ elif server_name.startswith('query.'):
+ wb_url = WbUrl(path_url)
+
+ # only dealing with specific timestamp setting
+ if wb_url.is_query():
+ return None
+
+ coll, ts, sesh_id = self.get_coll(env)
+ if not coll:
+ return self.make_magic_response('select', path_url, env)
+
+ self.set_ts(sesh_id, wb_url.timestamp)
+ return self.make_redir_response(wb_url.url)
+
+ elif server_name.endswith(self.set_prefix):
old_sesh_id = self.extract_client_cookie(env, self.cookie_name)
sesh_id = self.create_renew_sesh_id(old_sesh_id)
@@ -170,34 +196,33 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
else:
headers = None
- value, name, _ = parts.netloc.split('.', 2)
+ coll = server_name[:-len(self.set_prefix)]
# set sesh value
- self.cache[sesh_id] = value
+ self.set_coll(sesh_id, coll)
return self.make_sethost_cookie_response(sesh_id, path_url, env,
headers=headers)
- elif '.sethost.' in parts.netloc:
- host_parts = parts.netloc.split('.', 1)
- sesh_id = host_parts[0]
+ elif self.sethost_prefix in server_name:
+ inx = server_name.find(self.sethost_prefix)
+ sesh_id = server_name[:inx]
- inx = parts.netloc.find('.' + self.magic_name + '.')
- domain = parts.netloc[inx + len(self.magic_name) + 2:]
+ domain = server_name[inx + len(self.sethost_prefix):]
headers = self.make_cookie_headers(sesh_id, domain)
full_url = env['pywb.proxy_scheme'] + '://' + domain
full_url += '/' + path_url
- return WbResponse.redir_response(full_url, headers=headers)
+ return self.make_redir_response(full_url, headers=headers)
- elif 'select.' in parts.netloc:
+ elif 'select.' in server_name:
if not self.proxy_select_view:
return WbResponse.text_response('select text for ' + path_url)
- coll, sesh_id = self.get_coll(env)
+ coll, ts, sesh_id = self.get_coll(env)
- route_temp = env['pywb.proxy_scheme'] + '://%s.coll.set.'
+ route_temp = env['pywb.proxy_scheme'] + '://%s-set.'
route_temp += self.magic_name + '/' + path_url
return (self.proxy_select_view.
@@ -217,14 +242,18 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
headers = [('Set-Cookie', cookie_val)]
return headers
- def make_sethost_cookie_response(self, sesh_id, path_url, env, headers=None):
+ def make_sethost_cookie_response(self, sesh_id, path_url,
+ env, headers=None):
+ if '://' not in path_url:
+ path_url = 'http://' + path_url
+
path_parts = urlparse.urlsplit(path_url)
new_url = path_parts.path[1:]
if path_parts.query:
new_url += '?' + path_parts.query
- return self.make_magic_response(sesh_id + '.sethost', new_url, env,
+ return self.make_magic_response(sesh_id + '-sethost', new_url, env,
suffix=path_parts.netloc,
headers=headers)
@@ -236,25 +265,44 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
if suffix:
full_url += '.' + suffix
full_url += '/' + url
- return WbResponse.redir_response(full_url, headers=headers)
+ return self.make_redir_response(full_url, headers=headers)
+
+ def set_coll(self, sesh_id, coll):
+ self.cache[sesh_id + ':c'] = coll
+
+ def set_ts(self, sesh_id, ts):
+     # store (or clear) the timestamp kept under '<sesh_id>:t'
+     if ts:
+         self.cache[sesh_id + ':t'] = ts
+     # this ensures that omitting timestamp will reset to latest
+     # capture by deleting the cache entry
+     else:
+         try:
+             del self.cache[sesh_id + ':t']
+         except KeyError:
+             # no timestamp was stored for this session: nothing to reset
+             pass
def get_coll(self, env):
    sesh_id = self.extract_client_cookie(env, self.cookie_name)
    coll = None
+     ts = None
    if sesh_id:
-         coll = self.cache[sesh_id]
+         # a stale/unknown session id or an unset timestamp raises
+         # KeyError on a dict cache; leave the missing value(s) as None
+         try:
+             coll = self.cache[sesh_id + ':c']
+             ts = self.cache[sesh_id + ':t']
+         except KeyError:
+             pass
-     return coll, sesh_id
+     return coll, ts, sesh_id
def create_renew_sesh_id(self, sesh_id, force=False):
#if sesh_id in self.cache and not force:
- if sesh_id and (sesh_id in self.cache) and not force:
+ if sesh_id and ((sesh_id + ':c') in self.cache) and not force:
return sesh_id
sesh_id = base64.b32encode(os.urandom(5)).lower()
return sesh_id
+ def make_redir_response(self, url, headers=None):
+ return WbResponse.redir_response(url, headers=headers)
+
@staticmethod
def extract_client_cookie(env, cookie_name):
cookie_header = env.get('HTTP_COOKIE')
diff --git a/pywb/framework/wsgi_wrappers.py b/pywb/framework/wsgi_wrappers.py
index d1a4f772..3498c819 100644
--- a/pywb/framework/wsgi_wrappers.py
+++ b/pywb/framework/wsgi_wrappers.py
@@ -125,7 +125,11 @@ class WSGIApp(object):
else:
err_url = None
- err_msg = exc.message.encode('utf-8')
+ try:
+ err_msg = exc.message.encode('utf-8')
+ except Exception:
+ err_msg = exc.message
+ err_url = ''
if print_trace:
import traceback
diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py
index d5593a22..2679b4dc 100644
--- a/pywb/rewrite/url_rewriter.py
+++ b/pywb/rewrite/url_rewriter.py
@@ -144,7 +144,7 @@ class HttpsUrlRewriter(object):
else:
return url
- def get_timestamp_url(self, timestamp, url):
+ def get_timestamp_url(self, timestamp, url=''):
return url
def get_abs_url(self, url=''):
diff --git a/pywb/static/wb.js b/pywb/static/wb.js
index fb2c3ac3..f4267b8e 100644
--- a/pywb/static/wb.js
+++ b/pywb/static/wb.js
@@ -70,9 +70,13 @@ function init_banner() {
text += "" + capture_str + " ";
- if (wbinfo.proxy_select && wbinfo.url) {
- full_url = wbinfo.proxy_select + "/" + wbinfo.url;
- text += 'Switch Collection ';
+ if (wbinfo.proxy_magic && wbinfo.url) {
+ var select_url = wbinfo.proxy_magic + "/" + wbinfo.url;
+ var query_url = wbinfo.proxy_magic + "/*/" + wbinfo.url;
+ text += ' '
+ text += 'From ' + wbinfo.coll + ' [Switch] ';
+ text += ' ';
+ text += 'View All Captures ';
}
banner.innerHTML = text;
diff --git a/pywb/ui/error.html b/pywb/ui/error.html
index 6453e987..b122fc38 100644
--- a/pywb/ui/error.html
+++ b/pywb/ui/error.html
@@ -10,9 +10,9 @@
{% endif %}
-{% if env.pywb_proxy_select and err_url and status == '404 Not Found' %}
+{% if env.pywb_proxy_magic and err_url and status == '404 Not Found' %}
-Try Different Collections
+Try Different Collection
{% endif %}
diff --git a/pywb/ui/head_insert.html b/pywb/ui/head_insert.html
index 98330da9..f22ef55a 100644
--- a/pywb/ui/head_insert.html
+++ b/pywb/ui/head_insert.html
@@ -20,7 +20,8 @@
wbinfo.is_frame_mp = {{"true" if wbrequest.wb_url.mod == 'mp_' else "false"}};
wbinfo.canon_url = "{{ canon_url }}";
wbinfo.is_live = {{ "true" if cdx.is_live else "false" }};
- wbinfo.proxy_select = "{{ wbrequest.env.pywb_proxy_select }}";
+ wbinfo.coll = "{{ wbrequest.coll }}";
+ wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}";
diff --git a/pywb/webapp/pywb_init.py b/pywb/webapp/pywb_init.py
index 7cd62a79..3b3a3cc6 100644
--- a/pywb/webapp/pywb_init.py
+++ b/pywb/webapp/pywb_init.py
@@ -78,7 +78,7 @@ def create_live_handler(config):
#=================================================================
def init_route_config(value, config):
- if isinstance(value, str):
+ if isinstance(value, str) or isinstance(value, list):
value = dict(index_paths=value)
route_config = DictChain(value, config)