mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
proxy: timestamp selection support!
certauth: wildcard support, use *.host wildcard for proxy certs whenever possible
ui: add coll info/switch and calendar links to banner
This commit is contained in:
parent
eff5a74ec7
commit
522ea87637
@ -45,13 +45,15 @@ class CertificateAuthority(object):
|
||||
if not os.path.exists(certs_dir):
|
||||
os.mkdir(certs_dir)
|
||||
|
||||
def get_cert_for_host(self, host, overwrite=False):
|
||||
host_filename = os.path.sep.join([self.certs_dir, '%s.pem' % host])
|
||||
def get_cert_for_host(self, host, overwrite=False, wildcard=False):
|
||||
host_filename = os.path.join(self.certs_dir, host) + '.pem'
|
||||
|
||||
if not overwrite and os.path.exists(host_filename):
|
||||
return False, host_filename
|
||||
|
||||
self.generate_host_cert(host, self.cert, self.key, host_filename)
|
||||
self.generate_host_cert(host, self.cert, self.key, host_filename,
|
||||
wildcard)
|
||||
|
||||
return True, host_filename
|
||||
|
||||
@staticmethod
|
||||
@ -107,7 +109,8 @@ class CertificateAuthority(object):
|
||||
return True, cert, key
|
||||
|
||||
@staticmethod
|
||||
def generate_host_cert(host, root_cert, root_key, host_filename):
|
||||
def generate_host_cert(host, root_cert, root_key, host_filename,
|
||||
wildcard=False):
|
||||
# Generate key
|
||||
key = crypto.PKey()
|
||||
key.generate_key(crypto.TYPE_RSA, 2048)
|
||||
@ -123,6 +126,19 @@ class CertificateAuthority(object):
|
||||
|
||||
cert.set_issuer(root_cert.get_subject())
|
||||
cert.set_pubkey(req.get_pubkey())
|
||||
|
||||
if wildcard:
|
||||
DNS = 'DNS:'
|
||||
alt_hosts = [DNS + host,
|
||||
DNS + '*.' + host]
|
||||
|
||||
alt_hosts = ', '.join(alt_hosts)
|
||||
|
||||
cert.add_extensions([
|
||||
crypto.X509Extension('subjectAltName',
|
||||
False,
|
||||
alt_hosts)])
|
||||
|
||||
cert.sign(root_key, 'sha1')
|
||||
|
||||
# Write cert + key
|
||||
@ -163,6 +179,9 @@ def main():
|
||||
|
||||
parser.add_argument('-f', '--force', action='store_true')
|
||||
|
||||
parser.add_argument('-w', '--wildcard_cert', action='store_true',
|
||||
help='add wildcard SAN to host: *.<host>, <host>')
|
||||
|
||||
result = parser.parse_args()
|
||||
|
||||
overwrite = result.force
|
||||
@ -170,12 +189,13 @@ def main():
|
||||
# Create a new signed certificate using specified root
|
||||
if result.use_root:
|
||||
certs_dir = result.certs_dir
|
||||
wildcard = result.wildcard
|
||||
ca = CertificateAuthority(ca_file=result.use_root,
|
||||
certs_dir=result.certs_dir,
|
||||
certname=result.name)
|
||||
|
||||
created, host_filename = ca.get_cert_for_host(result.output_pem_file,
|
||||
overwrite)
|
||||
overwrite, wildcard)
|
||||
|
||||
if created:
|
||||
print ('Created new cert "' + host_filename +
|
||||
|
@ -76,7 +76,6 @@ class ProxyRouter(object):
|
||||
else:
|
||||
self.resolver = ProxyAuthResolver(routes, proxy_options)
|
||||
|
||||
self.insert_banner = proxy_options.get('banner_only_replay', False)
|
||||
self.unaltered = proxy_options.get('unaltered_replay', False)
|
||||
|
||||
self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH)
|
||||
@ -115,10 +114,11 @@ class ProxyRouter(object):
|
||||
coll = None
|
||||
matcher = None
|
||||
response = None
|
||||
ts = None
|
||||
|
||||
# check resolver, for pre connect resolve
|
||||
if self.resolver.pre_connect:
|
||||
route, coll, matcher, response = self.resolver.resolve(env)
|
||||
route, coll, matcher, response, ts = self.resolver.resolve(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
@ -138,26 +138,36 @@ class ProxyRouter(object):
|
||||
if parts.query:
|
||||
env['pywb.proxy_req_uri'] += '?' + parts.query
|
||||
|
||||
# select prefix
|
||||
env['pywb_proxy_select'] = 'select.' + self.magic_name
|
||||
env['pywb_proxy_magic'] = self.magic_name
|
||||
|
||||
# route (static) and other resources to archival replay
|
||||
if env['pywb.proxy_host'] == self.magic_name:
|
||||
env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']
|
||||
return None
|
||||
|
||||
# check resolver, post connect
|
||||
if not self.resolver.pre_connect:
|
||||
route, coll, matcher, response = self.resolver.resolve(env)
|
||||
route, coll, matcher, ts, response = self.resolver.resolve(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
|
||||
rel_prefix = ''
|
||||
|
||||
# special case for proxy calendar
|
||||
if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
|
||||
url = env['pywb.proxy_req_uri'][1:]
|
||||
rel_prefix = '/'
|
||||
|
||||
if ts is not None:
|
||||
url = ts + '/' + url
|
||||
|
||||
wbrequest = route.request_class(env,
|
||||
request_uri=url,
|
||||
wb_url_str=url,
|
||||
coll=coll,
|
||||
host_prefix=host_prefix,
|
||||
rel_prefix=rel_prefix,
|
||||
wburl_class=route.handler.get_wburl_type(),
|
||||
urlrewriter_class=HttpsUrlRewriter,
|
||||
use_abs_prefix=False,
|
||||
@ -166,10 +176,10 @@ class ProxyRouter(object):
|
||||
if matcher:
|
||||
route.apply_filters(wbrequest, matcher)
|
||||
|
||||
if self.insert_banner:
|
||||
wbrequest.wb_url.mod = 'bn_'
|
||||
elif self.unaltered:
|
||||
if self.unaltered:
|
||||
wbrequest.wb_url.mod = 'id_'
|
||||
elif is_https:
|
||||
wbrequest.wb_url.mod = 'bn_'
|
||||
|
||||
return route.handler(wbrequest)
|
||||
|
||||
@ -209,13 +219,23 @@ class ProxyRouter(object):
|
||||
sock.send('\r\n')
|
||||
|
||||
hostname, port = env['REL_REQUEST_URI'].split(':')
|
||||
created, certfile = self.ca.get_cert_for_host(hostname)
|
||||
cert_host = hostname
|
||||
|
||||
ssl_sock = ssl.wrap_socket(sock,
|
||||
server_side=True,
|
||||
certfile=certfile,
|
||||
ciphers="ALL",
|
||||
ssl_version=ssl.PROTOCOL_SSLv23)
|
||||
host_parts = hostname.split('.', 1)
|
||||
if len(host_parts) == 2 and '.' in host_parts[1]:
|
||||
cert_host = host_parts[1]
|
||||
|
||||
created, certfile = self.ca.get_cert_for_host(cert_host,
|
||||
wildcard=True)
|
||||
|
||||
try:
|
||||
ssl_sock = ssl.wrap_socket(sock,
|
||||
server_side=True,
|
||||
certfile=certfile,
|
||||
ciphers="ALL",
|
||||
ssl_version=ssl.PROTOCOL_SSLv23)
|
||||
except Exception as se:
|
||||
raise BadRequestException(se.message)
|
||||
|
||||
env['pywb.proxy_ssl_sock'] = ssl_sock
|
||||
|
||||
@ -244,7 +264,6 @@ class ProxyRouter(object):
|
||||
env['PATH_INFO'] = queryparts[0]
|
||||
env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
|
||||
|
||||
|
||||
while True:
|
||||
line = buffreader.readline()
|
||||
if line:
|
||||
@ -270,8 +289,7 @@ class ProxyRouter(object):
|
||||
remain = buffreader.rem_length()
|
||||
if remain > 0:
|
||||
remainder = buffreader.read(self.BLOCK_SIZE)
|
||||
input_ = socket._fileobject(ssl_sock, mode='r')
|
||||
env['wsgi.input'] = BufferedReader(input_,
|
||||
env['wsgi.input'] = BufferedReader(ssl_sock,
|
||||
block_size=self.BLOCK_SIZE,
|
||||
starting_data=remainder)
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
from wbrequestresponse import WbResponse, WbRequest
|
||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||
from pywb.rewrite.wburl import WbUrl
|
||||
|
||||
import urlparse
|
||||
import base64
|
||||
import os
|
||||
@ -22,6 +24,9 @@ class UwsgiCache(object):
|
||||
def __contains__(self, item):
    """Support ``item in cache`` by delegating to ``uwsgi.cache_exists``."""
    found = uwsgi.cache_exists(item)
    return found
|
||||
|
||||
def __delitem__(self, item):
    """Support ``del cache[item]`` by delegating to ``uwsgi.cache_del``."""
    uwsgi.cache_del(item)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class BaseCollResolver(object):
|
||||
@ -34,12 +39,13 @@ class BaseCollResolver(object):
|
||||
route = None
|
||||
coll = None
|
||||
matcher = None
|
||||
ts = None
|
||||
|
||||
proxy_coll = self.get_proxy_coll(env)
|
||||
proxy_coll, ts = self.get_proxy_coll_ts(env)
|
||||
|
||||
# invalid parsing
|
||||
if proxy_coll == '':
|
||||
return None, None, None, self.select_coll_response(env)
|
||||
return None, None, None, None, self.select_coll_response(env)
|
||||
|
||||
if proxy_coll is None and isinstance(self.use_default_coll, str):
|
||||
proxy_coll = self.use_default_coll
|
||||
@ -56,7 +62,7 @@ class BaseCollResolver(object):
|
||||
|
||||
# if no match, return coll selection response
|
||||
if not route:
|
||||
return None, None, None, self.select_coll_response(env)
|
||||
return None, None, None, None, self.select_coll_response(env)
|
||||
|
||||
# if 'use_default_coll'
|
||||
elif self.use_default_coll == True or len(self.routes) == 1:
|
||||
@ -65,9 +71,9 @@ class BaseCollResolver(object):
|
||||
|
||||
# otherwise, return the appropriate coll selection response
|
||||
else:
|
||||
return None, None, None, self.select_coll_response(env)
|
||||
return None, None, None, None, self.select_coll_response(env)
|
||||
|
||||
return route, coll, matcher, None
|
||||
return route, coll, matcher, ts, None
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -79,14 +85,14 @@ class ProxyAuthResolver(BaseCollResolver):
|
||||
super(ProxyAuthResolver, self).__init__(routes, config)
|
||||
self.auth_msg = config.get('auth_msg', self.DEFAULT_MSG)
|
||||
|
||||
def get_proxy_coll(self, env):
|
||||
def get_proxy_coll_ts(self, env):
|
||||
proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')
|
||||
|
||||
if not proxy_auth:
|
||||
return None
|
||||
return None, None
|
||||
|
||||
proxy_coll = self.read_basic_auth_coll(proxy_auth)
|
||||
return proxy_coll
|
||||
return proxy_coll, None
|
||||
|
||||
def select_coll_response(self, env):
|
||||
proxy_msg = 'Basic realm="{0}"'.format(self.auth_msg)
|
||||
@ -120,6 +126,9 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
||||
config['pre_connect'] = False
|
||||
super(CookieResolver, self).__init__(routes, config)
|
||||
self.magic_name = config['magic_name']
|
||||
self.sethost_prefix = '-sethost.' + self.magic_name + '.'
|
||||
self.set_prefix = '-set.' + self.magic_name
|
||||
|
||||
self.cookie_name = config.get('cookie_name', '__pywb_coll')
|
||||
self.proxy_select_view = config.get('proxy_select_view')
|
||||
|
||||
@ -128,9 +137,9 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
||||
else:
|
||||
self.cache = {}
|
||||
|
||||
def get_proxy_coll(self, env):
|
||||
coll, sesh_id = self.get_coll(env)
|
||||
return coll
|
||||
def get_proxy_coll_ts(self, env):
|
||||
coll, ts, sesh_id = self.get_coll(env)
|
||||
return coll, ts
|
||||
|
||||
def select_coll_response(self, env):
|
||||
return self.make_magic_response('auto',
|
||||
@ -141,27 +150,44 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
||||
server_name = env['pywb.proxy_host']
|
||||
|
||||
if ('.' + self.magic_name) in server_name:
|
||||
return None, None, None, self.handle_magic_page(env)
|
||||
response = self.handle_magic_page(env)
|
||||
if response:
|
||||
return None, None, None, None, response
|
||||
|
||||
return super(CookieResolver, self).resolve(env)
|
||||
|
||||
def handle_magic_page(self, env):
|
||||
url = env['REL_REQUEST_URI']
|
||||
parts = urlparse.urlsplit(url)
|
||||
request_url = env['REL_REQUEST_URI']
|
||||
parts = urlparse.urlsplit(request_url)
|
||||
server_name = env['pywb.proxy_host']
|
||||
|
||||
path_url = parts.path[1:]
|
||||
if parts.query:
|
||||
path_url += '?' + parts.query
|
||||
|
||||
if parts.netloc.startswith('auto'):
|
||||
coll, sesh_id = self.get_coll(env)
|
||||
if server_name.startswith('auto'):
|
||||
coll, ts, sesh_id = self.get_coll(env)
|
||||
|
||||
if coll:
|
||||
return self.make_sethost_cookie_response(sesh_id, path_url, env)
|
||||
else:
|
||||
return self.make_magic_response('select', path_url, env)
|
||||
|
||||
elif '.set.' in parts.netloc:
|
||||
elif server_name.startswith('query.'):
|
||||
wb_url = WbUrl(path_url)
|
||||
|
||||
# only dealing with specific timestamp setting
|
||||
if wb_url.is_query():
|
||||
return None
|
||||
|
||||
coll, ts, sesh_id = self.get_coll(env)
|
||||
if not coll:
|
||||
return self.make_magic_response('select', path_url, env)
|
||||
|
||||
self.set_ts(sesh_id, wb_url.timestamp)
|
||||
return self.make_redir_response(wb_url.url)
|
||||
|
||||
elif server_name.endswith(self.set_prefix):
|
||||
old_sesh_id = self.extract_client_cookie(env, self.cookie_name)
|
||||
sesh_id = self.create_renew_sesh_id(old_sesh_id)
|
||||
|
||||
@ -170,34 +196,33 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
||||
else:
|
||||
headers = None
|
||||
|
||||
value, name, _ = parts.netloc.split('.', 2)
|
||||
coll = server_name[:-len(self.set_prefix)]
|
||||
|
||||
# set sesh value
|
||||
self.cache[sesh_id] = value
|
||||
self.set_coll(sesh_id, coll)
|
||||
|
||||
return self.make_sethost_cookie_response(sesh_id, path_url, env,
|
||||
headers=headers)
|
||||
|
||||
elif '.sethost.' in parts.netloc:
|
||||
host_parts = parts.netloc.split('.', 1)
|
||||
sesh_id = host_parts[0]
|
||||
elif self.sethost_prefix in server_name:
|
||||
inx = server_name.find(self.sethost_prefix)
|
||||
sesh_id = server_name[:inx]
|
||||
|
||||
inx = parts.netloc.find('.' + self.magic_name + '.')
|
||||
domain = parts.netloc[inx + len(self.magic_name) + 2:]
|
||||
domain = server_name[inx + len(self.sethost_prefix):]
|
||||
|
||||
headers = self.make_cookie_headers(sesh_id, domain)
|
||||
|
||||
full_url = env['pywb.proxy_scheme'] + '://' + domain
|
||||
full_url += '/' + path_url
|
||||
return WbResponse.redir_response(full_url, headers=headers)
|
||||
return self.make_redir_response(full_url, headers=headers)
|
||||
|
||||
elif 'select.' in parts.netloc:
|
||||
elif 'select.' in server_name:
|
||||
if not self.proxy_select_view:
|
||||
return WbResponse.text_response('select text for ' + path_url)
|
||||
|
||||
coll, sesh_id = self.get_coll(env)
|
||||
coll, ts, sesh_id = self.get_coll(env)
|
||||
|
||||
route_temp = env['pywb.proxy_scheme'] + '://%s.coll.set.'
|
||||
route_temp = env['pywb.proxy_scheme'] + '://%s-set.'
|
||||
route_temp += self.magic_name + '/' + path_url
|
||||
|
||||
return (self.proxy_select_view.
|
||||
@ -217,14 +242,18 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
||||
headers = [('Set-Cookie', cookie_val)]
|
||||
return headers
|
||||
|
||||
def make_sethost_cookie_response(self, sesh_id, path_url, env, headers=None):
|
||||
def make_sethost_cookie_response(self, sesh_id, path_url,
|
||||
env, headers=None):
|
||||
if '://' not in path_url:
|
||||
path_url = 'http://' + path_url
|
||||
|
||||
path_parts = urlparse.urlsplit(path_url)
|
||||
|
||||
new_url = path_parts.path[1:]
|
||||
if path_parts.query:
|
||||
new_url += '?' + path_parts.query
|
||||
|
||||
return self.make_magic_response(sesh_id + '.sethost', new_url, env,
|
||||
return self.make_magic_response(sesh_id + '-sethost', new_url, env,
|
||||
suffix=path_parts.netloc,
|
||||
headers=headers)
|
||||
|
||||
@ -236,25 +265,44 @@ class CookieResolver(BaseCollResolver): # pragma: no cover
|
||||
if suffix:
|
||||
full_url += '.' + suffix
|
||||
full_url += '/' + url
|
||||
return WbResponse.redir_response(full_url, headers=headers)
|
||||
return self.make_redir_response(full_url, headers=headers)
|
||||
|
||||
def set_coll(self, sesh_id, coll):
    """Store the collection selected for a session.

    The value is written to ``self.cache`` under the session id
    suffixed with ``':c'``.
    """
    coll_key = sesh_id + ':c'
    self.cache[coll_key] = coll
|
||||
|
||||
def set_ts(self, sesh_id, ts):
    """Store or clear the timestamp selected for a session.

    A truthy ``ts`` is written to ``self.cache`` under the session id
    suffixed with ``':t'``.  A falsy ``ts`` removes that entry, so
    omitting the timestamp resets the session to the latest capture.

    Clearing is a no-op when no timestamp was stored for the session;
    previously a dict-backed cache raised ``KeyError`` in that case.
    """
    ts_key = sesh_id + ':t'

    if ts:
        self.cache[ts_key] = ts
        return

    # Use ``del`` rather than ``pop`` so any cache object that only
    # implements ``__delitem__`` keeps working; a missing entry already
    # means "latest capture", so there is nothing to undo.
    try:
        del self.cache[ts_key]
    except KeyError:
        pass
|
||||
|
||||
def get_coll(self, env):
|
||||
sesh_id = self.extract_client_cookie(env, self.cookie_name)
|
||||
|
||||
coll = None
|
||||
ts = None
|
||||
if sesh_id:
|
||||
coll = self.cache[sesh_id]
|
||||
coll = self.cache[sesh_id + ':c']
|
||||
try:
|
||||
ts = self.cache[sesh_id + ':t']
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
return coll, sesh_id
|
||||
return coll, ts, sesh_id
|
||||
|
||||
def create_renew_sesh_id(self, sesh_id, force=False):
|
||||
#if sesh_id in self.cache and not force:
|
||||
if sesh_id and (sesh_id in self.cache) and not force:
|
||||
if sesh_id and ((sesh_id + ':c') in self.cache) and not force:
|
||||
return sesh_id
|
||||
|
||||
sesh_id = base64.b32encode(os.urandom(5)).lower()
|
||||
return sesh_id
|
||||
|
||||
def make_redir_response(self, url, headers=None):
    """Return ``WbResponse.redir_response`` for ``url``, passing along
    the optional ``headers``.
    """
    redirect = WbResponse.redir_response(url, headers=headers)
    return redirect
|
||||
|
||||
@staticmethod
|
||||
def extract_client_cookie(env, cookie_name):
|
||||
cookie_header = env.get('HTTP_COOKIE')
|
||||
|
@ -125,7 +125,11 @@ class WSGIApp(object):
|
||||
else:
|
||||
err_url = None
|
||||
|
||||
err_msg = exc.message.encode('utf-8')
|
||||
try:
|
||||
err_msg = exc.message.encode('utf-8')
|
||||
except Exception:
|
||||
err_msg = exc.message
|
||||
err_url = ''
|
||||
|
||||
if print_trace:
|
||||
import traceback
|
||||
|
@ -144,7 +144,7 @@ class HttpsUrlRewriter(object):
|
||||
else:
|
||||
return url
|
||||
|
||||
def get_timestamp_url(self, timestamp, url):
|
||||
def get_timestamp_url(self, timestamp, url=''):
|
||||
return url
|
||||
|
||||
def get_abs_url(self, url=''):
|
||||
|
@ -70,9 +70,13 @@ function init_banner() {
|
||||
|
||||
text += "<b id='_wb_capture_info'>" + capture_str + "</b>";
|
||||
|
||||
if (wbinfo.proxy_select && wbinfo.url) {
|
||||
full_url = wbinfo.proxy_select + "/" + wbinfo.url;
|
||||
text += '<br/><a href="//' + full_url + '">Switch Collection</a>';
|
||||
if (wbinfo.proxy_magic && wbinfo.url) {
|
||||
var select_url = wbinfo.proxy_magic + "/" + wbinfo.url;
|
||||
var query_url = wbinfo.proxy_magic + "/*/" + wbinfo.url;
|
||||
text += '<br/>'
|
||||
text += 'From <b>' + wbinfo.coll + '</b> <a href="//select.' + select_url + '">[Switch]</a>';
|
||||
text += ' ';
|
||||
text += '<a href="//query.' + query_url + '">View All Captures</a>';
|
||||
}
|
||||
|
||||
banner.innerHTML = text;
|
||||
|
@ -10,9 +10,9 @@
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
{% if env.pywb_proxy_select and err_url and status == '404 Not Found' %}
|
||||
{% if env.pywb_proxy_magic and err_url and status == '404 Not Found' %}
|
||||
<p>
|
||||
<a href="//{{ env.pywb_proxy_select }}/{{ err_url }}">Try Different Collections</a>
|
||||
<a href="//select.{{ env.pywb_proxy_magic }}/{{ err_url }}">Try Different Collection</a>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
|
@ -20,7 +20,8 @@
|
||||
wbinfo.is_frame_mp = {{"true" if wbrequest.wb_url.mod == 'mp_' else "false"}};
|
||||
wbinfo.canon_url = "{{ canon_url }}";
|
||||
wbinfo.is_live = {{ "true" if cdx.is_live else "false" }};
|
||||
wbinfo.proxy_select = "{{ wbrequest.env.pywb_proxy_select }}";
|
||||
wbinfo.coll = "{{ wbrequest.coll }}";
|
||||
wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}";
|
||||
</script>
|
||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
|
||||
<link rel='stylesheet' href='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.css'/>
|
||||
|
@ -78,7 +78,7 @@ def create_live_handler(config):
|
||||
|
||||
#=================================================================
|
||||
def init_route_config(value, config):
|
||||
if isinstance(value, str):
|
||||
if isinstance(value, str) or isinstance(value, list):
|
||||
value = dict(index_paths=value)
|
||||
|
||||
route_config = DictChain(value, config)
|
||||
|
Loading…
x
Reference in New Issue
Block a user