class CertificateAuthority(object):
    """Small on-disk certificate authority for the HTTPS proxy.

    On construction the CA key and self-signed certificate are read from
    ``ca_file``, or generated and written there if that file does not exist.
    Indexing an instance with a host name (``ca[hostname]``) returns the path
    of a PEM file holding a private key and a certificate for that name
    signed by the CA; the file is created under ``certs_dir`` on first use
    and reused afterwards.
    """

    logger = logging.getLogger('pywb.CertificateAuthority')

    # Digest used for every signature. SHA-1 signed certificates are
    # rejected by current TLS clients, so SHA-256 is used instead.
    SIGN_DIGEST = 'sha256'

    # pyOpenSSL's X509.set_version() is zero-based: 2 encodes X.509 v3,
    # which is the version required for certificates carrying extensions.
    X509_V3 = 2

    def __init__(self, ca_file='pywb-ca.pem', certs_dir='./pywb-ca'):
        """Load or create the CA and make sure the cert cache dir exists.

        :param ca_file: PEM file holding the CA private key and certificate.
        :param certs_dir: directory where per-host key+cert files are cached.
        """
        self.ca_file = ca_file
        self.certs_dir = certs_dir

        if not os.path.exists(ca_file):
            self._generate_ca()
        else:
            self._read_ca(ca_file)

        if not os.path.exists(certs_dir):
            self.logger.info("directory for generated certs {} doesn't exist, creating it".format(certs_dir))
            # makedirs (not mkdir) so a nested certs_dir can be created too
            os.makedirs(certs_dir)

    def _generate_ca(self):
        """Create a new CA key and self-signed cert and write them to ``ca_file``."""
        # Generate key
        self.key = OpenSSL.crypto.PKey()
        self.key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

        # Generate certificate
        self.cert = OpenSSL.crypto.X509()
        self.cert.set_version(self.X509_V3)
        # avoid sec_error_reused_issuer_and_serial
        self.cert.set_serial_number(random.randint(0, 2**64 - 1))
        # NOTE(review): the placeholder is filled with an empty string, so the
        # CN is literally 'pywb CA on ' -- presumably a host name was intended.
        self.cert.get_subject().CN = 'pywb CA on {}'.format('')
        self.cert.gmtime_adj_notBefore(0)                    # now
        self.cert.gmtime_adj_notAfter(100*365*24*60*60)      # 100 yrs in future
        self.cert.set_issuer(self.cert.get_subject())        # self-signed
        self.cert.set_pubkey(self.key)
        self.cert.add_extensions([
            OpenSSL.crypto.X509Extension(b"basicConstraints", True, b"CA:TRUE, pathlen:0"),
            OpenSSL.crypto.X509Extension(b"keyUsage", True, b"keyCertSign, cRLSign"),
            OpenSSL.crypto.X509Extension(b"subjectKeyIdentifier", False, b"hash", subject=self.cert),
        ])
        self.cert.sign(self.key, self.SIGN_DIGEST)

        self._write_pem(self.ca_file, self.key, self.cert)

        self.logger.info('generated CA key+cert and wrote to {}'.format(self.ca_file))

    def _read_ca(self, filename):
        """Load the CA certificate and private key from one PEM file."""
        # Read once and close the handle; both objects are parsed from the
        # same buffer since the file holds the key followed by the cert.
        with open(filename, 'rb') as f:
            pem = f.read()

        self.cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
        self.key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
        self.logger.info('read CA key+cert from {}'.format(self.ca_file))

    @staticmethod
    def _write_pem(path, key, cert):
        """Write ``key`` followed by ``cert`` to ``path`` in PEM format."""
        with open(path, 'wb') as f:
            f.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key))
            f.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert))

    def _cert_path(self, cn):
        """Return the cache path for host name ``cn``.

        :raises ValueError: if ``cn`` is empty or contains a path separator
            or NUL byte, i.e. could name a file outside ``certs_dir``.
        """
        separators = [os.sep, '\0']
        if os.altsep:
            separators.append(os.altsep)

        if not cn or any(sep in cn for sep in separators):
            raise ValueError('invalid certificate host name: {!r}'.format(cn))

        return os.path.join(self.certs_dir, '{}.pem'.format(cn))

    def __getitem__(self, cn):
        """Return the path of a PEM key+cert file for host name ``cn``.

        The certificate is generated and signed by the CA the first time a
        name is requested; later requests reuse the cached file.

        :param cn: host name to place in the certificate's common name.
        :returns: path of the PEM file inside ``certs_dir``.
        :raises ValueError: if ``cn`` is not usable as a file name.
        """
        # Validate before touching the filesystem: cn is taken from the
        # client's request and must not be able to escape certs_dir.
        cnp = self._cert_path(cn)

        if os.path.exists(cnp):
            return cnp

        # create certificate
        key = OpenSSL.crypto.PKey()
        key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

        # Generate CSR
        req = OpenSSL.crypto.X509Req()
        req.get_subject().CN = cn
        req.set_pubkey(key)
        req.sign(key, self.SIGN_DIGEST)

        # Sign CSR
        cert = OpenSSL.crypto.X509()
        cert.set_subject(req.get_subject())
        cert.set_serial_number(random.randint(0, 2**64 - 1))
        cert.gmtime_adj_notBefore(0)
        cert.gmtime_adj_notAfter(10*365*24*60*60)    # 10 yrs in future
        cert.set_issuer(self.cert.get_subject())
        cert.set_pubkey(req.get_pubkey())
        cert.sign(self.key, self.SIGN_DIGEST)

        self._write_pem(cnp, key, cert)

        self.logger.info('wrote generated key+cert to {}'.format(cnp))

        return cnp
def get_request_socket(self, env):
    """Return the client connection socket for this request, or ``None``.

    The socket is looked up in a server-specific way:

    * uwsgi (``uwsgi.version`` set in ``env``): built from the file
      descriptor returned by ``uwsgi.connection_fd()``
    * gunicorn: taken directly from ``env['gunicorn.socket']``
    * otherwise: taken from the ``_sock`` attribute of ``wsgi.input``,
      if that attribute exists

    :param env: the WSGI environ dict of the current request.
    :returns: a socket object, or ``None`` when no socket could be found,
        so the caller can test the result for falsiness.
    """
    # Bind up front: without this, falling through every branch raised
    # UnboundLocalError at the return instead of signalling "no socket".
    sock = None

    if env.get('uwsgi.version'):
        # imported lazily since the module only exists when running in uwsgi
        import uwsgi
        fd = uwsgi.connection_fd()
        conn = socket.fromfd(fd, socket.AF_INET, socket.SOCK_STREAM)
        sock = socket.socket(_sock=conn)
    elif env.get('gunicorn.socket'):
        sock = env['gunicorn.socket']
    else:
        # attempt to find socket from wsgi.input
        input_ = env.get('wsgi.input')
        if input_ and hasattr(input_, '_sock'):
            sock = socket.socket(_sock=input_._sock)

    return sock
ssl_sock.write('HTTP/1.1 ' + statusline + '\r\n') + ssl_sock.write('HTTP/1.0 ' + statusline + '\r\n') for name, value in headers: ssl_sock.write(name + ': ' + value + '\r\n') resp_iter = self.handle_methods(env, ssl_start_response) ssl_sock = env.get('pywb.proxy_ssl_sock') - if ssl_sock: - ssl_sock.write('\r\n') + if not ssl_sock: + return resp_iter - for obj in resp_iter: - ssl_sock.write(obj) + ssl_sock.write('\r\n') - ssl_sock.close() + for obj in resp_iter: + ssl_sock.write(obj) + + ssl_sock.close() start_response(env['pywb.proxy_statusline'], []) @@ -178,6 +181,10 @@ def init_app(init_func, load_yaml=True, config_file=None, config={}): def start_wsgi_server(the_app, name, default_port=None): # pragma: no cover from wsgiref.simple_server import make_server + # disable is_hop_by_hop restrictions + import wsgiref.handlers + wsgiref.handlers.is_hop_by_hop = lambda x: False + port = the_app.port if not port: