1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

https proxy: add ca generator!

support uwsgi, gunicorn and the wsgiref reference server
better handling of 407, other error responses in response to CONNECT
This commit is contained in:
Ilya Kreymer 2014-07-26 13:24:53 -07:00
parent 739f23da9e
commit eca3cf5fbf
4 changed files with 160 additions and 44 deletions

View File

@ -109,3 +109,6 @@ enable_memento: true
# Replay content in an iframe # Replay content in an iframe
framed_replay: true framed_replay: true
debug_echo_env: True

87
pywb/framework/certa.py Normal file
View File

@ -0,0 +1,87 @@
import logging
import os
import OpenSSL
import random
class CertificateAuthority(object):
    """Local certificate authority used to mint per-host TLS certificates.

    On construction the CA key and certificate are loaded from ``ca_file``,
    or generated and written there if the file does not exist. Indexing the
    instance with a hostname (``ca['example.com']``) returns the path of a
    PEM file holding a key and a certificate for that hostname signed by
    this CA, creating the file on first use.
    """

    logger = logging.getLogger('pywb.CertificateAuthority')

    # Digest for every signature made here. SHA-1 signed certificates are
    # rejected by current TLS clients, so SHA-256 is used instead.
    _SIGN_DIGEST = 'sha256'

    def __init__(self, ca_file='pywb-ca.pem', certs_dir='./pywb-ca'):
        """Load or create the CA and make sure the cert cache dir exists.

        :param ca_file: PEM file holding the CA private key and certificate.
        :param certs_dir: directory where generated host certs are stored.
        """
        self.ca_file = ca_file
        self.certs_dir = certs_dir

        if not os.path.exists(ca_file):
            self._generate_ca()
        else:
            self._read_ca(ca_file)

        if not os.path.exists(certs_dir):
            self.logger.info("directory for generated certs {} doesn't exist, creating it".format(certs_dir))
            os.mkdir(certs_dir)

    def _generate_ca(self):
        """Create a new self-signed CA key+cert and write both to ca_file."""
        # Generate key
        self.key = OpenSSL.crypto.PKey()
        self.key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

        # Generate certificate
        self.cert = OpenSSL.crypto.X509()
        # pyOpenSSL version numbers are zero-based: 2 means X.509 v3,
        # which is required for the extensions added below.
        self.cert.set_version(2)
        # avoid sec_error_reused_issuer_and_serial
        self.cert.set_serial_number(random.randint(0, 2**64 - 1))
        # The placeholder is filled with an empty string, so the CN is
        # literally 'pywb CA on ' (kept as in the original).
        self.cert.get_subject().CN = 'pywb CA on {}'.format('')
        self.cert.gmtime_adj_notBefore(0)                  # now
        self.cert.gmtime_adj_notAfter(100*365*24*60*60)    # 100 yrs in future
        self.cert.set_issuer(self.cert.get_subject())      # self-signed
        self.cert.set_pubkey(self.key)
        self.cert.add_extensions([
            OpenSSL.crypto.X509Extension(b"basicConstraints", True, b"CA:TRUE, pathlen:0"),
            OpenSSL.crypto.X509Extension(b"keyUsage", True, b"keyCertSign, cRLSign"),
            OpenSSL.crypto.X509Extension(b"subjectKeyIdentifier", False, b"hash", subject=self.cert),
        ])
        self.cert.sign(self.key, self._SIGN_DIGEST)

        with open(self.ca_file, 'wb+') as f:
            f.write(OpenSSL.crypto.dump_privatekey(OpenSSL.SSL.FILETYPE_PEM, self.key))
            f.write(OpenSSL.crypto.dump_certificate(OpenSSL.SSL.FILETYPE_PEM, self.cert))

        self.logger.info('generated CA key+cert and wrote to {}'.format(self.ca_file))

    def _read_ca(self, filename):
        """Load the CA certificate and private key from one PEM file.

        :param filename: path of a PEM file containing both key and cert.
        """
        # Read once and close the handle; both objects are parsed from the
        # same PEM buffer.
        with open(filename, 'rb') as f:
            pem = f.read()

        self.cert = OpenSSL.crypto.load_certificate(OpenSSL.SSL.FILETYPE_PEM, pem)
        self.key = OpenSSL.crypto.load_privatekey(OpenSSL.SSL.FILETYPE_PEM, pem)
        self.logger.info('read CA key+cert from {}'.format(filename))

    def __getitem__(self, cn):
        """Return the path of a PEM key+cert file for common name ``cn``.

        The file is generated and signed by this CA if it does not exist.

        :param cn: hostname to use as the certificate common name.
        :returns: path to ``<certs_dir>/<cn>.pem``.
        :raises ValueError: if ``cn`` is empty or contains a path separator
            or NUL byte (it is used to build a filesystem path).
        """
        # cn is used as a file name, so refuse anything that could escape
        # certs_dir.
        if not cn or any(ch in cn for ch in ('/', '\\', '\0')):
            raise ValueError('invalid certificate common name: {!r}'.format(cn))

        cnp = os.path.join(self.certs_dir, '%s.pem' % cn)
        if os.path.exists(cnp):
            return cnp

        # create certificate
        key = OpenSSL.crypto.PKey()
        key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

        # Generate CSR
        req = OpenSSL.crypto.X509Req()
        req.get_subject().CN = cn
        req.set_pubkey(key)
        req.sign(key, self._SIGN_DIGEST)

        # Sign CSR
        # NOTE(review): no subjectAltName extension is added; some clients
        # may require one -- confirm against the targeted browsers.
        cert = OpenSSL.crypto.X509()
        cert.set_subject(req.get_subject())
        cert.set_serial_number(random.randint(0, 2**64 - 1))
        cert.gmtime_adj_notBefore(0)
        cert.gmtime_adj_notAfter(10*365*24*60*60)    # 10 yrs in future
        cert.set_issuer(self.cert.get_subject())
        cert.set_pubkey(req.get_pubkey())
        cert.sign(self.key, self._SIGN_DIGEST)

        with open(cnp, 'wb+') as f:
            f.write(OpenSSL.crypto.dump_privatekey(OpenSSL.SSL.FILETYPE_PEM, key))
            f.write(OpenSSL.crypto.dump_certificate(OpenSSL.SSL.FILETYPE_PEM, cert))

        self.logger.info('wrote generated key+cert to {}'.format(cnp))
        return cnp

View File

@ -4,8 +4,15 @@ from archivalrouter import ArchivalRouter
import urlparse import urlparse
import base64 import base64
import socket
import ssl
from io import BytesIO
from pywb.rewrite.url_rewriter import HttpsUrlRewriter from pywb.rewrite.url_rewriter import HttpsUrlRewriter
from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.utils.wbexception import BadRequestException
from certa import CertificateAuthority
#================================================================= #=================================================================
@ -61,19 +68,21 @@ class ProxyRouter(object):
self.unaltered = proxy_options.get('unaltered_replay', False) self.unaltered = proxy_options.get('unaltered_replay', False)
self.ca = CertificateAuthority()
def __call__(self, env): def __call__(self, env):
if env['REQUEST_METHOD'] == 'CONNECT': is_https = (env['REQUEST_METHOD'] == 'CONNECT')
if not self.handle_connect(env):
if not is_https:
url = env['REL_REQUEST_URI']
if url.endswith('/proxy.pac'):
return self.make_pac_response(env)
if not url.startswith(('http://', 'https://')):
return None return None
url = env['REL_REQUEST_URI']
if url.endswith('/proxy.pac'):
return self.make_pac_response(env)
if not url.startswith(('http://', 'https://')):
return None
proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION') proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')
route = None route = None
@ -108,6 +117,12 @@ class ProxyRouter(object):
else: else:
return self.proxy_auth_coll_response() return self.proxy_auth_coll_response()
# do connect, then get updated url
if is_https:
self.handle_connect(env)
url = env['REL_REQUEST_URI']
wbrequest = route.request_class(env, wbrequest = route.request_class(env,
request_uri=url, request_uri=url,
wb_url_str=url, wb_url_str=url,
@ -126,36 +141,41 @@ class ProxyRouter(object):
return route.handler(wbrequest) return route.handler(wbrequest)
def handle_connect(self, env): def get_request_socket(self, env):
import uwsgi if env.get('uwsgi.version'):
import socket import uwsgi
import ssl fd = uwsgi.connection_fd()
from io import BytesIO conn = socket.fromfd(fd, socket.AF_INET, socket.SOCK_STREAM)
sock = socket.socket(_sock=conn)
fd = uwsgi.connection_fd() elif env.get('gunicorn.socket'):
conn = socket.fromfd(fd, socket.AF_INET, socket.SOCK_STREAM) sock = env['gunicorn.socket']
sock = socket.socket(_sock=conn)
if (self.use_default_coll or
len(self.routes) == 1 or
env.get('HTTP_PROXY_AUTHORIZATION') is not None):
sock.send('HTTP/1.0 200 Connection Established\r\n')
sock.send('Server: pywb proxy\r\n')
sock.send('\r\n')
else: else:
env['pywb.proxy_statusline'] = '407 Proxy Auth Required' # attempt to find socket from wsgi.input
sock.send('HTTP/1.0 407 Proxy Auth Required\r\n') input_ = env.get('wsgi.input')
sock.send('Server: pywb proxy\r\n') if input_ and hasattr(input_, '_sock'):
sock.send('\r\n') sock = socket.socket(_sock=input_._sock)
return False
return sock
def handle_connect(self, env):
sock = self.get_request_socket(env)
if not sock:
return WbResponse.text_response('HTTPS Proxy Not Supported',
'405 HTTPS Proxy Not Supported')
sock.send('HTTP/1.0 200 Connection Established\r\n')
sock.send('Server: pywb proxy\r\n')
sock.send('\r\n')
hostname = env['REL_REQUEST_URI'].split(':')[0]
ssl_sock = ssl.wrap_socket(sock, server_side=True, ssl_sock = ssl.wrap_socket(sock, server_side=True,
certfile='/tmp/testcert.pem', certfile=self.ca[hostname])
ssl_version=ssl.PROTOCOL_SSLv23) #ssl_version=ssl.PROTOCOL_SSLv23)
env['pywb.proxy_ssl_sock'] = ssl_sock env['pywb.proxy_ssl_sock'] = ssl_sock
#todo: better reading of all headers
buff = ssl_sock.recv(4096) buff = ssl_sock.recv(4096)
buffreader = BytesIO(buff) buffreader = BytesIO(buff)
@ -164,7 +184,7 @@ class ProxyRouter(object):
statusparts = statusline.split(' ') statusparts = statusline.split(' ')
if len(statusparts) < 3: if len(statusparts) < 3:
return raise BadRequestException('Invalid Proxy Request')
env['REQUEST_METHOD'] = statusparts[0] env['REQUEST_METHOD'] = statusparts[0]
env['REL_REQUEST_URI'] = ('https://' + env['REL_REQUEST_URI'] = ('https://' +
@ -177,6 +197,8 @@ class ProxyRouter(object):
env['PATH_INFO'] = queryparts[0] env['PATH_INFO'] = queryparts[0]
env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else '' env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
env['wsgi.input'] = socket._fileobject(ssl_sock, mode='r')
while True: while True:
line = buffreader.readline() line = buffreader.readline()
if not line: if not line:
@ -189,9 +211,6 @@ class ProxyRouter(object):
name = 'HTTP_' + parts[0].replace('-', '_').upper() name = 'HTTP_' + parts[0].replace('-', '_').upper()
env[name] = parts[1] env[name] = parts[1]
return True
# Proxy Auto-Config (PAC) script for the proxy # Proxy Auto-Config (PAC) script for the proxy
def make_pac_response(self, env): def make_pac_response(self, env):
import os import os

View File

@ -59,24 +59,27 @@ class WSGIApp(object):
def ssl_start_response(statusline, headers): def ssl_start_response(statusline, headers):
ssl_sock = env.get('pywb.proxy_ssl_sock') ssl_sock = env.get('pywb.proxy_ssl_sock')
if not ssl_sock: if not ssl_sock:
start_response(statusline, headers)
return return
env['pywb.proxy_statusline'] = statusline env['pywb.proxy_statusline'] = statusline
ssl_sock.write('HTTP/1.1 ' + statusline + '\r\n') ssl_sock.write('HTTP/1.0 ' + statusline + '\r\n')
for name, value in headers: for name, value in headers:
ssl_sock.write(name + ': ' + value + '\r\n') ssl_sock.write(name + ': ' + value + '\r\n')
resp_iter = self.handle_methods(env, ssl_start_response) resp_iter = self.handle_methods(env, ssl_start_response)
ssl_sock = env.get('pywb.proxy_ssl_sock') ssl_sock = env.get('pywb.proxy_ssl_sock')
if ssl_sock: if not ssl_sock:
ssl_sock.write('\r\n') return resp_iter
for obj in resp_iter: ssl_sock.write('\r\n')
ssl_sock.write(obj)
ssl_sock.close() for obj in resp_iter:
ssl_sock.write(obj)
ssl_sock.close()
start_response(env['pywb.proxy_statusline'], []) start_response(env['pywb.proxy_statusline'], [])
@ -178,6 +181,10 @@ def init_app(init_func, load_yaml=True, config_file=None, config={}):
def start_wsgi_server(the_app, name, default_port=None): # pragma: no cover def start_wsgi_server(the_app, name, default_port=None): # pragma: no cover
from wsgiref.simple_server import make_server from wsgiref.simple_server import make_server
# disable is_hop_by_hop restrictions
import wsgiref.handlers
wsgiref.handlers.is_hop_by_hop = lambda x: False
port = the_app.port port = the_app.port
if not port: if not port: