1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

https proxy: add ca generator!

support uwsgi, gunicorn and the reference (wsgiref) WSGI server
better handling of 407, other error responses in response to CONNECT
This commit is contained in:
Ilya Kreymer 2014-07-26 13:24:53 -07:00
parent 739f23da9e
commit eca3cf5fbf
4 changed files with 160 additions and 44 deletions

View File

@ -109,3 +109,6 @@ enable_memento: true
# Replay content in an iframe
framed_replay: true
debug_echo_env: True

87
pywb/framework/certa.py Normal file
View File

@ -0,0 +1,87 @@
import logging
import os
import OpenSSL
import random
class CertificateAuthority(object):
    """Small on-disk certificate authority used by the HTTPS proxy.

    On construction the CA key+cert are loaded from ``ca_file`` if that file
    exists, otherwise a new self-signed CA is generated and written there.
    Indexing the instance with a hostname (``ca['example.com']``) returns the
    path of a PEM file in ``certs_dir`` holding a private key followed by a
    certificate for that hostname signed by the CA; the file is generated on
    first use and reused afterwards.
    """

    logger = logging.getLogger('pywb.CertificateAuthority')

    # SHA-1 signed certificates are rejected by current TLS clients.
    _DIGEST = 'sha256'

    # Upper bound for randomly chosen certificate serial numbers.
    _MAX_SERIAL = 2 ** 64 - 1

    # pyOpenSSL certificate versions are zero-based: 2 means X509v3, which is
    # required for the extensions added to the CA certificate.
    _X509_V3 = 2

    def __init__(self, ca_file='pywb-ca.pem', certs_dir='./pywb-ca'):
        """Load or create the CA and make sure ``certs_dir`` exists.

        :param ca_file: PEM file holding the CA private key and certificate.
        :param certs_dir: directory where per-host key+cert files are stored.
        """
        self.ca_file = ca_file
        self.certs_dir = certs_dir

        if not os.path.exists(ca_file):
            self._generate_ca()
        else:
            self._read_ca(ca_file)

        if not os.path.exists(certs_dir):
            self.logger.info("directory for generated certs {} doesn't exist, creating it".format(certs_dir))
            # makedirs (not mkdir) so a nested certs_dir also works
            os.makedirs(certs_dir)

    def _random_serial(self):
        """Return a random positive certificate serial number."""
        return random.randint(1, self._MAX_SERIAL)

    def _generate_ca(self):
        """Create a new self-signed CA key+cert and write both to ``ca_file``."""
        # Generate key
        self.key = OpenSSL.crypto.PKey()
        self.key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

        # Generate certificate
        self.cert = OpenSSL.crypto.X509()
        self.cert.set_version(self._X509_V3)
        # avoid sec_error_reused_issuer_and_serial
        self.cert.set_serial_number(self._random_serial())
        self.cert.get_subject().CN = 'pywb CA on {}'.format('')
        self.cert.gmtime_adj_notBefore(0)                # now
        self.cert.gmtime_adj_notAfter(100*365*24*60*60)  # 100 yrs in future
        # self-signed: issuer is the certificate's own subject
        self.cert.set_issuer(self.cert.get_subject())
        self.cert.set_pubkey(self.key)
        self.cert.add_extensions([
            OpenSSL.crypto.X509Extension(b"basicConstraints", True, b"CA:TRUE, pathlen:0"),
            OpenSSL.crypto.X509Extension(b"keyUsage", True, b"keyCertSign, cRLSign"),
            OpenSSL.crypto.X509Extension(b"subjectKeyIdentifier", False, b"hash", subject=self.cert),
        ])
        self.cert.sign(self.key, self._DIGEST)

        self._write_pem(self.ca_file, self.key, self.cert)
        self.logger.info('generated CA key+cert and wrote to {}'.format(self.ca_file))

    def _read_ca(self, filename):
        """Load the CA certificate and private key from the PEM ``filename``."""
        # The file holds both PEM objects; read it once and close it promptly.
        with open(filename, 'rb') as f:
            pem = f.read()

        self.cert = OpenSSL.crypto.load_certificate(OpenSSL.SSL.FILETYPE_PEM, pem)
        self.key = OpenSSL.crypto.load_privatekey(OpenSSL.SSL.FILETYPE_PEM, pem)
        self.logger.info('read CA key+cert from {}'.format(self.ca_file))

    @staticmethod
    def _write_pem(path, key, cert):
        """Write ``key`` followed by ``cert`` to ``path`` in PEM format."""
        with open(path, 'wb') as f:
            f.write(OpenSSL.crypto.dump_privatekey(OpenSSL.SSL.FILETYPE_PEM, key))
            f.write(OpenSSL.crypto.dump_certificate(OpenSSL.SSL.FILETYPE_PEM, cert))

    def _cert_path(self, cn):
        """Return the path of the PEM file for ``cn`` inside ``certs_dir``.

        :raises ValueError: if ``cn`` is empty or contains a path separator or
            NUL byte, since it would then name a file outside ``certs_dir``.
        """
        # cn is taken from the client's CONNECT request, so it is untrusted.
        if not cn or any(ch in cn for ch in ('/', '\\', os.path.sep, '\0')):
            raise ValueError('invalid certificate hostname: {!r}'.format(cn))

        return os.path.join(self.certs_dir, '%s.pem' % cn)

    def __getitem__(self, cn):
        """Return the path of a PEM key+cert file for hostname ``cn``.

        The file is generated and signed by the CA if it does not exist yet.

        :param cn: hostname to use as the certificate common name.
        :raises ValueError: if ``cn`` is not usable as a file name.
        """
        cnp = self._cert_path(cn)
        if os.path.exists(cnp):
            return cnp

        # create certificate
        key = OpenSSL.crypto.PKey()
        key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

        # Generate CSR
        req = OpenSSL.crypto.X509Req()
        req.get_subject().CN = cn
        req.set_pubkey(key)
        req.sign(key, self._DIGEST)

        # Sign CSR
        cert = OpenSSL.crypto.X509()
        cert.set_subject(req.get_subject())
        cert.set_serial_number(self._random_serial())
        cert.gmtime_adj_notBefore(0)
        cert.gmtime_adj_notAfter(10*365*24*60*60)
        cert.set_issuer(self.cert.get_subject())
        cert.set_pubkey(req.get_pubkey())
        cert.sign(self.key, self._DIGEST)

        self._write_pem(cnp, key, cert)
        self.logger.info('wrote generated key+cert to {}'.format(cnp))

        return cnp

View File

@ -4,8 +4,15 @@ from archivalrouter import ArchivalRouter
import urlparse
import base64
import socket
import ssl
from io import BytesIO
from pywb.rewrite.url_rewriter import HttpsUrlRewriter
from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.utils.wbexception import BadRequestException
from certa import CertificateAuthority
#=================================================================
@ -61,19 +68,21 @@ class ProxyRouter(object):
self.unaltered = proxy_options.get('unaltered_replay', False)
self.ca = CertificateAuthority()
def __call__(self, env):
if env['REQUEST_METHOD'] == 'CONNECT':
if not self.handle_connect(env):
is_https = (env['REQUEST_METHOD'] == 'CONNECT')
if not is_https:
url = env['REL_REQUEST_URI']
if url.endswith('/proxy.pac'):
return self.make_pac_response(env)
if not url.startswith(('http://', 'https://')):
return None
url = env['REL_REQUEST_URI']
if url.endswith('/proxy.pac'):
return self.make_pac_response(env)
if not url.startswith(('http://', 'https://')):
return None
proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')
route = None
@ -108,6 +117,12 @@ class ProxyRouter(object):
else:
return self.proxy_auth_coll_response()
# do connect, then get updated url
if is_https:
self.handle_connect(env)
url = env['REL_REQUEST_URI']
wbrequest = route.request_class(env,
request_uri=url,
wb_url_str=url,
@ -126,36 +141,41 @@ class ProxyRouter(object):
return route.handler(wbrequest)
def handle_connect(self, env):
import uwsgi
import socket
import ssl
from io import BytesIO
fd = uwsgi.connection_fd()
conn = socket.fromfd(fd, socket.AF_INET, socket.SOCK_STREAM)
sock = socket.socket(_sock=conn)
if (self.use_default_coll or
len(self.routes) == 1 or
env.get('HTTP_PROXY_AUTHORIZATION') is not None):
sock.send('HTTP/1.0 200 Connection Established\r\n')
sock.send('Server: pywb proxy\r\n')
sock.send('\r\n')
def get_request_socket(self, env):
if env.get('uwsgi.version'):
import uwsgi
fd = uwsgi.connection_fd()
conn = socket.fromfd(fd, socket.AF_INET, socket.SOCK_STREAM)
sock = socket.socket(_sock=conn)
elif env.get('gunicorn.socket'):
sock = env['gunicorn.socket']
else:
env['pywb.proxy_statusline'] = '407 Proxy Auth Required'
sock.send('HTTP/1.0 407 Proxy Auth Required\r\n')
sock.send('Server: pywb proxy\r\n')
sock.send('\r\n')
return False
# attempt to find socket from wsgi.input
input_ = env.get('wsgi.input')
if input_ and hasattr(input_, '_sock'):
sock = socket.socket(_sock=input_._sock)
return sock
def handle_connect(self, env):
sock = self.get_request_socket(env)
if not sock:
return WbResponse.text_response('HTTPS Proxy Not Supported',
'405 HTTPS Proxy Not Supported')
sock.send('HTTP/1.0 200 Connection Established\r\n')
sock.send('Server: pywb proxy\r\n')
sock.send('\r\n')
hostname = env['REL_REQUEST_URI'].split(':')[0]
ssl_sock = ssl.wrap_socket(sock, server_side=True,
certfile='/tmp/testcert.pem',
ssl_version=ssl.PROTOCOL_SSLv23)
certfile=self.ca[hostname])
#ssl_version=ssl.PROTOCOL_SSLv23)
env['pywb.proxy_ssl_sock'] = ssl_sock
#todo: better reading of all headers
buff = ssl_sock.recv(4096)
buffreader = BytesIO(buff)
@ -164,7 +184,7 @@ class ProxyRouter(object):
statusparts = statusline.split(' ')
if len(statusparts) < 3:
return
raise BadRequestException('Invalid Proxy Request')
env['REQUEST_METHOD'] = statusparts[0]
env['REL_REQUEST_URI'] = ('https://' +
@ -177,6 +197,8 @@ class ProxyRouter(object):
env['PATH_INFO'] = queryparts[0]
env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
env['wsgi.input'] = socket._fileobject(ssl_sock, mode='r')
while True:
line = buffreader.readline()
if not line:
@ -189,9 +211,6 @@ class ProxyRouter(object):
name = 'HTTP_' + parts[0].replace('-', '_').upper()
env[name] = parts[1]
return True
# Proxy Auto-Config (PAC) script for the proxy
def make_pac_response(self, env):
import os

View File

@ -59,24 +59,27 @@ class WSGIApp(object):
def ssl_start_response(statusline, headers):
ssl_sock = env.get('pywb.proxy_ssl_sock')
if not ssl_sock:
start_response(statusline, headers)
return
env['pywb.proxy_statusline'] = statusline
ssl_sock.write('HTTP/1.1 ' + statusline + '\r\n')
ssl_sock.write('HTTP/1.0 ' + statusline + '\r\n')
for name, value in headers:
ssl_sock.write(name + ': ' + value + '\r\n')
resp_iter = self.handle_methods(env, ssl_start_response)
ssl_sock = env.get('pywb.proxy_ssl_sock')
if ssl_sock:
ssl_sock.write('\r\n')
if not ssl_sock:
return resp_iter
for obj in resp_iter:
ssl_sock.write(obj)
ssl_sock.write('\r\n')
ssl_sock.close()
for obj in resp_iter:
ssl_sock.write(obj)
ssl_sock.close()
start_response(env['pywb.proxy_statusline'], [])
@ -178,6 +181,10 @@ def init_app(init_func, load_yaml=True, config_file=None, config={}):
def start_wsgi_server(the_app, name, default_port=None): # pragma: no cover
from wsgiref.simple_server import make_server
# disable is_hop_by_hop restrictions
import wsgiref.handlers
wsgiref.handlers.is_hop_by_hop = lambda x: False
port = the_app.port
if not port: