warcprox/warcprox/mitmproxy.py

164 lines
5.8 KiB
Python
Raw Normal View History

2015-03-18 16:29:44 -07:00
from __future__ import absolute_import
2014-11-15 03:20:05 -08:00
try:
import http.server as http_server
except ImportError:
import BaseHTTPServer as http_server
try:
import urllib.parse as urllib_parse
except ImportError:
import urlparse as urllib_parse
import socket
import logging
import ssl
import warcprox
import threading
import datetime
import socks
2014-11-15 03:20:05 -08:00
class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
    """
    Base request handler for a man-in-the-middle HTTP/HTTPS proxy.

    Plain proxy requests (absolute ``http://`` request targets) are parsed,
    an upstream socket is opened, and ``_proxy_request()`` is called.
    ``CONNECT`` requests open the upstream connection first, then switch the
    client connection to SSL using a certificate file obtained from
    ``self.server.ca.cert_for_host()``; the tunneled request is then handled
    through the same ``do_COMMAND()`` path.

    Subclasses must implement ``_proxy_request()``.
    """

    logger = logging.getLogger("warcprox.mitmproxy.MitmProxyHandler")

    # Optional SOCKS5 proxy used to reach ``.onion`` hosts. Nothing in this
    # class assigns these; presumably the embedding application sets them on
    # the class or a subclass -- verify against callers. ``None`` means no
    # tor proxy is configured.
    onion_tor_socks_proxy_host = None
    onion_tor_socks_proxy_port = None

    def __init__(self, request, client_address, server):
        """
        Name the current thread after this connection, set a timeout on the
        client socket, and hand off to the base handler.

        Note that the base class constructor handles the request itself, so
        all per-request state must be initialized before calling it.
        """
        threading.current_thread().name = 'MitmProxyHandler(tid={},started={},client={}:{})'.format(
            warcprox.gettid(), datetime.datetime.utcnow().isoformat(),
            client_address[0], client_address[1])
        self.is_connect = False
        self._headers_buffer = []
        request.settimeout(60)  # XXX what value should this have?
        http_server.BaseHTTPRequestHandler.__init__(
            self, request, client_address, server)

    def _determine_host_port(self):
        """
        Work out ``self.hostname`` and ``self.port`` from the request target.

        For CONNECT the target is ``host:port`` (or ``[ipv6]:port``); the
        port is kept as a string. For plain proxy requests the target must be
        an absolute ``http`` URL; ``self.url`` is set to it and ``self.path``
        is rewritten to the origin-form path (with params/query/fragment).

        Raises:
            ValueError: if a CONNECT target is not of the form host:port.
            Exception: if a plain request is not an absolute http URL.
        """
        if self.is_connect:
            target = self.path
            if target.startswith('['):
                # bracketed IPv6 literal, e.g. "[::1]:443"
                host, sep, port = target[1:].partition(']:')
            else:
                host, sep, port = target.partition(':')
            if not sep or not host or not port or ':' in port:
                raise ValueError(
                    'unable to parse CONNECT target "{}" as host:port'.format(
                        target))
            self.hostname, self.port = host, port
            return

        self.url = self.path
        u = urllib_parse.urlparse(self.url)
        if u.scheme != 'http' or not u.hostname:
            raise Exception('unable to parse request "{}" as a proxy request'.format(self.requestline))
        self.hostname = u.hostname
        self.port = u.port or 80
        self.path = urllib_parse.urlunparse(
            urllib_parse.ParseResult(
                scheme='',
                netloc='',
                params=u.params,
                path=u.path or '/',
                query=u.query,
                fragment=u.fragment,
            )
        )

    def _connect_to_host(self):
        """
        Open the upstream connection and store it in ``self._proxy_sock``.

        ``.onion`` hosts go through the configured SOCKS5 proxy when one is
        set; everything else is connected to directly. For CONNECT requests
        the socket is additionally wrapped in SSL. If anything fails, the
        partially set up socket is closed before the exception propagates.
        """
        via_tor = (self.onion_tor_socks_proxy_host
                   and self.hostname.lower().endswith('.onion'))
        sock = None
        try:
            if via_tor:
                socks_port = self.onion_tor_socks_proxy_port or 1080
                self.logger.info(
                    "using tor socks proxy at %s:%s to connect to %s",
                    self.onion_tor_socks_proxy_host, socks_port,
                    self.hostname)
                sock = socks.socksocket()
                sock.set_proxy(
                    socks.SOCKS5, addr=self.onion_tor_socks_proxy_host,
                    port=socks_port, rdns=True)
                sock.settimeout(60)  # XXX what value should this have?
                sock.connect((self.hostname, int(self.port)))
            else:
                # create_connection resolves the host and works for both
                # IPv4 and IPv6 destinations; the timeout stays set on the
                # returned socket.
                sock = socket.create_connection(
                    (self.hostname, int(self.port)), timeout=60)

            if self.is_connect:
                sock = self._wrap_upstream_ssl(sock)
        except Exception:
            if sock is not None:
                sock.close()
            raise
        self._proxy_sock = sock

    def _wrap_upstream_ssl(self, sock):
        """Return ``sock`` wrapped in client-side SSL for ``self.hostname``."""
        if hasattr(ssl, 'create_default_context'):
            # The upstream certificate is deliberately not verified here.
            context = ssl.create_default_context()
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
            return context.wrap_socket(sock, server_hostname=self.hostname)

        # Old python without SSLContext helpers (and without SNI).
        try:
            return ssl.wrap_socket(sock)
        except ssl.SSLError:
            self.logger.warning("failed to establish ssl connection to {}; python ssl library does not support SNI, considering upgrading to python >= 2.7.9 or python 3.4".format(self.hostname))
            raise

    def _transition_to_ssl(self):
        """
        Switch the client connection to server-side SSL, presenting the
        certificate file that ``self.server.ca`` supplies for the host.
        """
        certfile = self.server.ca.cert_for_host(self.hostname)
        if hasattr(ssl, 'SSLContext'):
            # ssl.wrap_socket() no longer exists in recent python versions.
            protocol = getattr(ssl, 'PROTOCOL_TLS_SERVER', None)
            if protocol is None:
                protocol = ssl.PROTOCOL_SSLv23
            context = ssl.SSLContext(protocol)
            context.load_cert_chain(certfile)
            ssl_conn = context.wrap_socket(self.connection, server_side=True)
        else:
            ssl_conn = ssl.wrap_socket(
                self.connection, server_side=True, certfile=certfile)
        self.request = self.connection = ssl_conn

    def _close_proxy_sock(self):
        """Best-effort close of the upstream socket, if one was opened."""
        sock = getattr(self, '_proxy_sock', None)
        if sock is None:
            return
        self._proxy_sock = None
        try:
            sock.close()
        except Exception as e:
            self.logger.debug("error closing upstream socket: %s", e)

    def _error_status(self, e):
        """Return the HTTP status to report for exception ``e``."""
        return 504 if isinstance(e, socket.timeout) else 500

    def do_CONNECT(self):
        """
        Handle CONNECT: connect upstream, acknowledge, switch the client
        connection to SSL, then process the tunneled request.
        """
        self.is_connect = True
        try:
            # Connect to destination first
            self._determine_host_port()
            self._connect_to_host()

            # If successful, let's do this!
            self.send_response(200, 'Connection established')
            self.end_headers()
            self._transition_to_ssl()
        except Exception as e:
            # don't leak the upstream connection if a later step failed
            self._close_proxy_sock()
            try:
                self.logger.error("problem handling {}: {}".format(repr(self.requestline), e))
                self.send_error(self._error_status(e), str(e))
            except Exception as f:
                self.logger.warning("failed to send error response ({}) to proxy client: {}".format(e, f))
            return

        # Reload! (rebuild the read/write files on the now-SSL connection)
        self.setup()
        self.handle_one_request()

    def _construct_tunneled_url(self):
        """
        Build the ``https`` URL of a request received inside a CONNECT
        tunnel from the tunnel's host/port and the request's path.
        """
        host = self.hostname
        if ':' in host:
            # IPv6 literals need brackets inside a URL
            host = '[{}]'.format(host)
        if int(self.port) == 443:
            netloc = host
        else:
            netloc = '{}:{}'.format(host, self.port)
        return urllib_parse.urlunparse(
            urllib_parse.ParseResult(
                scheme='https',
                netloc=netloc,
                params='',
                path=self.path,
                query='',
                fragment='',
            )
        )

    def do_COMMAND(self):
        """
        Handle any HTTP method other than CONNECT (see ``__getattr__``).

        Outside a tunnel this connects to the destination first and reports
        failures to the client. Inside a tunnel the connection was already
        made by ``do_CONNECT()``. Exceptions from ``_proxy_request()`` are
        logged and re-raised.
        """
        if self.is_connect:
            # if self.is_connect we already connected in do_CONNECT
            self.url = self._construct_tunneled_url()
        else:
            try:
                # Connect to destination
                self._determine_host_port()
                self._connect_to_host()
            except Exception as e:
                self.logger.error("problem processing request {}: {}".format(repr(self.requestline), e))
                self.send_error(self._error_status(e), str(e))
                return

        try:
            self._proxy_request()
        except Exception:
            self.logger.error("exception proxying request", exc_info=True)
            raise

    def _proxy_request(self):
        """Forward the request upstream; must be provided by a subclass."""
        raise NotImplementedError('_proxy_request() not implemented in MitmProxyHandler, must be implemented in subclass!')

    def __getattr__(self, item):
        """
        Route every ``do_<METHOD>`` lookup that has no explicit handler to
        ``do_COMMAND``, so all HTTP methods are proxied the same way.

        Any other missing attribute raises ``AttributeError`` as usual, so
        that ``hasattr()`` and friends behave normally.
        """
        if item.startswith('do_'):
            return self.do_COMMAND
        raise AttributeError(
            '{!r} object has no attribute {!r}'.format(
                type(self).__name__, item))

    def log_error(self, fmt, *args):
        """Send the base handler's error messages to this class's logger."""
        self.logger.warning(fmt, *args)
2014-11-15 03:20:05 -08:00