Share code, handle exception during CONNECT

This commit is contained in:
Noah Levitt 2020-05-06 09:54:17 -07:00
parent 4ceebe1fa9
commit a5e9c27223
2 changed files with 49 additions and 53 deletions

View File

@ -363,7 +363,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
else:
self.send_error(500, str(e))
except Exception as f:
self.logger.warning("failed to send error response ({}) to proxy client: {}".format(e, f))
self.logger.warning("failed to send error response ({}) to proxy client: {}".format(e, f), exc_info=True)
return
# Reload!
@ -489,6 +489,31 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self.server.unregister_remote_server_sock(
self._remote_server_conn.sock)
def _swallow_hop_by_hop_headers(self):
'''
Swallow headers that don't make sense to forward on, i.e.
most hop-by-hop headers.
http://tools.ietf.org/html/rfc2616#section-13.5.
'''
# self.headers is an email.message.Message, which is case-insensitive
# and doesn't throw KeyError in __delitem__
for key in (
'Warcprox-Meta', 'Connection', 'Proxy-Connection', 'Keep-Alive',
'Proxy-Authenticate', 'Proxy-Authorization', 'Upgrade'):
del self.headers[key]
def _build_request(self):
req_str = '{} {} {}\r\n'.format(
self.command, self.path, self.request_version)
# Add headers to the request
# XXX in at least python3.3 str(self.headers) uses \n not \r\n :(
req_str += '\r\n'.join(
'{}: {}'.format(k,v) for (k,v) in self.headers.items())
req = req_str.encode('latin1') + b'\r\n\r\n'
def _inner_proxy_request(self, extra_response_headers={}):
'''
Sends the request to the remote server, then uses a ProxyingRecorder to
@ -500,29 +525,11 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
It may contain extra HTTP headers such as ``Warcprox-Meta`` which
are written in the WARC record for this request.
'''
# Build request
req_str = '{} {} {}\r\n'.format(
self.command, self.path, self.request_version)
# Swallow headers that don't make sense to forward on, i.e. most
# hop-by-hop headers. http://tools.ietf.org/html/rfc2616#section-13.5.
# self.headers is an email.message.Message, which is case-insensitive
# and doesn't throw KeyError in __delitem__
for key in (
'Connection', 'Proxy-Connection', 'Keep-Alive',
'Proxy-Authenticate', 'Proxy-Authorization', 'Upgrade'):
del self.headers[key]
self._swallow_hop_by_hop_headers()
self.headers['Via'] = via_header_value(
self.headers.get('Via'),
self.request_version.replace('HTTP/', ''))
# Add headers to the request
# XXX in at least python3.3 str(self.headers) uses \n not \r\n :(
req_str += '\r\n'.join(
'{}: {}'.format(k,v) for (k,v) in self.headers.items())
req = req_str.encode('latin1') + b'\r\n\r\n'
req = self._build_request()
# Append message body if present to the request
if 'Content-Length' in self.headers:
@ -548,7 +555,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
try:
buf = prox_rec_res.read(65536)
except http_client.IncompleteRead as e:
self.logger.warn('%s from %s', e, self.url)
self.logger.warning('%s from %s', e, self.url)
buf = e.partial
if (self._max_resource_size and

View File

@ -188,16 +188,21 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
self._enforce_limits_and_blocks()
return warcprox.mitmproxy.MitmProxyHandler._connect_to_remote_server(self)
def _proxy_request(self):
warcprox_meta = None
def _parse_warcprox_meta(self):
'''
:return: Warcprox-Meta request header value as a dictionary, or None
'''
raw_warcprox_meta = self.headers.get('Warcprox-Meta')
self.logger.trace(
'request for %s Warcprox-Meta header: %s', self.url,
raw_warcprox_meta)
'request for %s Warcprox-Meta header: %s', self.url,
raw_warcprox_meta)
if raw_warcprox_meta:
warcprox_meta = json.loads(raw_warcprox_meta)
del self.headers['Warcprox-Meta']
return json.loads(raw_warcprox_meta)
else:
return None
def _proxy_request(self):
warcprox_meta = self._parse_warcprox_meta()
remote_ip = self._remote_server_conn.sock.getpeername()[0]
timestamp = doublethink.utcnow()
extra_response_headers = {}
@ -343,36 +348,21 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
except:
self.logger.error("uncaught exception in do_WARCPROX_WRITE_RECORD", exc_info=True)
raise
def send_error(self, code, message=None, explain=None, exception=None):
super().send_error(code, message=message, explain=explain, exception=exception)
# Build request
req_str = '{} {} {}\r\n'.format(
self.command, self.path, self.request_version)
# If error happens during CONNECT handling and before the inner request, self.url
# is unset, and self.path is something like 'example.com:443'
urlish = self.url or self.path
# Swallow headers that don't make sense to forward on, i.e. most
# hop-by-hop headers. http://tools.ietf.org/html/rfc2616#section-13.5.
# self.headers is an email.message.Message, which is case-insensitive
# and doesn't throw KeyError in __delitem__
for key in (
'Connection', 'Proxy-Connection', 'Keep-Alive',
'Proxy-Authenticate', 'Proxy-Authorization', 'Upgrade'):
del self.headers[key]
warcprox_meta = self._parse_warcprox_meta()
self._swallow_hop_by_hop_headers()
request_data = self._build_request()
# Add headers to the request
# XXX in at least python3.3 str(self.headers) uses \n not \r\n :(
req_str += '\r\n'.join(
'{}: {}'.format(k,v) for (k,v) in self.headers.items())
warcprox_meta = None
raw_warcprox_meta = self.headers.get('Warcprox-Meta')
if raw_warcprox_meta:
warcprox_meta = json.loads(raw_warcprox_meta)
req = req_str.encode('latin1') + b'\r\n\r\n'
failed_url = FailedUrl(
url=self.url,
request_data=req,
url=urlish,
request_data=request_data,
warcprox_meta=warcprox_meta,
status=code,
client_ip=self.client_address[0],
@ -386,7 +376,6 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
self.server.recorded_url_q.put(failed_url)
def log_message(self, fmt, *args):
# logging better handled elsewhere?
pass