Share code, handle exception during CONNECT

This commit is contained in:
Noah Levitt 2020-05-06 09:54:17 -07:00
parent 4ceebe1fa9
commit a5e9c27223
2 changed files with 49 additions and 53 deletions

View File

@ -363,7 +363,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
else: else:
self.send_error(500, str(e)) self.send_error(500, str(e))
except Exception as f: except Exception as f:
self.logger.warning("failed to send error response ({}) to proxy client: {}".format(e, f)) self.logger.warning("failed to send error response ({}) to proxy client: {}".format(e, f), exc_info=True)
return return
# Reload! # Reload!
@ -489,6 +489,31 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self.server.unregister_remote_server_sock( self.server.unregister_remote_server_sock(
self._remote_server_conn.sock) self._remote_server_conn.sock)
def _swallow_hop_by_hop_headers(self):
    '''
    Strip from self.headers the headers that don't make sense to forward
    on: most hop-by-hop headers
    (http://tools.ietf.org/html/rfc2616#section-13.5) as well as
    Warcprox-Meta.

    Modifies self.headers in place; returns nothing.
    '''
    unforwardable = (
            'Warcprox-Meta', 'Connection', 'Proxy-Connection', 'Keep-Alive',
            'Proxy-Authenticate', 'Proxy-Authorization', 'Upgrade')
    headers = self.headers
    # No membership check needed: self.headers is an email.message.Message,
    # which is case-insensitive and doesn't throw KeyError in __delitem__
    for name in unforwardable:
        del headers[name]
def _build_request(self):
    '''
    Serialize the request line and the current contents of self.headers
    into the raw bytes of an HTTP request head.

    Headers are emitted exactly as they are in self.headers at call time,
    so any header filtering or additions must happen before calling this.

    :return: bytes: request line, headers joined with CRLF, and a
        terminating blank line, encoded as latin1
    '''
    req_str = '{} {} {}\r\n'.format(
            self.command, self.path, self.request_version)

    # Add headers to the request
    # XXX in at least python3.3 str(self.headers) uses \n not \r\n :(
    req_str += '\r\n'.join(
            '{}: {}'.format(k, v) for (k, v) in self.headers.items())

    req = req_str.encode('latin1') + b'\r\n\r\n'

    # Callers assign the result (e.g. req = self._build_request()), so the
    # built bytes must be returned rather than dropped.
    return req
def _inner_proxy_request(self, extra_response_headers={}): def _inner_proxy_request(self, extra_response_headers={}):
''' '''
Sends the request to the remote server, then uses a ProxyingRecorder to Sends the request to the remote server, then uses a ProxyingRecorder to
@ -500,29 +525,11 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
It may contain extra HTTP headers such as ``Warcprox-Meta`` which It may contain extra HTTP headers such as ``Warcprox-Meta`` which
are written in the WARC record for this request. are written in the WARC record for this request.
''' '''
# Build request self._swallow_hop_by_hop_headers()
req_str = '{} {} {}\r\n'.format(
self.command, self.path, self.request_version)
# Swallow headers that don't make sense to forward on, i.e. most
# hop-by-hop headers. http://tools.ietf.org/html/rfc2616#section-13.5.
# self.headers is an email.message.Message, which is case-insensitive
# and doesn't throw KeyError in __delitem__
for key in (
'Connection', 'Proxy-Connection', 'Keep-Alive',
'Proxy-Authenticate', 'Proxy-Authorization', 'Upgrade'):
del self.headers[key]
self.headers['Via'] = via_header_value( self.headers['Via'] = via_header_value(
self.headers.get('Via'), self.headers.get('Via'),
self.request_version.replace('HTTP/', '')) self.request_version.replace('HTTP/', ''))
req = self._build_request()
# Add headers to the request
# XXX in at least python3.3 str(self.headers) uses \n not \r\n :(
req_str += '\r\n'.join(
'{}: {}'.format(k,v) for (k,v) in self.headers.items())
req = req_str.encode('latin1') + b'\r\n\r\n'
# Append message body if present to the request # Append message body if present to the request
if 'Content-Length' in self.headers: if 'Content-Length' in self.headers:
@ -548,7 +555,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
try: try:
buf = prox_rec_res.read(65536) buf = prox_rec_res.read(65536)
except http_client.IncompleteRead as e: except http_client.IncompleteRead as e:
self.logger.warn('%s from %s', e, self.url) self.logger.warning('%s from %s', e, self.url)
buf = e.partial buf = e.partial
if (self._max_resource_size and if (self._max_resource_size and

View File

@ -188,16 +188,21 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
self._enforce_limits_and_blocks() self._enforce_limits_and_blocks()
return warcprox.mitmproxy.MitmProxyHandler._connect_to_remote_server(self) return warcprox.mitmproxy.MitmProxyHandler._connect_to_remote_server(self)
def _proxy_request(self): def _parse_warcprox_meta(self):
warcprox_meta = None '''
:return: Warcprox-Meta request header value as a dictionary, or None
'''
raw_warcprox_meta = self.headers.get('Warcprox-Meta') raw_warcprox_meta = self.headers.get('Warcprox-Meta')
self.logger.trace( self.logger.trace(
'request for %s Warcprox-Meta header: %s', self.url, 'request for %s Warcprox-Meta header: %s', self.url,
raw_warcprox_meta) raw_warcprox_meta)
if raw_warcprox_meta: if raw_warcprox_meta:
warcprox_meta = json.loads(raw_warcprox_meta) return json.loads(raw_warcprox_meta)
del self.headers['Warcprox-Meta'] else:
return None
def _proxy_request(self):
warcprox_meta = self._parse_warcprox_meta()
remote_ip = self._remote_server_conn.sock.getpeername()[0] remote_ip = self._remote_server_conn.sock.getpeername()[0]
timestamp = doublethink.utcnow() timestamp = doublethink.utcnow()
extra_response_headers = {} extra_response_headers = {}
@ -343,36 +348,21 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
except: except:
self.logger.error("uncaught exception in do_WARCPROX_WRITE_RECORD", exc_info=True) self.logger.error("uncaught exception in do_WARCPROX_WRITE_RECORD", exc_info=True)
raise raise
def send_error(self, code, message=None, explain=None, exception=None): def send_error(self, code, message=None, explain=None, exception=None):
super().send_error(code, message=message, explain=explain, exception=exception) super().send_error(code, message=message, explain=explain, exception=exception)
# Build request # If error happens during CONNECT handling and before the inner request, self.url
req_str = '{} {} {}\r\n'.format( # is unset, and self.path is something like 'example.com:443'
self.command, self.path, self.request_version) urlish = self.url or self.path
# Swallow headers that don't make sense to forward on, i.e. most warcprox_meta = self._parse_warcprox_meta()
# hop-by-hop headers. http://tools.ietf.org/html/rfc2616#section-13.5. self._swallow_hop_by_hop_headers()
# self.headers is an email.message.Message, which is case-insensitive request_data = self._build_request()
# and doesn't throw KeyError in __delitem__
for key in (
'Connection', 'Proxy-Connection', 'Keep-Alive',
'Proxy-Authenticate', 'Proxy-Authorization', 'Upgrade'):
del self.headers[key]
# Add headers to the request
# XXX in at least python3.3 str(self.headers) uses \n not \r\n :(
req_str += '\r\n'.join(
'{}: {}'.format(k,v) for (k,v) in self.headers.items())
warcprox_meta = None
raw_warcprox_meta = self.headers.get('Warcprox-Meta')
if raw_warcprox_meta:
warcprox_meta = json.loads(raw_warcprox_meta)
req = req_str.encode('latin1') + b'\r\n\r\n'
failed_url = FailedUrl( failed_url = FailedUrl(
url=self.url, url=urlish,
request_data=req, request_data=request_data,
warcprox_meta=warcprox_meta, warcprox_meta=warcprox_meta,
status=code, status=code,
client_ip=self.client_address[0], client_ip=self.client_address[0],
@ -386,7 +376,6 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
self.server.recorded_url_q.put(failed_url) self.server.recorded_url_q.put(failed_url)
def log_message(self, fmt, *args): def log_message(self, fmt, *args):
# logging better handled elsewhere? # logging better handled elsewhere?
pass pass