logging and exception handling tweaks

This commit is contained in:
Noah Levitt 2015-07-24 01:39:11 +00:00
parent eb7de9d3f9
commit 86eab2119a
3 changed files with 39 additions and 20 deletions

View File

@ -32,7 +32,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self.url = self.path
u = urllib_parse.urlparse(self.url)
if u.scheme != 'http':
raise Exception('Unknown scheme %s' % repr(u.scheme))
raise Exception('unable to parse request "{}" as a proxy request'.format(self.requestline))
self.hostname = u.hostname
self.port = u.port or 80
self.path = urllib_parse.urlunparse(
@ -83,6 +83,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self._transition_to_ssl()
except Exception as e:
try:
self.logger.error("problem with connect line {}: {}".format(repr(self.requestline), e))
if type(e) is socket.timeout:
self.send_error(504, str(e))
else:
@ -129,13 +130,18 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self._connect_to_host()
assert self.url
except Exception as e:
self.logger.error("problem processing request {}: {}".format(repr(self.requestline), e))
self.send_error(500, str(e))
return
else:
# if self.is_connect we already connected in do_CONNECT
self.url = self._construct_tunneled_url()
self._proxy_request()
try:
self._proxy_request()
except:
self.logger.error("exception from {}".format(self._proxy_request), exc_info=True)
raise
def _special_request(self, method, type_):
raise Exception('Not supported')
@ -147,12 +153,4 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
if item.startswith('do_'):
return self.do_COMMAND
def log_error(self, fmt, *args):
    """Log an error line through ``self.logger`` at ERROR level.

    The line has the form ``<client address> - - [<timestamp>] <message>``,
    where the address comes from ``self.address_string()``, the timestamp
    from ``self.log_date_time_string()`` and the message is ``fmt % args``.
    """
    client = self.address_string()
    timestamp = self.log_date_time_string()
    message = fmt % args
    line = "{0} - - [{1}] {2}".format(client, timestamp, message)
    self.logger.error(line)
def log_message(self, fmt, *args):
    """Log an informational line through ``self.logger`` at INFO level.

    The line has the form
    ``<handler class name> <client address> - - [<timestamp>] <message>``,
    where the message is ``fmt % args``.
    """
    handler_name = type(self).__name__
    client = self.address_string()
    timestamp = self.log_date_time_string()
    message = fmt % args
    line = "{} {} - - [{}] {}".format(handler_name, client, timestamp, message)
    self.logger.info(line)

View File

@ -210,8 +210,11 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
recorded_url = RecordedUrl(url=self.url, request_data=req,
response_recorder=h.recorder, remote_ip=remote_ip,
warcprox_meta=warcprox_meta, method=self.command,
status=h.status, size=h.recorder.len)
warcprox_meta=warcprox_meta,
status=h.status, size=h.recorder.len,
client_ip=self.client_address[0],
content_type=h.getheader("Content-Type"),
method=self.command)
self.server.recorded_url_q.put(recorded_url)
return recorded_url
@ -233,8 +236,9 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
warcprox_meta=warcprox_meta,
content_type=self.headers['Content-Type'].encode('latin1'),
custom_type=type_,
method=method,
status=204, size=len(request_data))
status=204, size=len(request_data),
client_ip=self.client_address[0],
method=method)
self.server.recorded_url_q.put(rec_custom)
self.send_response(204, 'OK')
@ -254,10 +258,11 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
# logging better handled elsewhere?
pass
class RecordedUrl(object):
def __init__(self, url, request_data, response_recorder, remote_ip,
warcprox_meta=None, content_type=None, custom_type=None,
method=None, status=None, size=None):
status=None, size=None, client_ip=None, method=None):
# XXX should test what happens with non-ascii url (when does
# url-encoding happen?)
if type(url) is not bytes:
@ -281,9 +286,10 @@ class RecordedUrl(object):
self.content_type = content_type
self.custom_type = custom_type
self.method = method
self.status = status
self.size = size
self.client_ip = client_ip
self.method = method
class WarcProxy(socketserver.ThreadingMixIn, http_server.HTTPServer):
logger = logging.getLogger("warcprox.warcprox.WarcProxy")

View File

@ -226,6 +226,12 @@ class WarcWriter:
return self._f
def _decode(self, x):
if isinstance(x, bytes):
return x.decode("utf-8")
else:
return x
def _final_tasks(self, recorded_url, recordset, recordset_offset):
if (self.dedup_db is not None
and recordset[0].get_header(warctools.WarcRecord.TYPE) == warctools.WarcRecord.RESPONSE
@ -245,11 +251,20 @@ class WarcWriter:
payload_digest = recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8")
except:
payload_digest = "-"
mimetype = self._decode(recorded_url.content_type)
mimetype = mimetype[:mimetype.find(";")]
# 2015-07-17T22:32:23.672Z 1 58 dns:www.dhss.delaware.gov P http://www.dhss.delaware.gov/dhss/ text/dns #045 20150717223214881+316 sha1:63UTPB7GTWIHAGIK3WWL76E57BBTJGAK http://www.dhss.delaware.gov/dhss/ - {"warcFileOffset":2964,"warcFilename":"ARCHIVEIT-1303-WEEKLY-JOB165158-20150717223222113-00000.warc.gz"}
self.logger.info("{} {} {} size={} {} {} offset={}".format(
recorded_url.status, recorded_url.method,
recorded_url.url.decode('utf-8'), recorded_url.size,
payload_digest, self._f_finalname, recordset_offset))
self.logger.info("{} {} {} {} {} size={} {} {} offset={}".format(
self._decode(recorded_url.client_ip),
self._decode(recorded_url.status),
self._decode(recorded_url.method),
self._decode(recorded_url.url),
mimetype,
recorded_url.size,
self._decode(payload_digest),
self._decode(self._f_finalname),
recordset_offset))
def write_records(self, recorded_url):
recordset = self.build_warc_records(recorded_url)