mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
logging and exception handling tweaks
This commit is contained in:
parent
eb7de9d3f9
commit
86eab2119a
@ -32,7 +32,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
self.url = self.path
|
||||
u = urllib_parse.urlparse(self.url)
|
||||
if u.scheme != 'http':
|
||||
raise Exception('Unknown scheme %s' % repr(u.scheme))
|
||||
raise Exception('unable to parse request "{}" as a proxy request'.format(self.requestline))
|
||||
self.hostname = u.hostname
|
||||
self.port = u.port or 80
|
||||
self.path = urllib_parse.urlunparse(
|
||||
@ -83,6 +83,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
self._transition_to_ssl()
|
||||
except Exception as e:
|
||||
try:
|
||||
self.logger.error("problem with connect line {}: {}".format(repr(self.requestline), e))
|
||||
if type(e) is socket.timeout:
|
||||
self.send_error(504, str(e))
|
||||
else:
|
||||
@ -129,13 +130,18 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
self._connect_to_host()
|
||||
assert self.url
|
||||
except Exception as e:
|
||||
self.logger.error("problem processing request {}: {}".format(repr(self.requestline), e))
|
||||
self.send_error(500, str(e))
|
||||
return
|
||||
else:
|
||||
# if self.is_connect we already connected in do_CONNECT
|
||||
self.url = self._construct_tunneled_url()
|
||||
|
||||
self._proxy_request()
|
||||
try:
|
||||
self._proxy_request()
|
||||
except:
|
||||
self.logger.error("exception from {}".format(self._proxy_request), exc_info=True)
|
||||
raise
|
||||
|
||||
def _special_request(self, method, type_):
|
||||
raise Exception('Not supported')
|
||||
@ -147,12 +153,4 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
if item.startswith('do_'):
|
||||
return self.do_COMMAND
|
||||
|
||||
def log_error(self, fmt, *args):
|
||||
self.logger.error("{0} - - [{1}] {2}".format(self.address_string(),
|
||||
self.log_date_time_string(), fmt % args))
|
||||
|
||||
def log_message(self, fmt, *args):
|
||||
self.logger.info("{} {} - - [{}] {}".format(self.__class__.__name__,
|
||||
self.address_string(), self.log_date_time_string(), fmt % args))
|
||||
|
||||
|
||||
|
@ -210,8 +210,11 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
|
||||
recorded_url = RecordedUrl(url=self.url, request_data=req,
|
||||
response_recorder=h.recorder, remote_ip=remote_ip,
|
||||
warcprox_meta=warcprox_meta, method=self.command,
|
||||
status=h.status, size=h.recorder.len)
|
||||
warcprox_meta=warcprox_meta,
|
||||
status=h.status, size=h.recorder.len,
|
||||
client_ip=self.client_address[0],
|
||||
content_type=h.getheader("Content-Type"),
|
||||
method=self.command)
|
||||
self.server.recorded_url_q.put(recorded_url)
|
||||
|
||||
return recorded_url
|
||||
@ -233,8 +236,9 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
warcprox_meta=warcprox_meta,
|
||||
content_type=self.headers['Content-Type'].encode('latin1'),
|
||||
custom_type=type_,
|
||||
method=method,
|
||||
status=204, size=len(request_data))
|
||||
status=204, size=len(request_data),
|
||||
client_ip=self.client_address[0],
|
||||
method=method)
|
||||
|
||||
self.server.recorded_url_q.put(rec_custom)
|
||||
self.send_response(204, 'OK')
|
||||
@ -254,10 +258,11 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
# logging better handled elsewhere?
|
||||
pass
|
||||
|
||||
|
||||
class RecordedUrl(object):
|
||||
def __init__(self, url, request_data, response_recorder, remote_ip,
|
||||
warcprox_meta=None, content_type=None, custom_type=None,
|
||||
method=None, status=None, size=None):
|
||||
status=None, size=None, client_ip=None, method=None):
|
||||
# XXX should test what happens with non-ascii url (when does
|
||||
# url-encoding happen?)
|
||||
if type(url) is not bytes:
|
||||
@ -281,9 +286,10 @@ class RecordedUrl(object):
|
||||
self.content_type = content_type
|
||||
self.custom_type = custom_type
|
||||
|
||||
self.method = method
|
||||
self.status = status
|
||||
self.size = size
|
||||
self.client_ip = client_ip
|
||||
self.method = method
|
||||
|
||||
class WarcProxy(socketserver.ThreadingMixIn, http_server.HTTPServer):
|
||||
logger = logging.getLogger("warcprox.warcprox.WarcProxy")
|
||||
|
@ -226,6 +226,12 @@ class WarcWriter:
|
||||
|
||||
return self._f
|
||||
|
||||
def _decode(self, x):
|
||||
if isinstance(x, bytes):
|
||||
return x.decode("utf-8")
|
||||
else:
|
||||
return x
|
||||
|
||||
def _final_tasks(self, recorded_url, recordset, recordset_offset):
|
||||
if (self.dedup_db is not None
|
||||
and recordset[0].get_header(warctools.WarcRecord.TYPE) == warctools.WarcRecord.RESPONSE
|
||||
@ -245,11 +251,20 @@ class WarcWriter:
|
||||
payload_digest = recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8")
|
||||
except:
|
||||
payload_digest = "-"
|
||||
mimetype = self._decode(recorded_url.content_type)
|
||||
mimetype = mimetype[:mimetype.find(";")]
|
||||
|
||||
# 2015-07-17T22:32:23.672Z 1 58 dns:www.dhss.delaware.gov P http://www.dhss.delaware.gov/dhss/ text/dns #045 20150717223214881+316 sha1:63UTPB7GTWIHAGIK3WWL76E57BBTJGAK http://www.dhss.delaware.gov/dhss/ - {"warcFileOffset":2964,"warcFilename":"ARCHIVEIT-1303-WEEKLY-JOB165158-20150717223222113-00000.warc.gz"}
|
||||
self.logger.info("{} {} {} size={} {} {} offset={}".format(
|
||||
recorded_url.status, recorded_url.method,
|
||||
recorded_url.url.decode('utf-8'), recorded_url.size,
|
||||
payload_digest, self._f_finalname, recordset_offset))
|
||||
self.logger.info("{} {} {} {} {} size={} {} {} offset={}".format(
|
||||
self._decode(recorded_url.client_ip),
|
||||
self._decode(recorded_url.status),
|
||||
self._decode(recorded_url.method),
|
||||
self._decode(recorded_url.url),
|
||||
mimetype,
|
||||
recorded_url.size,
|
||||
self._decode(payload_digest),
|
||||
self._decode(self._f_finalname),
|
||||
recordset_offset))
|
||||
|
||||
def write_records(self, recorded_url):
|
||||
recordset = self.build_warc_records(recorded_url)
|
||||
|
Loading…
x
Reference in New Issue
Block a user