some logging tweaks, etc

This commit is contained in:
Noah Levitt 2015-07-20 13:40:20 -07:00
parent 0647c0c76d
commit f00602b764
3 changed files with 21 additions and 10 deletions

View File

@ -117,7 +117,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
def do_COMMAND(self):
if not self.is_connect:
if self.command == 'PUTMETA':
self._handle_custom_record(type_='metadata')
self._prepare_custom_record(method=self.command, type_='metadata')
return
# if self.command == 'PUTRES':
# self._handle_custom_record(type_='resource')
@ -137,7 +137,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self._proxy_request()
def _handle_custom_record(self, type_):
def _handle_custom_record(self, method, type_):
raise Exception('Not supported')
def _proxy_request(self):
@ -152,7 +152,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self.log_date_time_string(), fmt % args))
def log_message(self, fmt, *args):
self.logger.debug("{} {} - - [{}] {}".format(self.__class__.__name__,
self.logger.info("{} {} - - [{}] {}".format(self.__class__.__name__,
self.address_string(), self.log_date_time_string(), fmt % args))

View File

@ -216,7 +216,7 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
return recorded_url
def _handle_custom_record(self, type_):
def _handle_custom_record(self, method, type_):
self.url = self.path
if 'Content-Length' in self.headers and 'Content-Type' in self.headers:
@ -230,7 +230,9 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
remote_ip=b'',
warcprox_meta=warcprox_meta,
content_type=self.headers['Content-Type'].encode('latin1'),
custom_type=type_)
custom_type=type_,
method=method,
status=204, size=len(request_data))
self.server.recorded_url_q.put(rec_custom)
self.send_response(204, 'OK')
@ -239,6 +241,13 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
self.end_headers()
def log_error(self, fmt, *args):
# logging better handled elsewhere?
pass
def log_message(self, fmt, *args):
# logging better handled elsewhere?
pass
class RecordedUrl(object):
def __init__(self, url, request_data, response_recorder, remote_ip,
@ -271,7 +280,6 @@ class RecordedUrl(object):
self.status = status
self.size = size
class WarcProxy(socketserver.ThreadingMixIn, http_server.HTTPServer):
logger = logging.getLogger("warcprox.warcprox.WarcProxy")

View File

@ -240,13 +240,16 @@ class WarcWriter:
recorded_url.response_recorder.tempfile.close()
self._last_activity = time.time()
try:
payload_digest = recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8")
except:
payload_digest = "-"
# 2015-07-17T22:32:23.672Z 1 58 dns:www.dhss.delaware.gov P http://www.dhss.delaware.gov/dhss/ text/dns #045 20150717223214881+316 sha1:63UTPB7GTWIHAGIK3WWL76E57BBTJGAK http://www.dhss.delaware.gov/dhss/ - {"warcFileOffset":2964,"warcFilename":"ARCHIVEIT-1303-WEEKLY-JOB165158-20150717223222113-00000.warc.gz"}
self.logger.info("{} {} {} size={} {} {} offset={}".format(
recorded_url.status, recorded_url.method,
recorded_url.status, recorded_url.method,
recorded_url.url.decode('utf-8'), recorded_url.size,
recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8"),
self._f_finalname, recordset_offset))
payload_digest, self._f_finalname, recordset_offset))
def write_records(self, recorded_url):
recordset = self.build_warc_records(recorded_url)