From f00602b764fe86ebdceda28cc4df2fb2a12e3043 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Mon, 20 Jul 2015 13:40:20 -0700 Subject: [PATCH] some logging tweaks, etc --- warcprox/mitmproxy.py | 6 +++--- warcprox/warcprox.py | 14 +++++++++++--- warcprox/warcwriter.py | 11 +++++++---- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index 24758f0..d6fe96d 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -117,7 +117,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): def do_COMMAND(self): if not self.is_connect: if self.command == 'PUTMETA': - self._handle_custom_record(type_='metadata') + self._prepare_custom_record(method=self.command, type_='metadata') return # if self.command == 'PUTRES': # self._handle_custom_record(type_='resource') @@ -137,7 +137,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): self._proxy_request() - def _handle_custom_record(self, type_): + def _handle_custom_record(self, method, type_): raise Exception('Not supported') def _proxy_request(self): @@ -152,7 +152,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): self.log_date_time_string(), fmt % args)) def log_message(self, fmt, *args): - self.logger.debug("{} {} - - [{}] {}".format(self.__class__.__name__, + self.logger.info("{} {} - - [{}] {}".format(self.__class__.__name__, self.address_string(), self.log_date_time_string(), fmt % args)) diff --git a/warcprox/warcprox.py b/warcprox/warcprox.py index 68184b2..7fcebbd 100644 --- a/warcprox/warcprox.py +++ b/warcprox/warcprox.py @@ -216,7 +216,7 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler): return recorded_url - def _handle_custom_record(self, type_): + def _handle_custom_record(self, method, type_): self.url = self.path if 'Content-Length' in self.headers and 'Content-Type' in self.headers: @@ -230,7 +230,9 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler): remote_ip=b'', warcprox_meta=warcprox_meta, content_type=self.headers['Content-Type'].encode('latin1'), - custom_type=type_) + custom_type=type_, + method=method, + status=204, size=len(request_data)) self.server.recorded_url_q.put(rec_custom) self.send_response(204, 'OK') @@ -239,6 +241,13 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler): self.end_headers() + def log_error(self, fmt, *args): + # logging better handled elsewhere? + pass + + def log_message(self, fmt, *args): + # logging better handled elsewhere? + pass class RecordedUrl(object): def __init__(self, url, request_data, response_recorder, remote_ip, @@ -271,7 +280,6 @@ class RecordedUrl(object): self.status = status self.size = size - class WarcProxy(socketserver.ThreadingMixIn, http_server.HTTPServer): logger = logging.getLogger("warcprox.warcprox.WarcProxy") diff --git a/warcprox/warcwriter.py b/warcprox/warcwriter.py index 4736a86..7146904 100644 --- a/warcprox/warcwriter.py +++ b/warcprox/warcwriter.py @@ -240,13 +240,16 @@ class WarcWriter: recorded_url.response_recorder.tempfile.close() self._last_activity = time.time() - + + try: + payload_digest = recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8") + except: + payload_digest = "-" # 2015-07-17T22:32:23.672Z 1 58 dns:www.dhss.delaware.gov P http://www.dhss.delaware.gov/dhss/ text/dns #045 20150717223214881+316 sha1:63UTPB7GTWIHAGIK3WWL76E57BBTJGAK http://www.dhss.delaware.gov/dhss/ - {"warcFileOffset":2964,"warcFilename":"ARCHIVEIT-1303-WEEKLY-JOB165158-20150717223222113-00000.warc.gz"} self.logger.info("{} {} {} size={} {} {} offset={}".format( - recorded_url.status, recorded_url.method, + recorded_url.status, recorded_url.method, recorded_url.url.decode('utf-8'), recorded_url.size, - recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8"), - self._f_finalname, recordset_offset)) + payload_digest, self._f_finalname, recordset_offset)) def write_records(self, recorded_url): recordset = self.build_warc_records(recorded_url)