mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
some logging tweaks, etc
This commit is contained in:
parent
0647c0c76d
commit
f00602b764
@ -117,7 +117,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
def do_COMMAND(self):
|
def do_COMMAND(self):
|
||||||
if not self.is_connect:
|
if not self.is_connect:
|
||||||
if self.command == 'PUTMETA':
|
if self.command == 'PUTMETA':
|
||||||
self._handle_custom_record(type_='metadata')
|
self._prepare_custom_record(method=self.command, type_='metadata')
|
||||||
return
|
return
|
||||||
# if self.command == 'PUTRES':
|
# if self.command == 'PUTRES':
|
||||||
# self._handle_custom_record(type_='resource')
|
# self._handle_custom_record(type_='resource')
|
||||||
@ -137,7 +137,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
self._proxy_request()
|
self._proxy_request()
|
||||||
|
|
||||||
def _handle_custom_record(self, type_):
|
def _handle_custom_record(self, method, type_):
|
||||||
raise Exception('Not supported')
|
raise Exception('Not supported')
|
||||||
|
|
||||||
def _proxy_request(self):
|
def _proxy_request(self):
|
||||||
@ -152,7 +152,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
self.log_date_time_string(), fmt % args))
|
self.log_date_time_string(), fmt % args))
|
||||||
|
|
||||||
def log_message(self, fmt, *args):
|
def log_message(self, fmt, *args):
|
||||||
self.logger.debug("{} {} - - [{}] {}".format(self.__class__.__name__,
|
self.logger.info("{} {} - - [{}] {}".format(self.__class__.__name__,
|
||||||
self.address_string(), self.log_date_time_string(), fmt % args))
|
self.address_string(), self.log_date_time_string(), fmt % args))
|
||||||
|
|
||||||
|
|
||||||
|
@ -216,7 +216,7 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
|||||||
|
|
||||||
return recorded_url
|
return recorded_url
|
||||||
|
|
||||||
def _handle_custom_record(self, type_):
|
def _handle_custom_record(self, method, type_):
|
||||||
self.url = self.path
|
self.url = self.path
|
||||||
|
|
||||||
if 'Content-Length' in self.headers and 'Content-Type' in self.headers:
|
if 'Content-Length' in self.headers and 'Content-Type' in self.headers:
|
||||||
@ -230,7 +230,9 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
|||||||
remote_ip=b'',
|
remote_ip=b'',
|
||||||
warcprox_meta=warcprox_meta,
|
warcprox_meta=warcprox_meta,
|
||||||
content_type=self.headers['Content-Type'].encode('latin1'),
|
content_type=self.headers['Content-Type'].encode('latin1'),
|
||||||
custom_type=type_)
|
custom_type=type_,
|
||||||
|
method=method,
|
||||||
|
status=204, size=len(request_data))
|
||||||
|
|
||||||
self.server.recorded_url_q.put(rec_custom)
|
self.server.recorded_url_q.put(rec_custom)
|
||||||
self.send_response(204, 'OK')
|
self.send_response(204, 'OK')
|
||||||
@ -239,6 +241,13 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
|||||||
|
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
|
|
||||||
|
def log_error(self, fmt, *args):
|
||||||
|
# logging better handled elsewhere?
|
||||||
|
pass
|
||||||
|
|
||||||
|
def log_message(self, fmt, *args):
|
||||||
|
# logging better handled elsewhere?
|
||||||
|
pass
|
||||||
|
|
||||||
class RecordedUrl(object):
|
class RecordedUrl(object):
|
||||||
def __init__(self, url, request_data, response_recorder, remote_ip,
|
def __init__(self, url, request_data, response_recorder, remote_ip,
|
||||||
@ -271,7 +280,6 @@ class RecordedUrl(object):
|
|||||||
self.status = status
|
self.status = status
|
||||||
self.size = size
|
self.size = size
|
||||||
|
|
||||||
|
|
||||||
class WarcProxy(socketserver.ThreadingMixIn, http_server.HTTPServer):
|
class WarcProxy(socketserver.ThreadingMixIn, http_server.HTTPServer):
|
||||||
logger = logging.getLogger("warcprox.warcprox.WarcProxy")
|
logger = logging.getLogger("warcprox.warcprox.WarcProxy")
|
||||||
|
|
||||||
|
@ -240,13 +240,16 @@ class WarcWriter:
|
|||||||
recorded_url.response_recorder.tempfile.close()
|
recorded_url.response_recorder.tempfile.close()
|
||||||
|
|
||||||
self._last_activity = time.time()
|
self._last_activity = time.time()
|
||||||
|
|
||||||
|
try:
|
||||||
|
payload_digest = recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8")
|
||||||
|
except:
|
||||||
|
payload_digest = "-"
|
||||||
# 2015-07-17T22:32:23.672Z 1 58 dns:www.dhss.delaware.gov P http://www.dhss.delaware.gov/dhss/ text/dns #045 20150717223214881+316 sha1:63UTPB7GTWIHAGIK3WWL76E57BBTJGAK http://www.dhss.delaware.gov/dhss/ - {"warcFileOffset":2964,"warcFilename":"ARCHIVEIT-1303-WEEKLY-JOB165158-20150717223222113-00000.warc.gz"}
|
# 2015-07-17T22:32:23.672Z 1 58 dns:www.dhss.delaware.gov P http://www.dhss.delaware.gov/dhss/ text/dns #045 20150717223214881+316 sha1:63UTPB7GTWIHAGIK3WWL76E57BBTJGAK http://www.dhss.delaware.gov/dhss/ - {"warcFileOffset":2964,"warcFilename":"ARCHIVEIT-1303-WEEKLY-JOB165158-20150717223222113-00000.warc.gz"}
|
||||||
self.logger.info("{} {} {} size={} {} {} offset={}".format(
|
self.logger.info("{} {} {} size={} {} {} offset={}".format(
|
||||||
recorded_url.status, recorded_url.method,
|
recorded_url.status, recorded_url.method,
|
||||||
recorded_url.url.decode('utf-8'), recorded_url.size,
|
recorded_url.url.decode('utf-8'), recorded_url.size,
|
||||||
recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8"),
|
payload_digest, self._f_finalname, recordset_offset))
|
||||||
self._f_finalname, recordset_offset))
|
|
||||||
|
|
||||||
def write_records(self, recorded_url):
|
def write_records(self, recorded_url):
|
||||||
recordset = self.build_warc_records(recorded_url)
|
recordset = self.build_warc_records(recorded_url)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user