mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
some logging tweaks, etc
This commit is contained in:
parent
0647c0c76d
commit
f00602b764
@ -117,7 +117,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
def do_COMMAND(self):
|
||||
if not self.is_connect:
|
||||
if self.command == 'PUTMETA':
|
||||
self._handle_custom_record(type_='metadata')
|
||||
self._prepare_custom_record(method=self.command, type_='metadata')
|
||||
return
|
||||
# if self.command == 'PUTRES':
|
||||
# self._handle_custom_record(type_='resource')
|
||||
@ -137,7 +137,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
|
||||
self._proxy_request()
|
||||
|
||||
def _handle_custom_record(self, type_):
|
||||
def _handle_custom_record(self, method, type_):
|
||||
raise Exception('Not supported')
|
||||
|
||||
def _proxy_request(self):
|
||||
@ -152,7 +152,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
self.log_date_time_string(), fmt % args))
|
||||
|
||||
def log_message(self, fmt, *args):
|
||||
self.logger.debug("{} {} - - [{}] {}".format(self.__class__.__name__,
|
||||
self.logger.info("{} {} - - [{}] {}".format(self.__class__.__name__,
|
||||
self.address_string(), self.log_date_time_string(), fmt % args))
|
||||
|
||||
|
||||
|
@ -216,7 +216,7 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
|
||||
return recorded_url
|
||||
|
||||
def _handle_custom_record(self, type_):
|
||||
def _handle_custom_record(self, method, type_):
|
||||
self.url = self.path
|
||||
|
||||
if 'Content-Length' in self.headers and 'Content-Type' in self.headers:
|
||||
@ -230,7 +230,9 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
remote_ip=b'',
|
||||
warcprox_meta=warcprox_meta,
|
||||
content_type=self.headers['Content-Type'].encode('latin1'),
|
||||
custom_type=type_)
|
||||
custom_type=type_,
|
||||
method=method,
|
||||
status=204, size=len(request_data))
|
||||
|
||||
self.server.recorded_url_q.put(rec_custom)
|
||||
self.send_response(204, 'OK')
|
||||
@ -239,6 +241,13 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
|
||||
self.end_headers()
|
||||
|
||||
def log_error(self, fmt, *args):
|
||||
# logging better handled elsewhere?
|
||||
pass
|
||||
|
||||
def log_message(self, fmt, *args):
|
||||
# logging better handled elsewhere?
|
||||
pass
|
||||
|
||||
class RecordedUrl(object):
|
||||
def __init__(self, url, request_data, response_recorder, remote_ip,
|
||||
@ -271,7 +280,6 @@ class RecordedUrl(object):
|
||||
self.status = status
|
||||
self.size = size
|
||||
|
||||
|
||||
class WarcProxy(socketserver.ThreadingMixIn, http_server.HTTPServer):
|
||||
logger = logging.getLogger("warcprox.warcprox.WarcProxy")
|
||||
|
||||
|
@ -240,13 +240,16 @@ class WarcWriter:
|
||||
recorded_url.response_recorder.tempfile.close()
|
||||
|
||||
self._last_activity = time.time()
|
||||
|
||||
|
||||
try:
|
||||
payload_digest = recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8")
|
||||
except:
|
||||
payload_digest = "-"
|
||||
# 2015-07-17T22:32:23.672Z 1 58 dns:www.dhss.delaware.gov P http://www.dhss.delaware.gov/dhss/ text/dns #045 20150717223214881+316 sha1:63UTPB7GTWIHAGIK3WWL76E57BBTJGAK http://www.dhss.delaware.gov/dhss/ - {"warcFileOffset":2964,"warcFilename":"ARCHIVEIT-1303-WEEKLY-JOB165158-20150717223222113-00000.warc.gz"}
|
||||
self.logger.info("{} {} {} size={} {} {} offset={}".format(
|
||||
recorded_url.status, recorded_url.method,
|
||||
recorded_url.status, recorded_url.method,
|
||||
recorded_url.url.decode('utf-8'), recorded_url.size,
|
||||
recordset[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8"),
|
||||
self._f_finalname, recordset_offset))
|
||||
payload_digest, self._f_finalname, recordset_offset))
|
||||
|
||||
def write_records(self, recorded_url):
|
||||
recordset = self.build_warc_records(recorded_url)
|
||||
|
Loading…
x
Reference in New Issue
Block a user