Mirror of https://github.com/internetarchive/warcprox.git
appease some warnings
commit a25971e06b
parent f2eebae641
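
The diff below touches the test suite and several warcprox modules, and it quiets roughly three kinds of warning noise on recent Pythons: the deprecated logging.warn() / Logger.warn() alias becomes warning(), byte-string regexes containing \A and \Z get a raw (br'...') prefix so the compiler stops flagging invalid escape sequences, and the tests call urllib3.disable_warnings(). A minimal sketch of the first and most common change (illustrative only, not part of the commit):

import logging

logging.basicConfig(level=logging.WARNING)
log = logging.getLogger('demo')

# Deprecated alias: on Python 3 this also emits
# "DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead"
# (visible when run with `python -W default`).
log.warn('old spelling')

# Preferred spelling; same log output, no deprecation warning.
log.warning('new spelling')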

@@ -68,6 +68,10 @@ import certauth.certauth
 import warcprox
 import warcprox.main
 
+# https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
+import urllib3
+urllib3.disable_warnings()
+
 try:
     import http.client as http_client
 except ImportError:
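
For context on the urllib3 lines added above (an assumption from how the test suite drives the proxy, not something the diff states): HTTPS requests made with certificate verification disabled cause urllib3 to emit InsecureRequestWarning on every request, and disable_warnings() mutes urllib3's warning categories for the whole run. A rough sketch:

import urllib3

# Without this call, each unverified HTTPS request triggers
# "InsecureRequestWarning: Unverified HTTPS request is being made ...".
urllib3.disable_warnings()

pool = urllib3.PoolManager(cert_reqs='CERT_NONE')  # verification off, which is what provokes the warning
# pool.request('GET', 'https://localhost:8443/')   # hypothetical self-signed endpoint; quiet now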

@@ -144,7 +148,7 @@ def dump_state(signum=None, frame=None):
         stack = traceback.format_stack(sys._current_frames()[th.ident])
         state_strs.append("".join(stack))
 
-    logging.warn("dumping state (caught signal {})\n{}".format(signum, "\n".join(state_strs)))
+    logging.warning("dumping state (caught signal {})\n{}".format(signum, "\n".join(state_strs)))
 
 signal.signal(signal.SIGQUIT, dump_state)
 
@@ -446,7 +450,7 @@ def warcprox_(request, http_daemon, https_daemon):
                 logging.info('dropping rethinkdb database %r', parsed.database)
                 rr.db_drop(parsed.database).run()
             except Exception as e:
-                logging.warn(
+                logging.warning(
                         'problem deleting rethinkdb database %r: %s',
                         parsed.database, e)
         logging.info('deleting working directory %r', work_dir)
@@ -1762,7 +1766,7 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
 
     crawl_log = open(default_crawl_log_path, 'rb').read()
     # tests will fail in year 3000 :)
-    assert re.match(b'\A2[^\n]+\n\Z', crawl_log)
+    assert re.match(br'\A2[^\n]+\n\Z', crawl_log)
     assert crawl_log[24:31] == b'   200 '
     assert crawl_log[31:42] == b'        54 '
     fields = crawl_log.split()
@@ -1782,7 +1786,7 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
     assert extra_info['contentSize'] == 145
 
     crawl_log_1 = open(file, 'rb').read()
-    assert re.match(b'\A2[^\n]+\n\Z', crawl_log_1)
+    assert re.match(br'\A2[^\n]+\n\Z', crawl_log_1)
     assert crawl_log_1[24:31] == b'   200 '
     assert crawl_log_1[31:42] == b'        54 '
     fields = crawl_log_1.split()
@@ -1820,7 +1824,7 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
 
     crawl_log_2 = open(file, 'rb').read()
 
-    assert re.match(b'\A2[^\n]+\n\Z', crawl_log_2)
+    assert re.match(br'\A2[^\n]+\n\Z', crawl_log_2)
     assert crawl_log_2[24:31] == b'   200 '
     assert crawl_log_2[31:42] == b'        54 '
     fields = crawl_log_2.split()
@@ -1853,7 +1857,7 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
 
     assert os.path.exists(file)
     crawl_log_3 = open(file, 'rb').read()
-    assert re.match(b'\A2[^\n]+\n\Z', crawl_log_3)
+    assert re.match(br'\A2[^\n]+\n\Z', crawl_log_3)
     assert crawl_log_3[24:31] == b'   200 '
     assert crawl_log_3[31:42] == b'         0 '
     fields = crawl_log_3.split()
@@ -1893,7 +1897,7 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
     assert os.path.exists(file)
     crawl_log_4 = open(file, 'rb').read()
 
-    assert re.match(b'\A2[^\n]+\n\Z', crawl_log_4)
+    assert re.match(br'\A2[^\n]+\n\Z', crawl_log_4)
     assert crawl_log_4[24:31] == b'   204 '
     assert crawl_log_4[31:42] == b'        38 '
     fields = crawl_log_4.split()
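
The b'...' to br'...' changes in the test_crawl_log hunks above address a different warning: \A and \Z are regular-expression anchors, not Python string escapes, so a non-raw literal makes Python 3.6+ report "invalid escape sequence" (a DeprecationWarning, promoted to SyntaxWarning in later releases). The pattern behaves identically either way, because unknown escapes are passed through to the regex engine; the raw prefix only silences the warning. A small illustrative sketch:

import re

# Non-raw bytes literal: still matches, but compiling the file warns about
# '\A' and '\Z' when warnings are enabled (python -W default).
assert re.match(b'\A2[^\n]+\n\Z', b'2 one line\n')

# Raw bytes literal: identical behavior, no warning.
assert re.match(br'\A2[^\n]+\n\Z', b'2 one line\n')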

@@ -71,7 +71,7 @@ class RethinkCaptures:
                                 "unexpected result saving batch of %s: %s "
                                 "entries" % (len(self._batch), result))
                     if result["replaced"] > 0 or result["unchanged"] > 0:
-                        self.logger.warn(
+                        self.logger.warning(
                                 "inserted=%s replaced=%s unchanged=%s in big "
                                 "captures table (normally replaced=0 and "
                                 "unchanged=0)", result["inserted"],
@@ -148,7 +148,7 @@ class RethinkCaptures:
                         recorded_url.payload_digest.digest()
                         ).decode("utf-8")
             else:
-                self.logger.warn(
+                self.logger.warning(
                         "digest type is %r but big captures table is indexed "
                         "by sha1",
                         recorded_url.payload_digest.name)

@@ -374,7 +374,7 @@ class BatchTroughStorer(warcprox.BaseBatchPostfetchProcessor):
         except futures.TimeoutError as e:
             # the remaining threads actually keep running in this case,
             # there's no way to stop them, but that should be harmless
-            logging.warn(
+            logging.warning(
                     'timed out saving dedup info to trough', exc_info=True)
 
 class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor):
@@ -458,7 +458,7 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor):
                         recorded_url.dedup_info = entry
             except Exception as e:
                 # batch_lookup raised exception or something
-                logging.warn(
+                logging.warning(
                         'problem looking up dedup info for %s urls '
                         'in bucket %s', len(buckets[bucket]), bucket,
                         exc_info=True)
@@ -474,7 +474,7 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor):
         except futures.TimeoutError as e:
             # the remaining threads actually keep running in this case,
             # there's no way to stop them, but that should be harmless
-            self.logger.warn(
+            self.logger.warning(
                     'timed out loading dedup info from trough', exc_info=True)
 
 class TroughDedupDb(DedupDb, DedupableMixin):

@@ -264,7 +264,7 @@ def dump_state(signum=None, frame=None):
         except Exception as e:
             state_strs.append('<n/a:%r>' % e)
 
-    logging.warn(
+    logging.warning(
             'dumping state (caught signal %s)\n%s',
             signum, '\n'.join(state_strs))
 
@@ -402,7 +402,7 @@ def ensure_rethinkdb_tables(argv=None):
         did_something = True
     if args.rethinkdb_trough_db_url:
         dedup_db = warcprox.dedup.TroughDedupDb(options)
-        logging.warn(
+        logging.warning(
                 'trough is responsible for creating most of the rethinkdb '
                 'tables that it uses')
         did_something = True

@@ -100,7 +100,7 @@ class ProxyingRecorder(object):
                 self.proxy_client.sendall(hunk)
             except BaseException as e:
                 self._proxy_client_conn_open = False
-                self.logger.warn(
+                self.logger.warning(
                         '%s sending data to proxy client for url %s',
                         e, self.url)
                 self.logger.info(
@@ -283,7 +283,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
                     self._remote_server_conn.sock = ssl.wrap_socket(
                             self._remote_server_conn.sock)
                 except ssl.SSLError:
-                    self.logger.warn(
+                    self.logger.warning(
                             "failed to establish ssl connection to %s; "
                             "python ssl library does not support SNI, "
                             "consider upgrading to python 2.7.9+ or 3.4+",
@@ -332,7 +332,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
             else:
                 self.send_error(500, str(e))
         except Exception as f:
-            self.logger.warn("failed to send error response ({}) to proxy client: {}".format(e, f))
+            self.logger.warning("failed to send error response ({}) to proxy client: {}".format(e, f))
             return
 
         # Reload!
@@ -386,7 +386,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
             return self._proxy_request()
         except Exception as e:
             if self.server.shutting_down:
-                self.logger.warn(
+                self.logger.warning(
                         'sending 503 warcprox shutting down %r: %r',
                         self.requestline, e)
                 self.send_error(503, 'warcprox shutting down')
@@ -521,7 +521,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
         return self.do_COMMAND
 
     def log_error(self, fmt, *args):
-        self.logger.warn(fmt, *args)
+        self.logger.warning(fmt, *args)
 
 class PooledMixIn(socketserver.ThreadingMixIn):
     logger = logging.getLogger("warcprox.mitmproxy.PooledMixIn")

@@ -81,7 +81,7 @@ def unravel_buckets(url, warcprox_meta):
         for bucket in warcprox_meta["stats"]["buckets"]:
             if isinstance(bucket, dict):
                 if not 'bucket' in bucket:
-                    self.logger.warn(
+                    self.logger.warning(
                             'ignoring invalid stats bucket in '
                             'warcprox-meta header %s', bucket)
                     continue

@@ -190,7 +190,7 @@ class TroughClient(object):
             return
         if response.status_code != 200:
             self._write_url_cache.pop(segment_id, None)
-            self.logger.warn(
+            self.logger.warning(
                     'unexpected response %r %r %r from %r to sql=%r',
                     response.status_code, response.reason, response.text,
                     write_url, sql)

@@ -530,6 +530,6 @@ class WarcProxy(SingleThreadedWarcProxy, warcprox.mitmproxy.PooledMitmProxy):
         self.remote_connection_pool.clear()
 
     def handle_error(self, request, client_address):
-        self.logger.warn(
+        self.logger.warning(
                 "exception processing request %s from %s", request,
                 client_address, exc_info=True)