use %r instead of calling repr()

This commit is contained in:
Noah Levitt 2017-06-07 16:05:47 -07:00
parent 2f93cdcad9
commit 1500341875
9 changed files with 56 additions and 57 deletions

View File

@@ -50,7 +50,7 @@ except:
setuptools.setup( setuptools.setup(
name='warcprox', name='warcprox',
version='2.1b1.dev87', version='2.1b1.dev88',
description='WARC writing MITM HTTP/S proxy', description='WARC writing MITM HTTP/S proxy',
url='https://github.com/internetarchive/warcprox', url='https://github.com/internetarchive/warcprox',
author='Noah Levitt', author='Noah Levitt',

View File

@@ -77,7 +77,7 @@ def _send(self, data):
logging.root.handlers[0].stream.write(data) logging.root.handlers[0].stream.write(data)
logging.root.handlers[0].stream.write('\n') logging.root.handlers[0].stream.write('\n')
else: else:
logging.info('sending data from %s', repr(data)) logging.info('sending data from %r', data)
orig_send(self, data) orig_send(self, data)
### uncomment this to block see raw requests going over the wire ### uncomment this to block see raw requests going over the wire
# http_client.HTTPConnection.send = _send # http_client.HTTPConnection.send = _send

View File

@@ -102,14 +102,13 @@ class RethinkCaptures:
def _ensure_db_table(self): def _ensure_db_table(self):
dbs = self.rr.db_list().run() dbs = self.rr.db_list().run()
if not self.rr.dbname in dbs: if not self.rr.dbname in dbs:
self.logger.info( self.logger.info("creating rethinkdb database %r", self.rr.dbname)
"creating rethinkdb database %s", repr(self.rr.dbname))
self.rr.db_create(self.rr.dbname).run() self.rr.db_create(self.rr.dbname).run()
tables = self.rr.table_list().run() tables = self.rr.table_list().run()
if not self.table in tables: if not self.table in tables:
self.logger.info( self.logger.info(
"creating rethinkdb table %s in database %s", "creating rethinkdb table %r in database %r",
repr(self.table), repr(self.rr.dbname)) self.table, self.rr.dbname)
self.rr.table_create(self.table, shards=self.shards, replicas=self.replicas).run() self.rr.table_create(self.table, shards=self.shards, replicas=self.replicas).run()
self.rr.table(self.table).index_create( self.rr.table(self.table).index_create(
"abbr_canon_surt_timestamp", "abbr_canon_surt_timestamp",
@@ -120,7 +119,7 @@ class RethinkCaptures:
def find_response_by_digest(self, algo, raw_digest, bucket="__unspecified__"): def find_response_by_digest(self, algo, raw_digest, bucket="__unspecified__"):
if algo != "sha1": if algo != "sha1":
raise Exception( raise Exception(
"digest type is %s but big captures table is indexed by " "digest type is %r but big captures table is indexed by "
"sha1" % algo) "sha1" % algo)
sha1base32 = base64.b32encode(raw_digest).decode("utf-8") sha1base32 = base64.b32encode(raw_digest).decode("utf-8")
results_iter = self.rr.table(self.table).get_all( results_iter = self.rr.table(self.table).get_all(
@@ -130,11 +129,14 @@ class RethinkCaptures:
results = list(results_iter) results = list(results_iter)
if len(results) > 0: if len(results) > 0:
if len(results) > 1: if len(results) > 1:
self.logger.debug("expected 0 or 1 but found %s results for sha1base32=%s bucket=%s (will use first result)", len(results), sha1base32, bucket) self.logger.debug(
"expected 0 or 1 but found %r results for "
"sha1base32=%r bucket=%r (will use first result)",
len(results), sha1base32, bucket)
result = results[0] result = results[0]
else: else:
result = None result = None
self.logger.debug("returning %s for sha1base32=%s bucket=%s", self.logger.debug("returning %r for sha1base32=%r bucket=%r",
result, sha1base32, bucket) result, sha1base32, bucket)
return result return result
@@ -146,7 +148,7 @@ class RethinkCaptures:
).decode("utf-8") ).decode("utf-8")
else: else:
self.logger.warn( self.logger.warn(
"digest type is %s but big captures table is indexed " "digest type is %r but big captures table is indexed "
"by sha1", "by sha1",
recorded_url.response_recorder.payload_digest.name) recorded_url.response_recorder.payload_digest.name)
else: else:

View File

@@ -135,15 +135,14 @@ class RethinkDedupDb:
def _ensure_db_table(self): def _ensure_db_table(self):
dbs = self.rr.db_list().run() dbs = self.rr.db_list().run()
if not self.rr.dbname in dbs: if not self.rr.dbname in dbs:
self.logger.info( self.logger.info("creating rethinkdb database %r", self.rr.dbname)
"creating rethinkdb database %s", repr(self.rr.dbname))
self.rr.db_create(self.rr.dbname).run() self.rr.db_create(self.rr.dbname).run()
tables = self.rr.table_list().run() tables = self.rr.table_list().run()
if not self.table in tables: if not self.table in tables:
self.logger.info( self.logger.info(
"creating rethinkdb table %s in database %s shards=%s " "creating rethinkdb table %r in database %r shards=%r "
"replicas=%s", repr(self.table), repr(self.rr.dbname), "replicas=%r", self.table, self.rr.dbname, self.shards,
self.shards, self.replicas) self.replicas)
self.rr.table_create( self.rr.table_create(
self.table, primary_key="key", shards=self.shards, self.table, primary_key="key", shards=self.shards,
replicas=self.replicas).run() replicas=self.replicas).run()

View File

@@ -97,7 +97,7 @@ class CaptureFeed:
d[k] = v d[k] = v
msg = json.dumps(d, separators=(',', ':')).encode('utf-8') msg = json.dumps(d, separators=(',', ':')).encode('utf-8')
self.logger.debug('feeding kafka topic=%s msg=%s', repr(topic), msg) self.logger.debug('feeding kafka topic=%r msg=%r', topic, msg)
p = self._producer() p = self._producer()
if p: if p:
p.send(topic, msg) p.send(topic, msg)

View File

@@ -209,8 +209,8 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
u = urllib_parse.urlparse(self.url) u = urllib_parse.urlparse(self.url)
if u.scheme != 'http': if u.scheme != 'http':
raise Exception( raise Exception(
'unable to parse request %s as a proxy request' % ( 'unable to parse request %r as a proxy request' % (
repr(self.requestline))) self.requestline))
host = u.hostname host = u.hostname
self.port = u.port or 80 self.port = u.port or 80
self.path = urllib_parse.urlunparse( self.path = urllib_parse.urlunparse(
@@ -294,7 +294,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
except Exception as e: except Exception as e:
try: try:
self.logger.error( self.logger.error(
"problem handling %s: %s", repr(self.requestline), e) "problem handling %r: %r", self.requestline, e)
if type(e) is socket.timeout: if type(e) is socket.timeout:
self.send_error(504, str(e)) self.send_error(504, str(e))
else: else:
@@ -328,7 +328,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
def do_COMMAND(self): def do_COMMAND(self):
self.logger.trace( self.logger.trace(
'request from %s:%s: %s', self.client_address[0], 'request from %s:%s: %r', self.client_address[0],
self.client_address[1], self.requestline) self.client_address[1], self.requestline)
try: try:
if self.is_connect: if self.is_connect:
@@ -341,12 +341,12 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self._connect_to_remote_server() self._connect_to_remote_server()
except warcprox.RequestBlockedByRule as e: except warcprox.RequestBlockedByRule as e:
# limit enforcers have already sent the appropriate response # limit enforcers have already sent the appropriate response
self.logger.info("%s: %s", repr(self.requestline), e) self.logger.info("%r: %r", self.requestline, e)
return return
except Exception as e: except Exception as e:
self.logger.error( self.logger.error(
"problem processing request %s: %s", "problem processing request %r: %r",
repr(self.requestline), e, exc_info=True) self.requestline, e, exc_info=True)
self.send_error(500, str(e)) self.send_error(500, str(e))
return return
@@ -393,7 +393,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
req += self.rfile.read(int(self.headers['Content-Length'])) req += self.rfile.read(int(self.headers['Content-Length']))
try: try:
self.logger.debug('sending to remote server req=%s', repr(req)) self.logger.debug('sending to remote server req=%r', req)
# Send it down the pipe! # Send it down the pipe!
self._remote_server_sock.sendall(req) self._remote_server_sock.sendall(req)
@@ -411,7 +411,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len) self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
except Exception as e: except Exception as e:
self.logger.error( self.logger.error(
"%s proxying %s %s", repr(e), self.command, self.url, "%r proxying %s %s", e, self.command, self.url,
exc_info=True) exc_info=True)
finally: finally:
# Let's close off the remote end # Let's close off the remote end

View File

@@ -82,7 +82,7 @@ class PlaybackProxyHandler(MitmProxyHandler):
self.connection.sendall(payload) self.connection.sendall(payload)
sz = len(headers) + len(payload) sz = len(headers) + len(payload)
self.log_message('"%s" %s %s %s', self.log_message('%r %s %s %s',
self.requestline, str(status), str(sz), self.requestline, str(status), str(sz),
repr(location) if location else '-') repr(location) if location else '-')
@@ -310,7 +310,7 @@ class PlaybackIndexDb(object):
return None, None return None, None
json_value = result_tuple[0] json_value = result_tuple[0]
self.logger.debug("{}:{}".format(repr(url), repr(json_value))) self.logger.debug('%r:%r', url, json_value)
py_value = json.loads(json_value) py_value = json.loads(json_value)
latest_date = max(py_value) latest_date = max(py_value)
@@ -330,21 +330,19 @@ class PlaybackIndexDb(object):
return None return None
json_value = result_tuple[0] json_value = result_tuple[0]
self.logger.debug("%s:%s", repr(url), repr(json_value)) self.logger.debug('%r:%r', url, json_value)
py_value = json.loads(json_value) py_value = json.loads(json_value)
if warc_date in py_value: if warc_date in py_value:
for record in py_value[warc_date]: for record in py_value[warc_date]:
if record['i'] == record_id: if record['i'] == record_id:
self.logger.debug( self.logger.debug(
"found exact match for (%s,%s,%s)", "found exact match for (%r,%r,%r)",
repr(warc_date), repr(record_id), repr(url)) warc_date, record_id, url)
record['i'] = record['i'] record['i'] = record['i']
return record return record
else: else:
self.logger.info( self.logger.info(
"match not found for (%s,%s,%s)", repr(warc_date), "match not found for (%r,%r,%r)", warc_date, record_id, url)
repr(record_id), repr(url))
return None return None

View File

@@ -256,14 +256,14 @@ class RethinkStatsDb(StatsDb):
dbs = self.rr.db_list().run() dbs = self.rr.db_list().run()
if not self.rr.dbname in dbs: if not self.rr.dbname in dbs:
self.logger.info( self.logger.info(
"creating rethinkdb database %s", repr(self.rr.dbname)) "creating rethinkdb database %r", self.rr.dbname)
self.rr.db_create(self.rr.dbname).run() self.rr.db_create(self.rr.dbname).run()
tables = self.rr.table_list().run() tables = self.rr.table_list().run()
if not self.table in tables: if not self.table in tables:
self.logger.info( self.logger.info(
"creating rethinkdb table %s in database %s shards=%s " "creating rethinkdb table %r in database %r shards=%r "
"replicas=%s", repr(self.table), repr(self.rr.dbname), "replicas=%r", self.table, self.rr.dbname, self.shards,
self.shards, self.replicas) self.replicas)
self.rr.table_create( self.rr.table_create(
self.table, primary_key="bucket", shards=self.shards, self.table, primary_key="bucket", shards=self.shards,
replicas=self.replicas).run() replicas=self.replicas).run()

View File

@@ -1,23 +1,23 @@
# '''
# warcprox/writer.py - warc writer, manages and writes records to warc files warcprox/writer.py - warc writer, manages and writes records to warc files
#
# Copyright (C) 2013-2016 Internet Archive Copyright (C) 2013-2017 Internet Archive
#
# This program is free software; you can redistribute it and/or This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2 as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version. of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details. GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA. USA.
# '''
from __future__ import absolute_import from __future__ import absolute_import