use %r instead of calling repr()

This commit is contained in:
Noah Levitt 2017-06-07 16:05:47 -07:00
parent 2f93cdcad9
commit 1500341875
9 changed files with 56 additions and 57 deletions

View File

@ -50,7 +50,7 @@ except:
setuptools.setup(
name='warcprox',
version='2.1b1.dev87',
version='2.1b1.dev88',
description='WARC writing MITM HTTP/S proxy',
url='https://github.com/internetarchive/warcprox',
author='Noah Levitt',

View File

@ -77,7 +77,7 @@ def _send(self, data):
logging.root.handlers[0].stream.write(data)
logging.root.handlers[0].stream.write('\n')
else:
logging.info('sending data from %s', repr(data))
logging.info('sending data from %r', data)
orig_send(self, data)
### uncomment this to see raw requests going over the wire
# http_client.HTTPConnection.send = _send

View File

@ -102,14 +102,13 @@ class RethinkCaptures:
def _ensure_db_table(self):
dbs = self.rr.db_list().run()
if not self.rr.dbname in dbs:
self.logger.info(
"creating rethinkdb database %s", repr(self.rr.dbname))
self.logger.info("creating rethinkdb database %r", self.rr.dbname)
self.rr.db_create(self.rr.dbname).run()
tables = self.rr.table_list().run()
if not self.table in tables:
self.logger.info(
"creating rethinkdb table %s in database %s",
repr(self.table), repr(self.rr.dbname))
"creating rethinkdb table %r in database %r",
self.table, self.rr.dbname)
self.rr.table_create(self.table, shards=self.shards, replicas=self.replicas).run()
self.rr.table(self.table).index_create(
"abbr_canon_surt_timestamp",
@ -120,7 +119,7 @@ class RethinkCaptures:
def find_response_by_digest(self, algo, raw_digest, bucket="__unspecified__"):
if algo != "sha1":
raise Exception(
"digest type is %s but big captures table is indexed by "
"digest type is %r but big captures table is indexed by "
"sha1" % algo)
sha1base32 = base64.b32encode(raw_digest).decode("utf-8")
results_iter = self.rr.table(self.table).get_all(
@ -130,11 +129,14 @@ class RethinkCaptures:
results = list(results_iter)
if len(results) > 0:
if len(results) > 1:
self.logger.debug("expected 0 or 1 but found %s results for sha1base32=%s bucket=%s (will use first result)", len(results), sha1base32, bucket)
self.logger.debug(
"expected 0 or 1 but found %r results for "
"sha1base32=%r bucket=%r (will use first result)",
len(results), sha1base32, bucket)
result = results[0]
else:
result = None
self.logger.debug("returning %s for sha1base32=%s bucket=%s",
self.logger.debug("returning %r for sha1base32=%r bucket=%r",
result, sha1base32, bucket)
return result
@ -146,7 +148,7 @@ class RethinkCaptures:
).decode("utf-8")
else:
self.logger.warn(
"digest type is %s but big captures table is indexed "
"digest type is %r but big captures table is indexed "
"by sha1",
recorded_url.response_recorder.payload_digest.name)
else:

View File

@ -135,15 +135,14 @@ class RethinkDedupDb:
def _ensure_db_table(self):
dbs = self.rr.db_list().run()
if not self.rr.dbname in dbs:
self.logger.info(
"creating rethinkdb database %s", repr(self.rr.dbname))
self.logger.info("creating rethinkdb database %r", self.rr.dbname)
self.rr.db_create(self.rr.dbname).run()
tables = self.rr.table_list().run()
if not self.table in tables:
self.logger.info(
"creating rethinkdb table %s in database %s shards=%s "
"replicas=%s", repr(self.table), repr(self.rr.dbname),
self.shards, self.replicas)
"creating rethinkdb table %r in database %r shards=%r "
"replicas=%r", self.table, self.rr.dbname, self.shards,
self.replicas)
self.rr.table_create(
self.table, primary_key="key", shards=self.shards,
replicas=self.replicas).run()

View File

@ -97,7 +97,7 @@ class CaptureFeed:
d[k] = v
msg = json.dumps(d, separators=(',', ':')).encode('utf-8')
self.logger.debug('feeding kafka topic=%s msg=%s', repr(topic), msg)
self.logger.debug('feeding kafka topic=%r msg=%r', topic, msg)
p = self._producer()
if p:
p.send(topic, msg)

View File

@ -209,8 +209,8 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
u = urllib_parse.urlparse(self.url)
if u.scheme != 'http':
raise Exception(
'unable to parse request %s as a proxy request' % (
repr(self.requestline)))
'unable to parse request %r as a proxy request' % (
self.requestline))
host = u.hostname
self.port = u.port or 80
self.path = urllib_parse.urlunparse(
@ -294,7 +294,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
except Exception as e:
try:
self.logger.error(
"problem handling %s: %s", repr(self.requestline), e)
"problem handling %r: %r", self.requestline, e)
if type(e) is socket.timeout:
self.send_error(504, str(e))
else:
@ -328,7 +328,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
def do_COMMAND(self):
self.logger.trace(
'request from %s:%s: %s', self.client_address[0],
'request from %s:%s: %r', self.client_address[0],
self.client_address[1], self.requestline)
try:
if self.is_connect:
@ -341,12 +341,12 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self._connect_to_remote_server()
except warcprox.RequestBlockedByRule as e:
# limit enforcers have already sent the appropriate response
self.logger.info("%s: %s", repr(self.requestline), e)
self.logger.info("%r: %r", self.requestline, e)
return
except Exception as e:
self.logger.error(
"problem processing request %s: %s",
repr(self.requestline), e, exc_info=True)
"problem processing request %r: %r",
self.requestline, e, exc_info=True)
self.send_error(500, str(e))
return
@ -393,7 +393,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
req += self.rfile.read(int(self.headers['Content-Length']))
try:
self.logger.debug('sending to remote server req=%s', repr(req))
self.logger.debug('sending to remote server req=%r', req)
# Send it down the pipe!
self._remote_server_sock.sendall(req)
@ -411,7 +411,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
except Exception as e:
self.logger.error(
"%s proxying %s %s", repr(e), self.command, self.url,
"%r proxying %s %s", e, self.command, self.url,
exc_info=True)
finally:
# Let's close off the remote end

View File

@ -82,7 +82,7 @@ class PlaybackProxyHandler(MitmProxyHandler):
self.connection.sendall(payload)
sz = len(headers) + len(payload)
self.log_message('"%s" %s %s %s',
self.log_message('%r %s %s %s',
self.requestline, str(status), str(sz),
repr(location) if location else '-')
@ -310,7 +310,7 @@ class PlaybackIndexDb(object):
return None, None
json_value = result_tuple[0]
self.logger.debug("{}:{}".format(repr(url), repr(json_value)))
self.logger.debug('%r:%r', url, json_value)
py_value = json.loads(json_value)
latest_date = max(py_value)
@ -330,21 +330,19 @@ class PlaybackIndexDb(object):
return None
json_value = result_tuple[0]
self.logger.debug("%s:%s", repr(url), repr(json_value))
self.logger.debug('%r:%r', url, json_value)
py_value = json.loads(json_value)
if warc_date in py_value:
for record in py_value[warc_date]:
if record['i'] == record_id:
self.logger.debug(
"found exact match for (%s,%s,%s)",
repr(warc_date), repr(record_id), repr(url))
"found exact match for (%r,%r,%r)",
warc_date, record_id, url)
record['i'] = record['i']
return record
else:
self.logger.info(
"match not found for (%s,%s,%s)", repr(warc_date),
repr(record_id), repr(url))
"match not found for (%r,%r,%r)", warc_date, record_id, url)
return None

View File

@ -256,14 +256,14 @@ class RethinkStatsDb(StatsDb):
dbs = self.rr.db_list().run()
if not self.rr.dbname in dbs:
self.logger.info(
"creating rethinkdb database %s", repr(self.rr.dbname))
"creating rethinkdb database %r", self.rr.dbname)
self.rr.db_create(self.rr.dbname).run()
tables = self.rr.table_list().run()
if not self.table in tables:
self.logger.info(
"creating rethinkdb table %s in database %s shards=%s "
"replicas=%s", repr(self.table), repr(self.rr.dbname),
self.shards, self.replicas)
"creating rethinkdb table %r in database %r shards=%r "
"replicas=%r", self.table, self.rr.dbname, self.shards,
self.replicas)
self.rr.table_create(
self.table, primary_key="bucket", shards=self.shards,
replicas=self.replicas).run()

View File

@ -1,23 +1,23 @@
#
# warcprox/writer.py - warc writer, manages and writes records to warc files
#
# Copyright (C) 2013-2016 Internet Archive
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.
#
'''
warcprox/writer.py - warc writer, manages and writes records to warc files
Copyright (C) 2013-2017 Internet Archive
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA.
'''
from __future__ import absolute_import