From 1500341875f3ca62fba68da659ebc7e4d52c4a5f Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Wed, 7 Jun 2017 16:05:47 -0700 Subject: [PATCH] use %r instead of calling repr() --- setup.py | 2 +- tests/test_warcprox.py | 2 +- warcprox/bigtable.py | 18 ++++++++++-------- warcprox/dedup.py | 9 ++++----- warcprox/kafkafeed.py | 2 +- warcprox/mitmproxy.py | 18 +++++++++--------- warcprox/playback.py | 14 ++++++-------- warcprox/stats.py | 8 ++++---- warcprox/writer.py | 40 ++++++++++++++++++++-------------------- 9 files changed, 56 insertions(+), 57 deletions(-) diff --git a/setup.py b/setup.py index 02b57dc..9199399 100755 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ except: setuptools.setup( name='warcprox', - version='2.1b1.dev87', + version='2.1b1.dev88', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index 91ba6c7..dd80a86 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -77,7 +77,7 @@ def _send(self, data): logging.root.handlers[0].stream.write(data) logging.root.handlers[0].stream.write('\n') else: - logging.info('sending data from %s', repr(data)) + logging.info('sending data from %r', data) orig_send(self, data) ### uncomment this to block see raw requests going over the wire # http_client.HTTPConnection.send = _send diff --git a/warcprox/bigtable.py b/warcprox/bigtable.py index a61ce8b..94cf9c9 100644 --- a/warcprox/bigtable.py +++ b/warcprox/bigtable.py @@ -102,14 +102,13 @@ class RethinkCaptures: def _ensure_db_table(self): dbs = self.rr.db_list().run() if not self.rr.dbname in dbs: - self.logger.info( - "creating rethinkdb database %s", repr(self.rr.dbname)) + self.logger.info("creating rethinkdb database %r", self.rr.dbname) self.rr.db_create(self.rr.dbname).run() tables = self.rr.table_list().run() if not self.table in tables: self.logger.info( - "creating rethinkdb table %s in database %s", - repr(self.table), repr(self.rr.dbname)) + "creating rethinkdb table %r in database %r", + self.table, self.rr.dbname) self.rr.table_create(self.table, shards=self.shards, replicas=self.replicas).run() self.rr.table(self.table).index_create( "abbr_canon_surt_timestamp", @@ -120,7 +119,7 @@ class RethinkCaptures: def find_response_by_digest(self, algo, raw_digest, bucket="__unspecified__"): if algo != "sha1": raise Exception( - "digest type is %s but big captures table is indexed by " + "digest type is %r but big captures table is indexed by " "sha1" % algo) sha1base32 = base64.b32encode(raw_digest).decode("utf-8") results_iter = self.rr.table(self.table).get_all( @@ -130,11 +129,14 @@ class RethinkCaptures: results = list(results_iter) if len(results) > 0: if len(results) > 1: - self.logger.debug("expected 0 or 1 but found %s results for sha1base32=%s bucket=%s (will use first result)", len(results), sha1base32, bucket) + self.logger.debug( + "expected 0 or 1 but found %r results for " + "sha1base32=%r bucket=%r (will use first result)", + len(results), sha1base32, bucket) result = results[0] else: result = None - self.logger.debug("returning %s for sha1base32=%s bucket=%s", + self.logger.debug("returning %r for sha1base32=%r bucket=%r", result, sha1base32, bucket) return result @@ -146,7 +148,7 @@ class RethinkCaptures: ).decode("utf-8") else: self.logger.warn( - "digest type is %s but big captures table is indexed " + "digest type is %r but big captures table is indexed " "by sha1", recorded_url.response_recorder.payload_digest.name) else: diff --git a/warcprox/dedup.py b/warcprox/dedup.py index 7f894a6..78c5434 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -135,15 +135,14 @@ class RethinkDedupDb: def _ensure_db_table(self): dbs = self.rr.db_list().run() if not self.rr.dbname in dbs: - self.logger.info( - "creating rethinkdb database %s", repr(self.rr.dbname)) + self.logger.info("creating rethinkdb database %r", self.rr.dbname) self.rr.db_create(self.rr.dbname).run() tables = self.rr.table_list().run() if not self.table in tables: self.logger.info( - "creating rethinkdb table %s in database %s shards=%s " - "replicas=%s", repr(self.table), repr(self.rr.dbname), - self.shards, self.replicas) + "creating rethinkdb table %r in database %r shards=%r " + "replicas=%r", self.table, self.rr.dbname, self.shards, + self.replicas) self.rr.table_create( self.table, primary_key="key", shards=self.shards, replicas=self.replicas).run() diff --git a/warcprox/kafkafeed.py b/warcprox/kafkafeed.py index 612101b..64f8594 100644 --- a/warcprox/kafkafeed.py +++ b/warcprox/kafkafeed.py @@ -97,7 +97,7 @@ class CaptureFeed: d[k] = v msg = json.dumps(d, separators=(',', ':')).encode('utf-8') - self.logger.debug('feeding kafka topic=%s msg=%s', repr(topic), msg) + self.logger.debug('feeding kafka topic=%r msg=%r', topic, msg) p = self._producer() if p: p.send(topic, msg) diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index d949072..ec9dafc 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -209,8 +209,8 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): u = urllib_parse.urlparse(self.url) if u.scheme != 'http': raise Exception( - 'unable to parse request %s as a proxy request' % ( - repr(self.requestline))) + 'unable to parse request %r as a proxy request' % ( + self.requestline)) host = u.hostname self.port = u.port or 80 self.path = urllib_parse.urlunparse( @@ -294,7 +294,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): except Exception as e: try: self.logger.error( - "problem handling %s: %s", repr(self.requestline), e) + "problem handling %r: %r", self.requestline, e) if type(e) is socket.timeout: self.send_error(504, str(e)) else: @@ -328,7 +328,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): def do_COMMAND(self): self.logger.trace( - 'request from %s:%s: %s', self.client_address[0], + 'request from %s:%s: %r', self.client_address[0], self.client_address[1], self.requestline) try: if self.is_connect: @@ -341,12 +341,12 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): self._connect_to_remote_server() except warcprox.RequestBlockedByRule as e: # limit enforcers have already sent the appropriate response - self.logger.info("%s: %s", repr(self.requestline), e) + self.logger.info("%r: %r", self.requestline, e) return except Exception as e: self.logger.error( - "problem processing request %s: %s", - repr(self.requestline), e, exc_info=True) + "problem processing request %r: %r", + self.requestline, e, exc_info=True) self.send_error(500, str(e)) return @@ -393,7 +393,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): req += self.rfile.read(int(self.headers['Content-Length'])) try: - self.logger.debug('sending to remote server req=%s', repr(req)) + self.logger.debug('sending to remote server req=%r', req) # Send it down the pipe! self._remote_server_sock.sendall(req) @@ -411,7 +411,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): self.log_request(prox_rec_res.status, prox_rec_res.recorder.len) except Exception as e: self.logger.error( - "%s proxying %s %s", repr(e), self.command, self.url, + "%r proxying %s %s", e, self.command, self.url, exc_info=True) finally: # Let's close off the remote end diff --git a/warcprox/playback.py b/warcprox/playback.py index 6f5c183..3ee2aaa 100644 --- a/warcprox/playback.py +++ b/warcprox/playback.py @@ -82,7 +82,7 @@ class PlaybackProxyHandler(MitmProxyHandler): self.connection.sendall(payload) sz = len(headers) + len(payload) - self.log_message('"%s" %s %s %s', + self.log_message('%r %s %s %s', self.requestline, str(status), str(sz), repr(location) if location else '-') @@ -310,7 +310,7 @@ class PlaybackIndexDb(object): return None, None json_value = result_tuple[0] - self.logger.debug("{}:{}".format(repr(url), repr(json_value))) + self.logger.debug('%r:%r', url, json_value) py_value = json.loads(json_value) latest_date = max(py_value) @@ -330,21 +330,19 @@ class PlaybackIndexDb(object): return None json_value = result_tuple[0] - self.logger.debug("%s:%s", repr(url), repr(json_value)) + self.logger.debug('%r:%r', url, json_value) py_value = json.loads(json_value) if warc_date in py_value: for record in py_value[warc_date]: if record['i'] == record_id: self.logger.debug( - "found exact match for (%s,%s,%s)", - repr(warc_date), repr(record_id), repr(url)) + "found exact match for (%r,%r,%r)", + warc_date, record_id, url) record['i'] = record['i'] return record else: self.logger.info( - "match not found for (%s,%s,%s)", repr(warc_date), - repr(record_id), repr(url)) + "match not found for (%r,%r,%r)", warc_date, record_id, url) return None - diff --git a/warcprox/stats.py b/warcprox/stats.py index 88fc566..6332cb9 100644 --- a/warcprox/stats.py +++ b/warcprox/stats.py @@ -256,14 +256,14 @@ class RethinkStatsDb(StatsDb): dbs = self.rr.db_list().run() if not self.rr.dbname in dbs: self.logger.info( - "creating rethinkdb database %s", repr(self.rr.dbname)) + "creating rethinkdb database %r", self.rr.dbname) self.rr.db_create(self.rr.dbname).run() tables = self.rr.table_list().run() if not self.table in tables: self.logger.info( - "creating rethinkdb table %s in database %s shards=%s " - "replicas=%s", repr(self.table), repr(self.rr.dbname), - self.shards, self.replicas) + "creating rethinkdb table %r in database %r shards=%r " + "replicas=%r", self.table, self.rr.dbname, self.shards, + self.replicas) self.rr.table_create( self.table, primary_key="bucket", shards=self.shards, replicas=self.replicas).run() diff --git a/warcprox/writer.py b/warcprox/writer.py index 6913560..c93d89a 100644 --- a/warcprox/writer.py +++ b/warcprox/writer.py @@ -1,23 +1,23 @@ -# -# warcprox/writer.py - warc writer, manages and writes records to warc files -# -# Copyright (C) 2013-2016 Internet Archive -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, -# USA. -# +''' +warcprox/writer.py - warc writer, manages and writes records to warc files + +Copyright (C) 2013-2017 Internet Archive + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +USA. +''' from __future__ import absolute_import