mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
use %r instead of calling repr()
This commit is contained in:
parent
2f93cdcad9
commit
1500341875
2
setup.py
2
setup.py
@ -50,7 +50,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.1b1.dev87',
|
||||
version='2.1b1.dev88',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@ -77,7 +77,7 @@ def _send(self, data):
|
||||
logging.root.handlers[0].stream.write(data)
|
||||
logging.root.handlers[0].stream.write('\n')
|
||||
else:
|
||||
logging.info('sending data from %s', repr(data))
|
||||
logging.info('sending data from %r', data)
|
||||
orig_send(self, data)
|
||||
### uncomment this block to see raw requests going over the wire
|
||||
# http_client.HTTPConnection.send = _send
|
||||
|
@ -102,14 +102,13 @@ class RethinkCaptures:
|
||||
def _ensure_db_table(self):
|
||||
dbs = self.rr.db_list().run()
|
||||
if not self.rr.dbname in dbs:
|
||||
self.logger.info(
|
||||
"creating rethinkdb database %s", repr(self.rr.dbname))
|
||||
self.logger.info("creating rethinkdb database %r", self.rr.dbname)
|
||||
self.rr.db_create(self.rr.dbname).run()
|
||||
tables = self.rr.table_list().run()
|
||||
if not self.table in tables:
|
||||
self.logger.info(
|
||||
"creating rethinkdb table %s in database %s",
|
||||
repr(self.table), repr(self.rr.dbname))
|
||||
"creating rethinkdb table %r in database %r",
|
||||
self.table, self.rr.dbname)
|
||||
self.rr.table_create(self.table, shards=self.shards, replicas=self.replicas).run()
|
||||
self.rr.table(self.table).index_create(
|
||||
"abbr_canon_surt_timestamp",
|
||||
@ -120,7 +119,7 @@ class RethinkCaptures:
|
||||
def find_response_by_digest(self, algo, raw_digest, bucket="__unspecified__"):
|
||||
if algo != "sha1":
|
||||
raise Exception(
|
||||
"digest type is %s but big captures table is indexed by "
|
||||
"digest type is %r but big captures table is indexed by "
|
||||
"sha1" % algo)
|
||||
sha1base32 = base64.b32encode(raw_digest).decode("utf-8")
|
||||
results_iter = self.rr.table(self.table).get_all(
|
||||
@ -130,11 +129,14 @@ class RethinkCaptures:
|
||||
results = list(results_iter)
|
||||
if len(results) > 0:
|
||||
if len(results) > 1:
|
||||
self.logger.debug("expected 0 or 1 but found %s results for sha1base32=%s bucket=%s (will use first result)", len(results), sha1base32, bucket)
|
||||
self.logger.debug(
|
||||
"expected 0 or 1 but found %r results for "
|
||||
"sha1base32=%r bucket=%r (will use first result)",
|
||||
len(results), sha1base32, bucket)
|
||||
result = results[0]
|
||||
else:
|
||||
result = None
|
||||
self.logger.debug("returning %s for sha1base32=%s bucket=%s",
|
||||
self.logger.debug("returning %r for sha1base32=%r bucket=%r",
|
||||
result, sha1base32, bucket)
|
||||
return result
|
||||
|
||||
@ -146,7 +148,7 @@ class RethinkCaptures:
|
||||
).decode("utf-8")
|
||||
else:
|
||||
self.logger.warn(
|
||||
"digest type is %s but big captures table is indexed "
|
||||
"digest type is %r but big captures table is indexed "
|
||||
"by sha1",
|
||||
recorded_url.response_recorder.payload_digest.name)
|
||||
else:
|
||||
|
@ -135,15 +135,14 @@ class RethinkDedupDb:
|
||||
def _ensure_db_table(self):
|
||||
dbs = self.rr.db_list().run()
|
||||
if not self.rr.dbname in dbs:
|
||||
self.logger.info(
|
||||
"creating rethinkdb database %s", repr(self.rr.dbname))
|
||||
self.logger.info("creating rethinkdb database %r", self.rr.dbname)
|
||||
self.rr.db_create(self.rr.dbname).run()
|
||||
tables = self.rr.table_list().run()
|
||||
if not self.table in tables:
|
||||
self.logger.info(
|
||||
"creating rethinkdb table %s in database %s shards=%s "
|
||||
"replicas=%s", repr(self.table), repr(self.rr.dbname),
|
||||
self.shards, self.replicas)
|
||||
"creating rethinkdb table %r in database %r shards=%r "
|
||||
"replicas=%r", self.table, self.rr.dbname, self.shards,
|
||||
self.replicas)
|
||||
self.rr.table_create(
|
||||
self.table, primary_key="key", shards=self.shards,
|
||||
replicas=self.replicas).run()
|
||||
|
@ -97,7 +97,7 @@ class CaptureFeed:
|
||||
d[k] = v
|
||||
|
||||
msg = json.dumps(d, separators=(',', ':')).encode('utf-8')
|
||||
self.logger.debug('feeding kafka topic=%s msg=%s', repr(topic), msg)
|
||||
self.logger.debug('feeding kafka topic=%r msg=%r', topic, msg)
|
||||
p = self._producer()
|
||||
if p:
|
||||
p.send(topic, msg)
|
||||
|
@ -209,8 +209,8 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
u = urllib_parse.urlparse(self.url)
|
||||
if u.scheme != 'http':
|
||||
raise Exception(
|
||||
'unable to parse request %s as a proxy request' % (
|
||||
repr(self.requestline)))
|
||||
'unable to parse request %r as a proxy request' % (
|
||||
self.requestline))
|
||||
host = u.hostname
|
||||
self.port = u.port or 80
|
||||
self.path = urllib_parse.urlunparse(
|
||||
@ -294,7 +294,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
except Exception as e:
|
||||
try:
|
||||
self.logger.error(
|
||||
"problem handling %s: %s", repr(self.requestline), e)
|
||||
"problem handling %r: %r", self.requestline, e)
|
||||
if type(e) is socket.timeout:
|
||||
self.send_error(504, str(e))
|
||||
else:
|
||||
@ -328,7 +328,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
|
||||
def do_COMMAND(self):
|
||||
self.logger.trace(
|
||||
'request from %s:%s: %s', self.client_address[0],
|
||||
'request from %s:%s: %r', self.client_address[0],
|
||||
self.client_address[1], self.requestline)
|
||||
try:
|
||||
if self.is_connect:
|
||||
@ -341,12 +341,12 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
self._connect_to_remote_server()
|
||||
except warcprox.RequestBlockedByRule as e:
|
||||
# limit enforcers have already sent the appropriate response
|
||||
self.logger.info("%s: %s", repr(self.requestline), e)
|
||||
self.logger.info("%r: %r", self.requestline, e)
|
||||
return
|
||||
except Exception as e:
|
||||
self.logger.error(
|
||||
"problem processing request %s: %s",
|
||||
repr(self.requestline), e, exc_info=True)
|
||||
"problem processing request %r: %r",
|
||||
self.requestline, e, exc_info=True)
|
||||
self.send_error(500, str(e))
|
||||
return
|
||||
|
||||
@ -393,7 +393,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
req += self.rfile.read(int(self.headers['Content-Length']))
|
||||
|
||||
try:
|
||||
self.logger.debug('sending to remote server req=%s', repr(req))
|
||||
self.logger.debug('sending to remote server req=%r', req)
|
||||
|
||||
# Send it down the pipe!
|
||||
self._remote_server_sock.sendall(req)
|
||||
@ -411,7 +411,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
|
||||
except Exception as e:
|
||||
self.logger.error(
|
||||
"%s proxying %s %s", repr(e), self.command, self.url,
|
||||
"%r proxying %s %s", e, self.command, self.url,
|
||||
exc_info=True)
|
||||
finally:
|
||||
# Let's close off the remote end
|
||||
|
@ -82,7 +82,7 @@ class PlaybackProxyHandler(MitmProxyHandler):
|
||||
self.connection.sendall(payload)
|
||||
sz = len(headers) + len(payload)
|
||||
|
||||
self.log_message('"%s" %s %s %s',
|
||||
self.log_message('%r %s %s %s',
|
||||
self.requestline, str(status), str(sz),
|
||||
repr(location) if location else '-')
|
||||
|
||||
@ -310,7 +310,7 @@ class PlaybackIndexDb(object):
|
||||
return None, None
|
||||
|
||||
json_value = result_tuple[0]
|
||||
self.logger.debug("{}:{}".format(repr(url), repr(json_value)))
|
||||
self.logger.debug('%r:%r', url, json_value)
|
||||
py_value = json.loads(json_value)
|
||||
|
||||
latest_date = max(py_value)
|
||||
@ -330,21 +330,19 @@ class PlaybackIndexDb(object):
|
||||
return None
|
||||
|
||||
json_value = result_tuple[0]
|
||||
self.logger.debug("%s:%s", repr(url), repr(json_value))
|
||||
self.logger.debug('%r:%r', url, json_value)
|
||||
py_value = json.loads(json_value)
|
||||
|
||||
if warc_date in py_value:
|
||||
for record in py_value[warc_date]:
|
||||
if record['i'] == record_id:
|
||||
self.logger.debug(
|
||||
"found exact match for (%s,%s,%s)",
|
||||
repr(warc_date), repr(record_id), repr(url))
|
||||
"found exact match for (%r,%r,%r)",
|
||||
warc_date, record_id, url)
|
||||
record['i'] = record['i']
|
||||
return record
|
||||
else:
|
||||
self.logger.info(
|
||||
"match not found for (%s,%s,%s)", repr(warc_date),
|
||||
repr(record_id), repr(url))
|
||||
"match not found for (%r,%r,%r)", warc_date, record_id, url)
|
||||
return None
|
||||
|
||||
|
||||
|
@ -256,14 +256,14 @@ class RethinkStatsDb(StatsDb):
|
||||
dbs = self.rr.db_list().run()
|
||||
if not self.rr.dbname in dbs:
|
||||
self.logger.info(
|
||||
"creating rethinkdb database %s", repr(self.rr.dbname))
|
||||
"creating rethinkdb database %r", self.rr.dbname)
|
||||
self.rr.db_create(self.rr.dbname).run()
|
||||
tables = self.rr.table_list().run()
|
||||
if not self.table in tables:
|
||||
self.logger.info(
|
||||
"creating rethinkdb table %s in database %s shards=%s "
|
||||
"replicas=%s", repr(self.table), repr(self.rr.dbname),
|
||||
self.shards, self.replicas)
|
||||
"creating rethinkdb table %r in database %r shards=%r "
|
||||
"replicas=%r", self.table, self.rr.dbname, self.shards,
|
||||
self.replicas)
|
||||
self.rr.table_create(
|
||||
self.table, primary_key="bucket", shards=self.shards,
|
||||
replicas=self.replicas).run()
|
||||
|
@ -1,23 +1,23 @@
|
||||
#
|
||||
# warcprox/writer.py - warc writer, manages and writes records to warc files
|
||||
#
|
||||
# Copyright (C) 2013-2016 Internet Archive
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||
# USA.
|
||||
#
|
||||
'''
|
||||
warcprox/writer.py - warc writer, manages and writes records to warc files
|
||||
|
||||
Copyright (C) 2013-2017 Internet Archive
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||
USA.
|
||||
'''
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user