fixes for python 2.7

This commit is contained in:
Noah Levitt 2015-09-22 19:26:09 +00:00
parent abc2d28787
commit 0171cdd01d
2 changed files with 19 additions and 17 deletions

View File

@@ -57,13 +57,19 @@ class StatsDb:
         pass

     def value(self, bucket0="__all__", bucket1=None, bucket2=None):
-        if bucket0 in self.db:
-            bucket0_stats = json.loads(self.db[bucket0].decode("utf-8"))
-            if bucket1:
-                if bucket2:
-                    return bucket0_stats[bucket1][bucket2]
+        # Gdbm wants str/bytes keys in python2, str/unicode keys in python3.
+        # This ugliness deals with keys that arrive as unicode in py2.
+        b0 = bucket0.encode("utf-8") if bucket0 and not isinstance(bucket0, str) else bucket0
+        b1 = bucket1.encode("utf-8") if bucket1 and not isinstance(bucket1, str) else bucket1
+        b2 = bucket2.encode("utf-8") if bucket2 and not isinstance(bucket2, str) else bucket2
+
+        if b0 in self.db:
+            bucket0_stats = json.loads(self.db[b0].decode("utf-8"))
+            if b1:
+                if b2:
+                    return bucket0_stats[b1][b2]
                 else:
-                    return bucket0_stats[bucket1]
+                    return bucket0_stats[b1]
             else:
                 return bucket0_stats
         else:
@@ -83,10 +89,13 @@ class StatsDb:
             buckets.append("__unspecified__")

         for bucket in buckets:
-            if bucket in self.db:
-                bucket_stats = json.loads(self.db[bucket].decode("utf-8"))
+            # Gdbm wants str/bytes keys in python2, str/unicode keys in python3.
+            # This ugliness deals with keys that arrive as unicode in py2.
+            b = bucket.encode("utf-8") if bucket and not isinstance(bucket, str) else bucket
+            if b in self.db:
+                bucket_stats = json.loads(self.db[b].decode("utf-8"))
             else:
-                bucket_stats = _empty_bucket(bucket)
+                bucket_stats = _empty_bucket(b)

             bucket_stats["total"]["urls"] += 1
             bucket_stats["total"]["wire_bytes"] += recorded_url.size
@@ -98,7 +107,7 @@ class StatsDb:
             bucket_stats["new"]["urls"] += 1
             bucket_stats["new"]["wire_bytes"] += recorded_url.size

-            self.db[bucket] = json.dumps(bucket_stats, separators=(',',':')).encode("utf-8")
+            self.db[b] = json.dumps(bucket_stats, separators=(',',':')).encode("utf-8")

 class RethinkStatsDb:
     logger = logging.getLogger("warcprox.stats.RethinkStatsDb")

View File

@@ -331,8 +331,6 @@ class RecordedUrl:
         else:
             self.warcprox_meta = {}

-        if isinstance(content_type, bytes):
-            raise Exception("content_type is not supposed to be bytes!")
         self.content_type = content_type
         self.mimetype = content_type
@ -350,11 +348,6 @@ class RecordedUrl:
self.host = host self.host = host
self.duration = duration self.duration = duration
# def __del__(self):
# self.logger.debug("finished with %s", self)
# if self.response_recorder:
# del self.response_recorder
class SingleThreadedWarcProxy(http_server.HTTPServer): class SingleThreadedWarcProxy(http_server.HTTPServer):
logger = logging.getLogger("warcprox.warcproxy.WarcProxy") logger = logging.getLogger("warcprox.warcproxy.WarcProxy")