From da089e0a92d03f28a52fe4983240c30bcef76481 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 6 Dec 2021 20:33:16 -0800 Subject: [PATCH] bytes not str --- warcprox/dedup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index 336747f..44319ea 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -419,8 +419,8 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor): hash_plus_urls = set() for recorded_url in batch: if recorded_url.payload_digest: - hash_plus_url = ''.join((warcprox.digest_str( - recorded_url.payload_digest, self.options.base32), recorded_url.url.decode())) + hash_plus_url = b''.join((warcprox.digest_str( + recorded_url.payload_digest, self.options.base32), recorded_url.url)) if (recorded_url.response_recorder and recorded_url.payload_digest and self.trough_dedup_db.should_dedup(recorded_url) @@ -441,7 +441,7 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor): recorded_url.payload_digest, self.options.base32) if recorded_url.payload_digest else 'n/a') self.logger.debug( - 'hash_plus_urls: {}...'.format(hash_plus_urls[0])) + 'hash_plus_urls: {}'.format(len(hash_plus_urls))) self.logger.debug( 'len(batch)=%s len(discards)=%s buckets=%s', len(batch), len(discards),