From 533f5c0af25eed9590a1092074df1cc127ef5ce7 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 15 Aug 2023 14:26:14 -0700 Subject: [PATCH] limit_revisits wants str, not bytes --- warcprox/dedup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index 57a1b72..dfd349a 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -492,7 +492,7 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor, LimitRevisitsPGMix hash_plus_url = b''.join((payload_hash, recorded_url.url)) if (recorded_url.response_recorder and hash_plus_url not in hash_plus_urls - and not self.limit_revisits(recorded_url, hash_plus_url) + and not self.limit_revisits(recorded_url, hash_plus_url.decode()) and self.trough_dedup_db.should_dedup(recorded_url)): hash_plus_urls.add(hash_plus_url)