From 15271835f616e265aaa38d3295a1c888d7f9d576 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 15 Aug 2023 16:11:45 -0700 Subject: [PATCH] format in limit_revisits --- warcprox/dedup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index fa170c9..77473e6 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -492,7 +492,7 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor, LimitRevisitsPGMix hash_plus_url = b''.join((payload_hash, recorded_url.url)) if (recorded_url.response_recorder and hash_plus_url not in hash_plus_urls - and not self.limit_revisits(recorded_url, hash_plus_url.decode('iso-8859-1')) + and not self.limit_revisits(recorded_url) and self.trough_dedup_db.should_dedup(recorded_url)): hash_plus_urls.add(hash_plus_url)