From 887680b0ec986ad0e5b8a4d6abd2bc9aa92b2fec Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 15 Aug 2023 15:50:02 -0700 Subject: [PATCH] try iso-8859-1 --- warcprox/dedup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index dfd349a..fa170c9 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -492,7 +492,7 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor, LimitRevisitsPGMix hash_plus_url = b''.join((payload_hash, recorded_url.url)) if (recorded_url.response_recorder and hash_plus_url not in hash_plus_urls - and not self.limit_revisits(recorded_url, hash_plus_url.decode()) + and not self.limit_revisits(recorded_url, hash_plus_url.decode('iso-8859-1')) and self.trough_dedup_db.should_dedup(recorded_url)): hash_plus_urls.add(hash_plus_url)