Merge branch 'limit_revisits' into qa

This commit is contained in:
Barbara Miller 2023-08-15 14:27:01 -07:00
commit 4e88c90f4d

View File

@@ -492,7 +492,7 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor, LimitRevisitsPGMix
hash_plus_url = b''.join((payload_hash, recorded_url.url))
if (recorded_url.response_recorder
and hash_plus_url not in hash_plus_urls
- and not self.limit_revisits(recorded_url, hash_plus_url)
+ and not self.limit_revisits(recorded_url, hash_plus_url.decode())
and self.trough_dedup_db.should_dedup(recorded_url)):
hash_plus_urls.add(hash_plus_url)