mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'limit_revisits' into qa
This commit is contained in:
commit
f0b69dd74e
@ -492,7 +492,7 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor, LimitRevisitsPGMix
|
||||
hash_plus_url = b''.join((payload_hash, recorded_url.url))
|
||||
if (recorded_url.response_recorder
|
||||
and hash_plus_url not in hash_plus_urls
|
||||
and not self.limit_revisits(recorded_url, hash_plus_url.decode('iso-8859-1'))
|
||||
and not self.limit_revisits(recorded_url)
|
||||
and self.trough_dedup_db.should_dedup(recorded_url)):
|
||||
hash_plus_urls.add(hash_plus_url)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user