mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
dedup-bucket is required in Warcprox-Meta to do dedup
Modify `DedupableMixin.should_dedup` to check Warcprox-Meta for `dedup-bucket`; dedup is performed only when that key is present.
This commit is contained in:
parent
9baa2e22d5
commit
432e42803c
@@ -44,8 +44,12 @@ class DedupableMixin(object):
|
||||
|
||||
def should_dedup(self, recorded_url):
|
||||
"""Check if we should try to run dedup on resource based on payload
|
||||
size compared with min text/binary dedup size options. Return Boolean.
|
||||
size compared with min text/binary dedup size options.
|
||||
`dedup-bucket` is required in Warcprox-Meta to perform dedup.
|
||||
Return Boolean.
|
||||
"""
|
||||
if "dedup-bucket" not in recorded_url.warcprox_meta:
|
||||
return False
|
||||
if recorded_url.is_text():
|
||||
return recorded_url.response_recorder.payload_size() > self.min_text_size
|
||||
else:
|
||||
|
Loading…
x
Reference in New Issue
Block a user