From ca4c62fc6d91184be6a69a0798e31aeb5d94682f Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Fri, 30 Oct 2015 01:15:27 +0000 Subject: [PATCH] don't load dedup info for empty payload --- warcprox/dedup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index 942ee9f..17735ed 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -73,7 +73,9 @@ class DedupDb(object): def decorate_with_dedup_info(dedup_db, recorded_url, base32=False): - if recorded_url.response_recorder and recorded_url.response_recorder.payload_digest: + if (recorded_url.response_recorder + and recorded_url.response_recorder.payload_digest + and recorded_url.response_recorder.payload_size() > 0): digest_key = warcprox.digest_str(recorded_url.response_recorder.payload_digest, base32) if recorded_url.warcprox_meta and "captures-bucket" in recorded_url.warcprox_meta: recorded_url.dedup_info = dedup_db.lookup(digest_key, recorded_url.warcprox_meta["captures-bucket"])