From c5f33bda7abd38b20101deaee079d9ece509b37a Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 30 Nov 2017 12:55:39 -0800 Subject: [PATCH] trough dedup - handle case of no warc records written --- warcprox/dedup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index d1e456d..f21e1df 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -292,11 +292,10 @@ class TroughDedupDb(object): return None def notify(self, recorded_url, records): - if (records[0].get_header(warctools.WarcRecord.TYPE) == warctools.WarcRecord.RESPONSE + if (records and records[0].type == b'response' and recorded_url.response_recorder.payload_size() > 0): digest_key = warcprox.digest_str( - recorded_url.payload_digest, - self.options.base32) + recorded_url.payload_digest, self.options.base32) if recorded_url.warcprox_meta and 'captures-bucket' in recorded_url.warcprox_meta: self.save( digest_key, records[0],