From 57abab100cd20da215e7105127d2bb5794f92faf Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Fri, 19 Jan 2018 14:38:54 -0800 Subject: [PATCH] handle case where warc record id is missing ... from trough dedup. Not sure why this error happened but we shouldn't need that field anyway. --- warcprox/dedup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index cd3b397..c9b0079 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -419,7 +419,7 @@ class TroughDedupDb(DedupDb): len(digest_keys), len(results)) assert len(results) >= 0 and len(results) <= len(digest_keys) for result in results: - result['id'] = result['id'].encode('ascii') + result['id'] = result.get('id') and result['id'].encode('ascii') result['url'] = result['url'].encode('ascii') result['date'] = result['date'].encode('ascii') result['digest_key'] = result['digest_key'].encode('ascii')