From bcaf293081fe8d9dc5359019b1c9232e7b39c34b Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 9 Dec 2021 12:19:45 -0800 Subject: [PATCH] better logging --- warcprox/dedup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index 2ceb876..26319d3 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -435,14 +435,14 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor): else: if hash_plus_url in hash_plus_urls: self.logger.debug( - 'discarding duplicate {}, setting do_not_archive'.format(hash_plus_url)) + 'discarding duplicate and setting do_not_archive for %, hash %'.format( + recorded_url.url, warcprox.digest_str( + recorded_url.payload_digest, self.options.base32))) recorded_url.do_not_archive = True discards.append( warcprox.digest_str( recorded_url.payload_digest, self.options.base32) if recorded_url.payload_digest else 'n/a') - self.logger.debug( - 'hash_plus_urls: {}'.format(len(hash_plus_urls))) self.logger.debug( 'len(batch)=%s len(discards)=%s buckets=%s', len(batch), len(discards),