From 99fb998e1dc4e91da18182cfa59cf1201d0edc4f Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Tue, 12 Feb 2019 21:46:49 +0000 Subject: [PATCH] log LRU cache info every 1000 requests to avoid writing to the log too often. --- warcprox/dedup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index d86f4f8..0bb15f6 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -318,7 +318,9 @@ class CdxServerDedupLoader(warcprox.BaseBatchPostfetchProcessor, DedupableMixin) digest_key = warcprox.digest_str(recorded_url.payload_digest, self.options.base32) dedup_info = self.cdx_dedup.cached_lookup(digest_key, recorded_url.url) - self.logger.info(self.cdx_dedup.cached_lookup.cache_info()) + cache_info = self.cdx_dedup.cached_lookup.cache_info() + if (cache_info.hits + cache_info.misses) % 1000 == 0: + self.logger.info(self.cdx_dedup.cached_lookup.cache_info()) if dedup_info: recorded_url.dedup_info = dedup_info except ValueError as exc: