Merge branch 'WT-31' into qa

This commit is contained in:
Barbara Miller 2021-12-06 17:30:25 -08:00
commit 85bb6ff437
2 changed files with 8 additions and 3 deletions

View File

@@ -1,7 +1,7 @@
"""
warcprox/__init__.py - warcprox package main file, contains some utility code
-Copyright (C) 2013-2019 Internet Archive
+Copyright (C) 2013-2021 Internet Archive
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@@ -175,8 +175,8 @@ class BaseStandardPostfetchProcessor(BasePostfetchProcessor):
class BaseBatchPostfetchProcessor(BasePostfetchProcessor):
MAX_BATCH_SIZE = 500
-MAX_BATCH_SEC = 10
-MIN_BATCH_SEC = 2.0
+MAX_BATCH_SEC = 30
+MIN_BATCH_SEC = 10
def _get_process_put(self):
batch = []

View File

@@ -384,6 +384,9 @@ class BatchTroughStorer(warcprox.BaseBatchPostfetchProcessor):
self.trough_dedup_db.batch_save,
buckets[bucket], bucket)
fs[future] = bucket
logging.debug(
'storing dedup info for %s urls '
'in bucket %s', len(buckets[bucket]), bucket)
# wait for results
try:
@@ -434,6 +437,8 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor):
warcprox.digest_str(
recorded_url.payload_digest, self.options.base32)
if recorded_url.payload_digest else 'n/a')
self.logger.debug(
'hash_plus_urls: {}'.format(hash_plus_urls))
self.logger.debug(
'len(batch)=%s len(discards)=%s buckets=%s',
len(batch), len(discards),