mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'WT-31' into qa
This commit is contained in:
commit
85bb6ff437
@ -1,7 +1,7 @@
|
||||
"""
|
||||
warcprox/__init__.py - warcprox package main file, contains some utility code
|
||||
|
||||
Copyright (C) 2013-2019 Internet Archive
|
||||
Copyright (C) 2013-2021 Internet Archive
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
@ -175,8 +175,8 @@ class BaseStandardPostfetchProcessor(BasePostfetchProcessor):
|
||||
|
||||
class BaseBatchPostfetchProcessor(BasePostfetchProcessor):
|
||||
MAX_BATCH_SIZE = 500
|
||||
MAX_BATCH_SEC = 10
|
||||
MIN_BATCH_SEC = 2.0
|
||||
MAX_BATCH_SEC = 30
|
||||
MIN_BATCH_SEC = 10
|
||||
|
||||
def _get_process_put(self):
|
||||
batch = []
|
||||
|
@ -384,6 +384,9 @@ class BatchTroughStorer(warcprox.BaseBatchPostfetchProcessor):
|
||||
self.trough_dedup_db.batch_save,
|
||||
buckets[bucket], bucket)
|
||||
fs[future] = bucket
|
||||
logging.debug(
|
||||
'storing dedup info for %s urls '
|
||||
'in bucket %s', len(buckets[bucket]), bucket)
|
||||
|
||||
# wait for results
|
||||
try:
|
||||
@ -434,6 +437,8 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor):
|
||||
warcprox.digest_str(
|
||||
recorded_url.payload_digest, self.options.base32)
|
||||
if recorded_url.payload_digest else 'n/a')
|
||||
self.logger.debug(
|
||||
'hash_plus_urls: {}'.format(hash_plus_urls))
|
||||
self.logger.debug(
|
||||
'len(batch)=%s len(discards)=%s buckets=%s',
|
||||
len(batch), len(discards),
|
||||
|
Loading…
x
Reference in New Issue
Block a user