don't keep next processor waiting

In the batch postfetch processor, stop accumulating urls for the next batch
once 0.5 sec (MIN_BATCH_SEC) has elapsed if the outq is empty (i.e. the next
processor is waiting idly), instead of holding out for a full batch or for
MAX_BATCH_SEC.
This commit is contained in:
Noah Levitt 2018-01-17 12:27:19 -08:00
parent 9e1a7cb6f0
commit 6a64107478

View File

@@ -168,13 +168,24 @@ class BaseStandardPostfetchProcessor(BasePostfetchProcessor):
class BaseBatchPostfetchProcessor(BasePostfetchProcessor):
MAX_BATCH_SIZE = 500
MAX_BATCH_SEC = 10
MIN_BATCH_SEC = 0.5
def _get_process_put(self):
batch = []
start = time.time()
while (len(batch) < self.MAX_BATCH_SIZE
and time.time() - start < self.MAX_BATCH_SEC):
while True:
if len(batch) >= self.MAX_BATCH_SIZE:
break # full batch
elapsed = time.time() - start
if elapsed >= self.MAX_BATCH_SEC:
break # been batching for a while
if (elapsed >= self.MIN_BATCH_SEC and self.outq
and len(self.outq.queue) == 0):
break # next processor is waiting on us
try:
batch.append(self.inq.get(block=True, timeout=0.5))
except queue.Empty: