don't wait for queue to be empty to do idle rollovers, because sometimes warcprox can stay busy for a long, long time

This commit is contained in:
Noah Levitt 2017-06-22 15:04:01 -07:00
parent 2f0c4454ac
commit 24082c2e8c
3 changed files with 9 additions and 6 deletions

View File

@@ -50,7 +50,7 @@ except:
setuptools.setup( setuptools.setup(
name='warcprox', name='warcprox',
version='2.1b1.dev89', version='2.1b1.dev90',
description='WARC writing MITM HTTP/S proxy', description='WARC writing MITM HTTP/S proxy',
url='https://github.com/internetarchive/warcprox', url='https://github.com/internetarchive/warcprox',
author='Noah Levitt', author='Noah Levitt',

View File

@@ -152,9 +152,9 @@ class WarcWriterPool:
def __init__(self, options=warcprox.Options()): def __init__(self, options=warcprox.Options()):
self.default_warc_writer = WarcWriter(options=options) self.default_warc_writer = WarcWriter(options=options)
self.warc_writers = {} # {prefix:WarcWriter} self.warc_writers = {} # {prefix:WarcWriter}
self._last_sync = time.time()
self.options = options self.options = options
self._lock = threading.RLock() self._lock = threading.RLock()
self._last_maybe = time.time()
# chooses writer for filename specified by warcprox_meta["warc-prefix"] if set # chooses writer for filename specified by warcprox_meta["warc-prefix"] if set
def _writer(self, recorded_url): def _writer(self, recorded_url):
@@ -177,9 +177,11 @@ class WarcWriterPool:
return self._writer(recorded_url).write_records(recorded_url) return self._writer(recorded_url).write_records(recorded_url)
def maybe_idle_rollover(self): def maybe_idle_rollover(self):
self.default_warc_writer.maybe_idle_rollover() if time.time() - self._last_maybe > 20:
for w in self.warc_writers.values(): self.default_warc_writer.maybe_idle_rollover()
w.maybe_idle_rollover() for w in self.warc_writers.values():
w.maybe_idle_rollover()
self._last_maybe = time.time()
def close_writers(self): def close_writers(self):
self.default_warc_writer.close_writer() self.default_warc_writer.close_writer()

View File

@@ -98,11 +98,12 @@ class WarcWriterThread(threading.Thread):
# try to release resources in a timely fashion # try to release resources in a timely fashion
if recorded_url.response_recorder and recorded_url.response_recorder.tempfile: if recorded_url.response_recorder and recorded_url.response_recorder.tempfile:
recorded_url.response_recorder.tempfile.close() recorded_url.response_recorder.tempfile.close()
self.writer_pool.maybe_idle_rollover()
except queue.Empty: except queue.Empty:
if self.stop.is_set(): if self.stop.is_set():
break break
self.idle = time.time() self.idle = time.time()
self.writer_pool.maybe_idle_rollover()
self.logger.info('WarcWriterThread shutting down') self.logger.info('WarcWriterThread shutting down')
self.writer_pool.close_writers() self.writer_pool.close_writers()