diff --git a/setup.py b/setup.py index b450100..40a5d9d 100755 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ except: setuptools.setup( name='warcprox', - version='2.1b1.dev89', + version='2.1b1.dev90', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/warcprox/writer.py b/warcprox/writer.py index c93d89a..0c503bf 100644 --- a/warcprox/writer.py +++ b/warcprox/writer.py @@ -152,9 +152,9 @@ class WarcWriterPool: def __init__(self, options=warcprox.Options()): self.default_warc_writer = WarcWriter(options=options) self.warc_writers = {} # {prefix:WarcWriter} - self._last_sync = time.time() self.options = options self._lock = threading.RLock() + self._last_maybe = time.time() # chooses writer for filename specified by warcprox_meta["warc-prefix"] if set def _writer(self, recorded_url): @@ -177,9 +177,11 @@ class WarcWriterPool: return self._writer(recorded_url).write_records(recorded_url) def maybe_idle_rollover(self): - self.default_warc_writer.maybe_idle_rollover() - for w in self.warc_writers.values(): - w.maybe_idle_rollover() + if time.time() - self._last_maybe > 20: + self.default_warc_writer.maybe_idle_rollover() + for w in self.warc_writers.values(): + w.maybe_idle_rollover() + self._last_maybe = time.time() def close_writers(self): self.default_warc_writer.close_writer() diff --git a/warcprox/writerthread.py b/warcprox/writerthread.py index f480251..26d3d3c 100644 --- a/warcprox/writerthread.py +++ b/warcprox/writerthread.py @@ -98,11 +98,12 @@ class WarcWriterThread(threading.Thread): # try to release resources in a timely fashion if recorded_url.response_recorder and recorded_url.response_recorder.tempfile: recorded_url.response_recorder.tempfile.close() + + self.writer_pool.maybe_idle_rollover() except queue.Empty: if self.stop.is_set(): break self.idle = time.time() - self.writer_pool.maybe_idle_rollover() self.logger.info('WarcWriterThread shutting down') self.writer_pool.close_writers()