mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
don't wait for queue to be empty to do idle rollovers, because sometimes warcprox can stay busy for a long, long time
This commit is contained in:
parent
2f0c4454ac
commit
24082c2e8c
2
setup.py
2
setup.py
@@ -50,7 +50,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.1b1.dev89',
|
version='2.1b1.dev90',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@@ -152,9 +152,9 @@ class WarcWriterPool:
|
|||||||
def __init__(self, options=warcprox.Options()):
|
def __init__(self, options=warcprox.Options()):
|
||||||
self.default_warc_writer = WarcWriter(options=options)
|
self.default_warc_writer = WarcWriter(options=options)
|
||||||
self.warc_writers = {} # {prefix:WarcWriter}
|
self.warc_writers = {} # {prefix:WarcWriter}
|
||||||
self._last_sync = time.time()
|
|
||||||
self.options = options
|
self.options = options
|
||||||
self._lock = threading.RLock()
|
self._lock = threading.RLock()
|
||||||
|
self._last_maybe = time.time()
|
||||||
|
|
||||||
# chooses writer for filename specified by warcprox_meta["warc-prefix"] if set
|
# chooses writer for filename specified by warcprox_meta["warc-prefix"] if set
|
||||||
def _writer(self, recorded_url):
|
def _writer(self, recorded_url):
|
||||||
@@ -177,9 +177,11 @@ class WarcWriterPool:
|
|||||||
return self._writer(recorded_url).write_records(recorded_url)
|
return self._writer(recorded_url).write_records(recorded_url)
|
||||||
|
|
||||||
def maybe_idle_rollover(self):
|
def maybe_idle_rollover(self):
|
||||||
self.default_warc_writer.maybe_idle_rollover()
|
if time.time() - self._last_maybe > 20:
|
||||||
for w in self.warc_writers.values():
|
self.default_warc_writer.maybe_idle_rollover()
|
||||||
w.maybe_idle_rollover()
|
for w in self.warc_writers.values():
|
||||||
|
w.maybe_idle_rollover()
|
||||||
|
self._last_maybe = time.time()
|
||||||
|
|
||||||
def close_writers(self):
|
def close_writers(self):
|
||||||
self.default_warc_writer.close_writer()
|
self.default_warc_writer.close_writer()
|
||||||
|
@@ -98,11 +98,12 @@ class WarcWriterThread(threading.Thread):
|
|||||||
# try to release resources in a timely fashion
|
# try to release resources in a timely fashion
|
||||||
if recorded_url.response_recorder and recorded_url.response_recorder.tempfile:
|
if recorded_url.response_recorder and recorded_url.response_recorder.tempfile:
|
||||||
recorded_url.response_recorder.tempfile.close()
|
recorded_url.response_recorder.tempfile.close()
|
||||||
|
|
||||||
|
self.writer_pool.maybe_idle_rollover()
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
if self.stop.is_set():
|
if self.stop.is_set():
|
||||||
break
|
break
|
||||||
self.idle = time.time()
|
self.idle = time.time()
|
||||||
self.writer_pool.maybe_idle_rollover()
|
|
||||||
|
|
||||||
self.logger.info('WarcWriterThread shutting down')
|
self.logger.info('WarcWriterThread shutting down')
|
||||||
self.writer_pool.close_writers()
|
self.writer_pool.close_writers()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user