continue after unexpected error

This commit is contained in:
Noah Levitt 2015-10-30 01:15:03 +00:00
parent fd847f01cd
commit 3363b2ec95

View File

@@ -39,31 +39,33 @@ class WarcWriterThread(threading.Thread):
cProfile.runctx('self._run()', globals(), locals(), sort='cumulative') cProfile.runctx('self._run()', globals(), locals(), sort='cumulative')
def _run(self):
    """Pull recorded URLs off the queue and write them until told to stop.

    An unexpected error while handling a URL is logged and, after a short
    pause, the loop resumes so that a single failure does not end the
    thread. The writers are closed on the way out no matter how the loop
    ended, including when ``self.stop`` is set while recovering from an
    error.

    NOTE: a stop request is normally honoured only once the queue has been
    found empty, but if ``self.stop`` is already set when an error is being
    recovered from, the outer loop exits without retrying and anything
    still queued is left unprocessed.
    """
    try:
        while not self.stop.is_set():
            try:
                self.setName('WarcWriterThread(tid={})'.format(warcprox.gettid()))
                while True:
                    # Keep the try narrow so that only an empty queue, and
                    # not a queue.Empty raised further downstream, is
                    # treated as "idle".
                    try:
                        recorded_url = self.recorded_url_q.get(block=True, timeout=0.5)
                    except queue.Empty:
                        if self.stop.is_set():
                            break
                        self.idle = time.time()
                        self.writer_pool.maybe_idle_rollover()
                        continue
                    self.idle = None
                    self._process_recorded_url(recorded_url)
            except Exception:
                # Exception rather than a bare except, so SystemExit and
                # KeyboardInterrupt are not swallowed and retried.
                self.logger.critical("WarcWriterThread will try to continue after unexpected error", exc_info=True)
                time.sleep(0.5)
    finally:
        self.logger.info('WarcWriterThread shutting down')
        try:
            self.writer_pool.close_writers()
        except Exception:
            self.logger.critical("WarcWriterThread failed to close writers during shutdown", exc_info=True)

def _process_recorded_url(self, recorded_url):
    """Add dedup info (if a dedup db is configured), write the records for
    one recorded URL, run the final tasks, and close its response tempfile.
    """
    try:
        if self.dedup_db:
            warcprox.dedup.decorate_with_dedup_info(self.dedup_db,
                    recorded_url, base32=self.options.base32)
        records = self.writer_pool.write_records(recorded_url)
        self._final_tasks(recorded_url, records)
    finally:
        # try to release resources in a timely fashion, even when writing
        # failed and the error is about to be logged by the caller
        if recorded_url.response_recorder and recorded_url.response_recorder.tempfile:
            recorded_url.response_recorder.tempfile.close()
# closest thing we have to heritrix crawl log at the moment # closest thing we have to heritrix crawl log at the moment
def _log(self, recorded_url, records): def _log(self, recorded_url, records):