From 52e83632dd9bfab46c3387f50cb218d72ea3a005 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Thu, 26 Sep 2019 17:34:31 +0000 Subject: [PATCH] Another exception when trying to close a WARC file Recently, we found and fixed a problem when closing a WARC file. https://github.com/internetarchive/warcprox/pull/140 After using the updated warcprox in production, we got another exception in the same method, right after that point. ``` ERROR:root:caught exception processing b'https://abs.twimg.com/favicons/favicon.ico' Traceback (most recent call last): File "/opt/spn2/lib/python3.5/site-packages/warcprox/writerthread.py", line 78, in _process_url records = self.writer_pool.write_records(recorded_url) File "/opt/spn2/lib/python3.5/site-packages/warcprox/writer.py", line 227, in write_records return self._writer(recorded_url).write_records(recorded_url) File "/opt/spn2/lib/python3.5/site-packages/warcprox/writer.py", line 139, in write_records offset = self.f.tell() ValueError: I/O operation on closed file ERROR:warcprox.writer.WarcWriter:could not unlock file /1/liveweb/warcs/liveweb-20190923194044-wwwb-spn14.us.archive.org.warc.gz (I/O operation on closed file) CRITICAL:warcprox.writerthread.WarcWriterProcessor:WarcWriterProcessor(tid=6228) will try to continue after unexpected error Traceback (most recent call last): File "/opt/spn2/lib/python3.5/site-packages/warcprox/__init__.py", line 140, in _run self._get_process_put() File "/opt/spn2/lib/python3.5/site-packages/warcprox/writerthread.py", line 60, in _get_process_put self.writer_pool.maybe_idle_rollover() File "/opt/spn2/lib/python3.5/site-packages/warcprox/writer.py", line 233, in maybe_idle_rollover w.maybe_idle_rollover() File "/opt/spn2/lib/python3.5/site-packages/warcprox/writer.py", line 188, in maybe_idle_rollover self.close() File "/opt/spn2/lib/python3.5/site-packages/warcprox/writer.py", line 176, in close os.rename(self.path, finalpath) FileNotFoundError: [Errno 2] No such file or directory: '/1/liveweb/warcs/liveweb-20190923194044-wwwb-spn14.us.archive.org.warc.gz' -> '/1/liveweb/warcs/liveweb-20190923194044-wwwb-spn14.us.archive.org.warc.gz' ``` We don't have a WARC file and our code tries to run `os.rename` on a file that doesn't exist. We add exception handling for that case as well. I should have foreseen that when doing the previous fix :( --- warcprox/writer.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/warcprox/writer.py b/warcprox/writer.py index 5187d08..cc44be2 100644 --- a/warcprox/writer.py +++ b/warcprox/writer.py @@ -170,11 +170,14 @@ class WarcWriter: except Exception as exc: self.logger.error( 'could not unlock file %s (%s)', self.path, exc) - self.f.close() - finalpath = os.path.sep.join( - [self.directory, self.finalname]) - os.rename(self.path, finalpath) - + try: + self.f.close() + finalpath = os.path.sep.join( + [self.directory, self.finalname]) + os.rename(self.path, finalpath) + except Exception as exc: + self.logger.error( + 'could not close and rename file %s (%s)', self.path, exc) self.path = None self.f = None