From 3050fd2b2b560452eeb7a1cfd835290f416e1053 Mon Sep 17 00:00:00 2001 From: oskarhek <53567709+oskarhek@users.noreply.github.com> Date: Thu, 5 Jan 2023 23:15:49 +0100 Subject: [PATCH] issue_792 catch warcio exception (#793) --- pywb/indexer/cdxindexer.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pywb/indexer/cdxindexer.py b/pywb/indexer/cdxindexer.py index 6e7a2ffe..23cb3490 100644 --- a/pywb/indexer/cdxindexer.py +++ b/pywb/indexer/cdxindexer.py @@ -1,5 +1,9 @@ +import logging import os import sys +import traceback + +import warcio # Use ujson if available try: @@ -298,8 +302,11 @@ def write_multi_cdx_index(output, inputs, **options): with open(fullpath, 'rb') as infile: entry_iter = record_iter(infile) - for entry in entry_iter: - writer.write(entry, filename) + try: + for entry in entry_iter: + writer.write(entry, filename) + except warcio.exceptions.ArchiveLoadFailed: + logging.error('Error while indexing file %s, %s',filename,traceback.format_exc()) return writer