From bc84c2fda0bcc77ad106b9a82c0660c616a956be Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 9 Mar 2016 07:27:03 -0800 Subject: [PATCH] indexing: declare 'record' and bail if no record was loaded, add test for empty file indexing, fixes #168 --- pywb/warc/archiveiterator.py | 4 +++- pywb/warc/test/test_indexing.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pywb/warc/archiveiterator.py b/pywb/warc/archiveiterator.py index 5df3bb03..bc23e527 100644 --- a/pywb/warc/archiveiterator.py +++ b/pywb/warc/archiveiterator.py @@ -80,6 +80,7 @@ class ArchiveIterator(object): raise_invalid_gzip = False empty_record = False + record = None while True: try: @@ -93,7 +94,8 @@ class ArchiveIterator(object): except EOFError: empty_record = True - self.read_to_end(record) + if record: + self.read_to_end(record) if self.reader.decompressor: # if another gzip member, continue diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index 1c8f4554..552bdef9 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -329,6 +329,17 @@ com,example)/ 20140216050221 {"url": "http://example.com/", "digest": "PEWDX5GTH """) +def test_cdxj_empty(): + options = dict(cdxj=True) + + buff = BytesIO() + + empty = BytesIO() + + write_cdx_index(buff, empty, 'empty.warc.gz', **options) + + assert buff.getvalue() == b'' + if __name__ == "__main__":