mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Fix dupe renaming and add additional test for warc.gz
This commit is contained in:
parent
ee15a3e06f
commit
52c5b84b1f
@ -7,6 +7,7 @@ import yaml
|
||||
import re
|
||||
import gzip
|
||||
import six
|
||||
import pathlib
|
||||
|
||||
from distutils.util import strtobool
|
||||
from pkg_resources import resource_string, get_distribution
|
||||
@ -149,8 +150,11 @@ directory structure expected by pywb
|
||||
|
||||
def _rename_warc(self, warc_basename):
|
||||
dupe_idx = 1
|
||||
ext = ''.join(pathlib.Path(warc_basename).suffixes)
|
||||
pre_ext_name = warc_basename.split(ext)[0]
|
||||
|
||||
while True:
|
||||
new_basename = f'{warc_basename}-{dupe_idx}'
|
||||
new_basename = f'{pre_ext_name}-{dupe_idx}{ext}'
|
||||
if not os.path.exists(os.path.join(self.archive_dir, new_basename)):
|
||||
break
|
||||
dupe_idx += 1
|
||||
|
@ -65,6 +65,21 @@ class TestManager:
|
||||
assert archive in os.listdir(manager.archive_dir)
|
||||
assert archive in index_text
|
||||
|
||||
def test_add_valid_archives_dupe_name(self, tmp_path):
|
||||
manager = self.get_test_collections_manager(tmp_path)
|
||||
warc_filename = 'sample_archive/warcs/example.warc.gz'
|
||||
manager.add_archives(warc_filename)
|
||||
manager.add_archives(warc_filename)
|
||||
|
||||
with open(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE), 'r') as f:
|
||||
index_text = f.read()
|
||||
|
||||
expected_archives = ('example.warc.gz', 'example-1.warc.gz')
|
||||
|
||||
for archive in expected_archives:
|
||||
assert archive in os.listdir(manager.archive_dir)
|
||||
assert archive in index_text
|
||||
|
||||
def test_add_valid_archives_dont_unpack_wacz(self, tmp_path):
|
||||
manager = self.get_test_collections_manager(tmp_path)
|
||||
archives = ['sample_archive/warcs/example.arc', 'sample_archive/warcs/example.arc.gz',
|
||||
|
Loading…
x
Reference in New Issue
Block a user