mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Fix dupe renaming and add additional test for warc.gz
This commit is contained in:
parent
ee15a3e06f
commit
52c5b84b1f
@ -7,6 +7,7 @@ import yaml
|
|||||||
import re
|
import re
|
||||||
import gzip
|
import gzip
|
||||||
import six
|
import six
|
||||||
|
import pathlib
|
||||||
|
|
||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
from pkg_resources import resource_string, get_distribution
|
from pkg_resources import resource_string, get_distribution
|
||||||
@ -149,8 +150,11 @@ directory structure expected by pywb
|
|||||||
|
|
||||||
def _rename_warc(self, warc_basename):
|
def _rename_warc(self, warc_basename):
|
||||||
dupe_idx = 1
|
dupe_idx = 1
|
||||||
|
ext = ''.join(pathlib.Path(warc_basename).suffixes)
|
||||||
|
pre_ext_name = warc_basename.split(ext)[0]
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
new_basename = f'{warc_basename}-{dupe_idx}'
|
new_basename = f'{pre_ext_name}-{dupe_idx}{ext}'
|
||||||
if not os.path.exists(os.path.join(self.archive_dir, new_basename)):
|
if not os.path.exists(os.path.join(self.archive_dir, new_basename)):
|
||||||
break
|
break
|
||||||
dupe_idx += 1
|
dupe_idx += 1
|
||||||
|
@ -65,6 +65,21 @@ class TestManager:
|
|||||||
assert archive in os.listdir(manager.archive_dir)
|
assert archive in os.listdir(manager.archive_dir)
|
||||||
assert archive in index_text
|
assert archive in index_text
|
||||||
|
|
||||||
|
def test_add_valid_archives_dupe_name(self, tmp_path):
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
warc_filename = 'sample_archive/warcs/example.warc.gz'
|
||||||
|
manager.add_archives(warc_filename)
|
||||||
|
manager.add_archives(warc_filename)
|
||||||
|
|
||||||
|
with open(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE), 'r') as f:
|
||||||
|
index_text = f.read()
|
||||||
|
|
||||||
|
expected_archives = ('example.warc.gz', 'example-1.warc.gz')
|
||||||
|
|
||||||
|
for archive in expected_archives:
|
||||||
|
assert archive in os.listdir(manager.archive_dir)
|
||||||
|
assert archive in index_text
|
||||||
|
|
||||||
def test_add_valid_archives_dont_unpack_wacz(self, tmp_path):
|
def test_add_valid_archives_dont_unpack_wacz(self, tmp_path):
|
||||||
manager = self.get_test_collections_manager(tmp_path)
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
archives = ['sample_archive/warcs/example.arc', 'sample_archive/warcs/example.arc.gz',
|
archives = ['sample_archive/warcs/example.arc', 'sample_archive/warcs/example.arc.gz',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user