1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Fix for WACZ as well

This commit is contained in:
Tessa Walsh 2024-04-24 12:15:25 +02:00
parent 8d5b2be4c4
commit ee15a3e06f

View File

@ -147,7 +147,7 @@ directory structure expected by pywb
if invalid_archives: if invalid_archives:
logging.warning(f'Invalid archives weren\'t added: {", ".join(invalid_archives)}') logging.warning(f'Invalid archives weren\'t added: {", ".join(invalid_archives)}')
def _rename_warc(self, source_dir, warc_basename): def _rename_warc(self, warc_basename):
dupe_idx = 1 dupe_idx = 1
while True: while True:
new_basename = f'{warc_basename}-{dupe_idx}' new_basename = f'{warc_basename}-{dupe_idx}'
@ -163,7 +163,7 @@ directory structure expected by pywb
# don't overwrite existing warcs with duplicate names # don't overwrite existing warcs with duplicate names
if os.path.exists(os.path.join(self.archive_dir, warc_basename)): if os.path.exists(os.path.join(self.archive_dir, warc_basename)):
warc_basename = self._rename_warc(source_dir, warc_basename) warc_basename = self._rename_warc(warc_basename)
logging.info(f'Warc {os.path.basename(warc)} already exists - renamed to {warc_basename}.') logging.info(f'Warc {os.path.basename(warc)} already exists - renamed to {warc_basename}.')
warc_dest = os.path.join(self.archive_dir, warc_basename) warc_dest = os.path.join(self.archive_dir, warc_basename)
@ -209,8 +209,9 @@ directory structure expected by pywb
warc_destination_path = os.path.join(self.archive_dir, warc_filename) warc_destination_path = os.path.join(self.archive_dir, warc_filename)
if os.path.exists(warc_destination_path): if os.path.exists(warc_destination_path):
logging.warning(f'Warc {warc_filename} wasn\'t added because of duplicate name.') warc_filename = self._rename_warc(warc_filename)
continue logging.info(f'Warc {warc_destination_path} already exists - renamed to {warc_filename}.')
warc_destination_path = os.path.join(self.archive_dir, warc_filename)
warc_filename_mapping[os.path.basename(extracted_warc_file)] = warc_filename warc_filename_mapping[os.path.basename(extracted_warc_file)] = warc_filename
shutil.copy2(os.path.join(temp_dir, extracted_warc_file), warc_destination_path) shutil.copy2(os.path.join(temp_dir, extracted_warc_file), warc_destination_path)