Skip to content

Commit

Permalink
Fix for WACZ as well
Browse files — browse the repository at this point in the history
  • Loading branch information
tw4l committed Apr 24, 2024
1 parent 8d5b2be commit ee15a3e
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions pywb/manager/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def add_archives(self, archives, unpack_wacz=False):
if invalid_archives:
logging.warning(f'Invalid archives weren\'t added: {", ".join(invalid_archives)}')

def _rename_warc(self, source_dir, warc_basename):
def _rename_warc(self, warc_basename):
dupe_idx = 1
while True:
new_basename = f'{warc_basename}-{dupe_idx}'
Expand All @@ -163,7 +163,7 @@ def _add_warc(self, warc):

# don't overwrite existing warcs with duplicate names
if os.path.exists(os.path.join(self.archive_dir, warc_basename)):
warc_basename = self._rename_warc(source_dir, warc_basename)
warc_basename = self._rename_warc(warc_basename)
logging.info(f'Warc {os.path.basename(warc)} already exists - renamed to {warc_basename}.')

warc_dest = os.path.join(self.archive_dir, warc_basename)
Expand Down Expand Up @@ -209,8 +209,9 @@ def _add_wacz_unpacked(self, wacz):
warc_destination_path = os.path.join(self.archive_dir, warc_filename)

if os.path.exists(warc_destination_path):
logging.warning(f'Warc {warc_filename} wasn\'t added because of duplicate name.')
continue
warc_filename = self._rename_warc(warc_filename)
logging.info(f'Warc {warc_destination_path} already exists - renamed to {warc_filename}.')
warc_destination_path = os.path.join(self.archive_dir, warc_filename)

warc_filename_mapping[os.path.basename(extracted_warc_file)] = warc_filename
shutil.copy2(os.path.join(temp_dir, extracted_warc_file), warc_destination_path)
Expand Down

0 comments on commit ee15a3e

Please sign in to comment.