def _rename_warc(self, warc_basename):
    """Pick a basename for a WARC whose name already exists in the archive dir.

    A ``-<n>`` counter is inserted between the stem and the full (possibly
    compound) extension, so ``example.warc.gz`` becomes ``example-1.warc.gz``,
    then ``example-2.warc.gz``, and so on until a name is found that does not
    exist under ``self.archive_dir``.

    :param str warc_basename: file name (no directory part) that collided
    :return: the first candidate basename not present in ``self.archive_dir``
    :rtype: str
    """
    # NOTE(review): the visible hunk ends inside the loop below; returning the
    # chosen name is assumed from how ``new_basename`` is built -- confirm
    # against the full file.

    # Every dotted suffix counts as the extension ('.warc.gz', not '.gz').
    ext = ''.join(pathlib.Path(warc_basename).suffixes)

    if ext and warc_basename.endswith(ext):
        # Slice the extension off the end.  ``warc_basename.split(ext)[0]``
        # cut at the *first* occurrence of ``ext`` instead, and raised
        # ``ValueError: empty separator`` when the name had no suffix at all.
        pre_ext_name = warc_basename[:-len(ext)]
    else:
        # No usable extension: append the counter to the end of the name.
        pre_ext_name, ext = warc_basename, ''

    dupe_idx = 1
    while True:
        new_basename = f'{pre_ext_name}-{dupe_idx}{ext}'
        if not os.path.exists(os.path.join(self.archive_dir, new_basename)):
            return new_basename
        dupe_idx += 1