Skip to content

Commit

Permalink
Fix dupe renaming and add additional test for warc.gz
Browse files Browse the repository at this point in the history
  • Loading branch information
tw4l committed Apr 24, 2024
1 parent ee15a3e commit 52c5b84
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
6 changes: 5 additions & 1 deletion pywb/manager/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import re
import gzip
import six
import pathlib

from distutils.util import strtobool
from pkg_resources import resource_string, get_distribution
Expand Down Expand Up @@ -149,8 +150,11 @@ def add_archives(self, archives, unpack_wacz=False):

def _rename_warc(self, warc_basename):
dupe_idx = 1
ext = ''.join(pathlib.Path(warc_basename).suffixes)
pre_ext_name = warc_basename.split(ext)[0]

while True:
new_basename = f'{warc_basename}-{dupe_idx}'
new_basename = f'{pre_ext_name}-{dupe_idx}{ext}'
if not os.path.exists(os.path.join(self.archive_dir, new_basename)):
break
dupe_idx += 1
Expand Down
15 changes: 15 additions & 0 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,21 @@ def test_add_valid_archives_unpack_wacz(self, tmp_path):
assert archive in os.listdir(manager.archive_dir)
assert archive in index_text

def test_add_valid_archives_dupe_name(self, tmp_path):
    """Add the same WARC twice and check that both copies are kept.

    The second copy is expected to be stored under a de-duplicated name
    with the counter inserted before the extension
    (``example-1.warc.gz``), and both names must show up in the
    collection's index file.
    """
    manager = self.get_test_collections_manager(tmp_path)
    warc_filename = 'sample_archive/warcs/example.warc.gz'

    # Adding the identical path a second time triggers the dupe renaming.
    for _ in range(2):
        manager.add_archives(warc_filename)

    index_path = os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE)
    with open(index_path, 'r') as index_file:
        index_text = index_file.read()

    # Both the original and the renamed copy must exist on disk and be indexed.
    for archive in ('example.warc.gz', 'example-1.warc.gz'):
        assert archive in os.listdir(manager.archive_dir)
        assert archive in index_text

def test_add_valid_archives_dont_unpack_wacz(self, tmp_path):
manager = self.get_test_collections_manager(tmp_path)
archives = ['sample_archive/warcs/example.arc', 'sample_archive/warcs/example.arc.gz',
Expand Down

0 comments on commit 52c5b84

Please sign in to comment.