Skip to content

Commit

Permalink
Add remove method to ZipFile (#111)
Browse files Browse the repository at this point in the history
* Add remove method to ZipFile

Refer to: python/cpython#103033

* Make use of `ZipFileWithRemove`
  • Loading branch information
bpepple authored Aug 31, 2024
1 parent a157c27 commit c779b46
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 81 deletions.
8 changes: 0 additions & 8 deletions .github/workflows/ruff.yml

This file was deleted.

88 changes: 33 additions & 55 deletions darkseid/archivers/zip.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from __future__ import annotations

import logging
import shutil
import tempfile
import zipfile
from typing import TYPE_CHECKING

from darkseid.zipfile_remove import ZipFileWithRemove

if TYPE_CHECKING:
from pathlib import Path
from typing import cast

import rarfile

Expand Down Expand Up @@ -71,8 +70,20 @@ def remove_file(self: ZipArchiver, archive_file: str) -> bool:
Returns:
bool: True if the file was successfully removed, False otherwise.
"""

return self._rebuild([archive_file])
try:
with ZipFileWithRemove(self.path, "a") as zf:
zf.remove(archive_file)
except KeyError:
return False
except (zipfile.BadZipfile, OSError):
logger.exception(
"Error writing zip archive %s :: %s",
self.path,
archive_file,
)
return False
else:
return True

def remove_files(self: ZipArchiver, filename_lst: list[str]) -> bool:
"""
Expand All @@ -84,8 +95,20 @@ def remove_files(self: ZipArchiver, filename_lst: list[str]) -> bool:
Returns:
bool: True if all files were successfully removed, False otherwise.
"""

return self._rebuild(filename_lst)
files = set(self.get_filename_list())
if filenames_to_remove := [filename for filename in filename_lst if filename in files]:
try:
with ZipFileWithRemove(self.path, "a") as zf:
for filename in filenames_to_remove:
zf.remove(filename)
except (zipfile.BadZipfile, OSError):
logger.exception(
"Error writing zip archive %s :: %s",
self.path,
filename,
)
return False
return True

def write_file(self: ZipArchiver, archive_file: str, data: str) -> bool:
"""
Expand All @@ -98,22 +121,10 @@ def write_file(self: ZipArchiver, archive_file: str, data: str) -> bool:
Returns:
bool: True if the write operation was successful, False otherwise.
"""

# At the moment, no other option but to rebuild the whole
# zip archive w/o the indicated file. Very sucky, but maybe
# another solution can be found
files = self.get_filename_list()
if archive_file in files:
self._rebuild([archive_file])

try:
# now just add the archive file as a new one
with zipfile.ZipFile(
self.path,
mode="a",
allowZip64=True,
compression=zipfile.ZIP_DEFLATED,
) as zf:
with ZipFileWithRemove(self.path, "a") as zf:
if archive_file in set(zf.namelist()):
zf.remove(archive_file)
zf.writestr(archive_file, data)
except (zipfile.BadZipfile, OSError):
logger.exception(
Expand Down Expand Up @@ -143,39 +154,6 @@ def get_filename_list(self: ZipArchiver) -> list[str]:
logger.exception("Error listing files in zip archive: %s", self.path)
return []

def _rebuild(self: ZipArchiver, exclude_list: list[str]) -> bool:
"""
Rebuilds the ZIP archive excluding specified files.
Args:
exclude_list (list[str]): The list of files to exclude from the rebuild.
Returns:
bool: True if the rebuild was successful, False otherwise.
"""

try:
with zipfile.ZipFile(
tempfile.NamedTemporaryFile(dir=self.path.parent, delete=False),
"w",
allowZip64=True,
) as zout:
with zipfile.ZipFile(self.path, mode="r") as zin:
for item in zin.infolist():
buffer = zin.read(item.filename)
if item.filename not in exclude_list:
zout.writestr(item, buffer)

# replace with the new file
self.path.unlink(missing_ok=True)
zout.close() # Required on Windows
shutil.move(cast(str, zout.filename), self.path)
except (zipfile.BadZipfile, OSError):
logger.exception("Error rebuilding zip file: %s", self.path)
return False
else:
return True

def copy_from_archive(self: ZipArchiver, other_archive: Archiver) -> bool:
"""
Copies files from another archive to the ZIP archive.
Expand Down
93 changes: 93 additions & 0 deletions darkseid/zipfile_remove/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""ZipFile with Remove method."""

# From https://github.com/python/cpython/pull/103033
# Not linted to compare against above PR
import contextlib
from zipfile import ZipFile, ZipInfo


class ZipFileWithRemove(ZipFile):
"""ZipFile with Remove method."""

def remove(self, zinfo_or_arcname):
"""Remove a member from the archive."""
if self.mode not in ("w", "x", "a"):
raise ValueError("remove() requires mode 'w', 'x', or 'a'")
if not self.fp:
raise ValueError("Attempt to write to ZIP archive that was already closed")
if self._writing:
raise ValueError("Can't write to ZIP archive while an open writing handle exists")

# Make sure we have an existing info object
if isinstance(zinfo_or_arcname, ZipInfo):
zinfo = zinfo_or_arcname
# make sure zinfo exists
if zinfo not in self.filelist:
raise KeyError("There is no item %r in the archive" % zinfo_or_arcname)
else:
# get the info object
zinfo = self.getinfo(zinfo_or_arcname)

return self._remove_members({zinfo})

def _remove_members(self, members, *, remove_physical=True, chunk_size=2**20):
"""Remove members in a zip file.
All members (as zinfo) should exist in the zip; otherwise the zip file
will erroneously end in an inconsistent state.
"""
fp = self.fp
entry_offset = 0
member_seen = False

# get a sorted filelist by header offset, in case the dir order
# doesn't match the actual entry order
filelist = sorted(self.filelist, key=lambda x: x.header_offset)
for i in range(len(filelist)):
info = filelist[i]
is_member = info in members

if not (member_seen or is_member):
continue

# get the total size of the entry
try:
offset = filelist[i + 1].header_offset
except IndexError:
offset = self.start_dir
entry_size = offset - info.header_offset

if is_member:
member_seen = True
entry_offset += entry_size

# update caches
self.filelist.remove(info)
with contextlib.suppress(KeyError):
del self.NameToInfo[info.filename]
continue

# update the header and move entry data to the new position
if remove_physical:
old_header_offset = info.header_offset
info.header_offset -= entry_offset
read_size = 0
while read_size < entry_size:
fp.seek(old_header_offset + read_size)
data = fp.read(min(entry_size - read_size, chunk_size))
fp.seek(info.header_offset + read_size)
fp.write(data)
fp.flush()
read_size += len(data)

# Avoid missing entry if entries have a duplicated name.
# Reverse the order as NameToInfo normally stores the last added one.
for info in reversed(self.filelist):
self.NameToInfo.setdefault(info.filename, info)

# update state
if remove_physical:
self.start_dir -= entry_offset
self._didModify = True

# seek to the start of the central dir
fp.seek(self.start_dir)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ testpaths = "tests"
exclude = "*~,.git/*,.mypy_cache/*,.pytest_cache/*,.venv*,__pycache__/*,cache/*,dist/*,node_modules/*,test-results/*,typings/*"

[tool.ruff]
extend-exclude = ["typings"]
extend-exclude = ["node_modules", "darkseid/zipfile_remove"]
target-version = "py310"
line-length = 100

Expand Down
17 changes: 0 additions & 17 deletions tests/test_archiver_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,23 +96,6 @@ def test_get_filename_list(zip_archiver, archive_file, data):
assert archive_file in filenames


@pytest.mark.parametrize(
("exclude_list", "data"), [(["test.txt"], "Hello, World!")], ids=["exclude_file"]
)
def test_rebuild(zip_archiver, exclude_list, data):
# Arrange
zip_archiver.write_file("test.txt", data)
zip_archiver.write_file("keep.txt", "Keep this file")

# Act
result = zip_archiver._rebuild(exclude_list) # noqa: SLF001

# Assert
assert result is True
assert "test.txt" not in zip_archiver.get_filename_list()
assert "keep.txt" in zip_archiver.get_filename_list()


@pytest.mark.parametrize(
("other_archive_files", "data"), [(["test.txt"], ["Hello, World!"])], ids=["simple_copy"]
)
Expand Down

0 comments on commit c779b46

Please sign in to comment.