From ca36407a65cf81196dac37cae680d282aba07c42 Mon Sep 17 00:00:00 2001 From: Webb Scales <7795764+webbnh@users.noreply.github.com> Date: Wed, 2 Aug 2023 12:24:03 -0400 Subject: [PATCH] Attempt to improve upload performance (#3501) Use subprocess tar command to extract files from tarball PBENCH-1226 --- lib/pbench/server/__init__.py | 5 + lib/pbench/server/cache_manager.py | 240 +++++-- .../test/unit/server/test_cache_manager.py | 636 +++++++++++++----- 3 files changed, 631 insertions(+), 250 deletions(-) diff --git a/lib/pbench/server/__init__.py b/lib/pbench/server/__init__.py index 346eb81931..ee2f8cdf4c 100644 --- a/lib/pbench/server/__init__.py +++ b/lib/pbench/server/__init__.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta, tzinfo from enum import auto, Enum from logging import Logger +import os from pathlib import Path from time import time as _time from typing import Dict, List, Optional, Union @@ -21,6 +22,10 @@ JSONOBJECT = Dict[JSONSTRING, JSONVALUE] JSON = JSONVALUE +# Define a type hint for "Path-like" parameters, so the cumbersomeness of the +# type won't discourage us from using it where it is warranted. +PathLike = Union[str, bytes, os.PathLike] + class OperationCode(Enum): """Enumeration for CRUD operations. diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py index 4715d48193..8de5ff964b 100644 --- a/lib/pbench/server/cache_manager.py +++ b/lib/pbench/server/cache_manager.py @@ -6,11 +6,11 @@ import shlex import shutil import subprocess -import tarfile +import time from typing import Any, IO, Optional, Union from pbench.common import MetadataLog, selinux -from pbench.server import JSONOBJECT, PbenchServerConfig +from pbench.server import JSONOBJECT, PathLike, PbenchServerConfig from pbench.server.database.models.datasets import Dataset from pbench.server.utils import get_tarball_md5 @@ -35,7 +35,7 @@ def __str__(self) -> str: class BadFilename(CacheManagerError): """A bad tarball path was given.""" - def __init__(self, path: Union[str, Path]): + def __init__(self, path: PathLike): self.path = str(path) def __str__(self) -> str: @@ -45,7 +45,7 @@ def __str__(self) -> str: class CacheExtractBadPath(CacheManagerError): """Request to extract a path that's bad or not a file""" - def __init__(self, tar_name: Path, path: Union[str, Path]): + def __init__(self, tar_name: Path, path: PathLike): self.name = tar_name.name self.path = str(path) @@ -85,7 +85,7 @@ def __str__(self) -> str: class TarballUnpackError(CacheManagerError): - """An error occured trying to unpack a tarball.""" + """An error occurred trying to unpack a tarball.""" def __init__(self, tarball: Path, error: str): self.tarball = tarball @@ -134,6 +134,14 @@ class CacheObject: type: CacheType +# Type hint definitions for the cache map. +# +# CacheMapEntry: { "details": CacheObject, "children": CacheMap } +# CacheMap: { "": CacheMapEntry, ... } +CacheMapEntry = dict[str, Union[CacheObject, "CacheMap"]] +CacheMap = dict[str, CacheMapEntry] + + def make_cache_object(dir_path: Path, path: Path) -> CacheObject: """Collects the file info @@ -185,14 +193,14 @@ def make_cache_object(dir_path: Path, path: Path) -> CacheObject: class Inventory: - """Encapsulate the tarfile TarFile object and file stream + """Encapsulate the file stream and subprocess.Popen object This encapsulation allows cleaner downstream handling, so that we can close - both the extractfile file stream and the tarball object itself when we're - done. This eliminates interference with later operations. 
+    both the extracted file stream and the Popen object itself when we're done.
+    This eliminates interference with later operations.
     """
 
-    def __init__(self, stream: IO[bytes], tarfile: Optional[tarfile.TarFile] = None):
+    def __init__(self, stream: IO[bytes], subproc: Optional[subprocess.Popen] = None):
         """Construct an instance to track extracted inventory
 
         This encapsulates many byte stream operations so that it can be used
@@ -200,16 +208,40 @@ def __init__(self, stream: IO[bytes], tarfile: Optional[tarfile.TarFile] = None)
 
         Args:
             stream: the data stream of a specific tarball member
-            tarfile: the TarFile object
+            subproc: the subprocess producing the stream, if any
         """
-        self.tarfile = tarfile
+        self.subproc = subproc
         self.stream = stream
 
     def close(self):
-        """Close both the byte stream and, if open, a tarfile object"""
+        """Close the byte stream and clean up the Popen object"""
+        if self.subproc:
+            try:
+                if self.subproc.poll() is None:
+                    # The subprocess is still running: kill it, drain the outputs,
+                    # and wait for its termination. (If the timeout on the wait()
+                    # is exceeded, it will raise subprocess.TimeoutExpired rather
+                    # than waiting forever...it's not clear what will happen after
+                    # that, but there's not a good alternative, so I hope this
+                    # never actually happens.)
+                    self.subproc.kill()
+                    if self.subproc.stdout:
+                        while self.subproc.stdout.read(4096):
+                            pass
+                    if self.subproc.stderr:
+                        while self.subproc.stderr.read(4096):
+                            pass
+                    self.subproc.wait(60.0)
+            finally:
+                # Release our reference to the subprocess.Popen object so that the
+                # object can be reclaimed.
+                self.subproc = None
 
+        # Explicitly close the stream, in case there never was an associated
+        # subprocess. (If there was an associated subprocess, the streams are
+        # now empty, and we'll assume that they are closed when the Popen
+        # object is reclaimed.)
         self.stream.close()
-        if self.tarfile:
-            self.tarfile.close()
 
     def getbuffer(self):
         """Return the underlying byte buffer (used by send_file)"""
@@ -229,7 +261,7 @@ def seek(self, *args, **kwargs) -> int:
 
     def __repr__(self) -> str:
         """Return a string representation"""
-        return f"<Stream {self.stream} from {self.tarfile}>"
+        return f"<Stream {self.stream} from {self.subproc}>"
 
     def __iter__(self):
         """Allow iterating through lines in the buffer"""
@@ -255,6 +287,11 @@ class Tarball:
     database representations of a dataset.
     """
 
+    # Wait no more than a minute for the tar(1) command to start producing
+    # output; perform the wait in 0.02s increments.
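+    #
+    # (An illustrative sketch of how these constants are applied; the real
+    # loop in extract(), below, also stops polling when the subprocess exits:
+    #
+    #     start = time.time()
+    #     while not tarproc.stdout.peek():
+    #         if time.time() - start > Tarball.TAR_EXEC_TIMEOUT:
+    #             tarproc.kill()
+    #             raise subprocess.TimeoutExpired(...)
+    #         time.sleep(Tarball.TAR_EXEC_WAIT)
+    #
+    # that is, at most 60.0s / 0.02s == 3000 polls before giving up.)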
+ TAR_EXEC_TIMEOUT = 60.0 + TAR_EXEC_WAIT = 0.02 + def __init__(self, path: Path, controller: "Controller"): """Construct a `Tarball` object instance @@ -282,7 +319,7 @@ def __init__(self, path: Path, controller: "Controller"): self.cache: Path = controller.cache / self.resource_id # Record hierarchy of a Tar ball - self.cachemap: Optional[JSONOBJECT] = None + self.cachemap: Optional[CacheMap] = None # Record the base of the unpacked files for cache management, which # is (self.cache / self.name) and will be None when the cache is @@ -362,11 +399,11 @@ def create(cls, tarball: Path, controller: "Controller") -> "Tarball": except Exception as e: try: md5_destination.unlink() - except Exception as e: + except Exception as md5_e: controller.logger.error( "Unable to recover by removing {} MD5 after tarball copy failure: {}", name, - e, + md5_e, ) controller.logger.error( "ERROR copying dataset {} tarball {}: {}", name, tarball, e @@ -399,13 +436,15 @@ def cache_map(self, dir_path: Path): dir_path: root directory """ root_dir_path = dir_path.parent - cmap = {dir_path.name: {"details": make_cache_object(root_dir_path, dir_path)}} - dir_queue = deque([(dir_path, cmap)]) + cmap: CacheMap = { + dir_path.name: {"details": make_cache_object(root_dir_path, dir_path)} + } + dir_queue = deque(((dir_path, cmap),)) while dir_queue: dir_path, parent_map = dir_queue.popleft() tar_n = dir_path.name - curr = {} + curr: CacheMapEntry = {} for l_path in dir_path.glob("*"): tar_info = make_cache_object(root_dir_path, l_path) curr[l_path.name] = {"details": tar_info} @@ -418,13 +457,13 @@ def cache_map(self, dir_path: Path): self.cachemap = cmap @staticmethod - def traverse_cmap(path: Path, cachemap: dict) -> dict[str, dict]: + def traverse_cmap(path: Path, cachemap: CacheMap) -> CacheMapEntry: """Sequentially traverses the cachemap to find the leaf of a relative path reference Args: - path: relative path of the sub-directory/file - cachemap: dictionary mapping of the root Dicrectory + path: relative path of the subdirectory/file + cachemap: dictionary mapping of the root directory Raises: BadDirpath if the directory/file path is not valid @@ -437,9 +476,9 @@ def traverse_cmap(path: Path, cachemap: dict) -> dict[str, dict]: try: for file_l in file_list: - info = f_entries[file_l] + info: CacheMapEntry = f_entries[file_l] if info["details"].type == CacheType.DIRECTORY: - f_entries = info["children"] + f_entries: CacheMap = info["children"] else: raise BadDirpath( f"Found a file {file_l!r} where a directory was expected in path {str(path)!r}" @@ -454,13 +493,13 @@ def get_info(self, path: Path) -> JSONOBJECT: """Returns the details of the given file/directory in dict format NOTE: This requires a call to the cache_map method to build a map that - can be traversed. Currently this is done only on unpack, and isn't + can be traversed. Currently, this is done only on unpack, and isn't useful except within the `pbench-index` process. This map needs to either be built dynamically (potentially expensive) or persisted in SQL or perhaps Redis. 
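 
         For example, an illustrative call (the exact key set is assumed here;
         it mirrors the CacheObject fields captured in the map):
 
             tb.get_info(Path("dir_name/subdir1/f11.txt"))
             # -> {"name": "f11.txt", "type": CacheType.FILE, "size": ..., ...}
 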
Args: - path: path of the file/sub-directory + path: path of the file/subdirectory Raises: BadDirpath on bad directory path @@ -506,7 +545,7 @@ def get_info(self, path: Path) -> JSONOBJECT: return fd_info @staticmethod - def extract(tarball_path: Path, path: Path) -> Inventory: + def extract(tarball_path: Path, path: str) -> Inventory: """Returns a file stream for a file within a tarball Args: @@ -515,25 +554,87 @@ def extract(tarball_path: Path, path: Path) -> Inventory: Returns: An inventory object that mimics an IO[bytes] object while also - maintaining a reference to the tarfile TarFile object to be - closed later. + maintaining a reference to the subprocess.Popen object to be + cleaned up later. Raise: - TarballNotFound on failure opening the tarball CacheExtractBadPath if the target cannot be extracted + TarballUnpackError on other tar-command failures + Any exception raised by subprocess.Popen() + subprocess.TimeoutExpired if the tar command hangs """ - try: - tar = tarfile.open(tarball_path, "r:*") - except Exception as exc: - raise TarballNotFound(str(tarball_path)) from exc - try: - stream = tar.extractfile(str(path)) - except Exception as exc: - raise CacheExtractBadPath(tarball_path, path) from exc - else: - if not stream: - raise CacheExtractBadPath(tarball_path, path) - return Inventory(stream, tar) + tar_path = shutil.which("tar") + if tar_path is None: + raise TarballUnpackError( + tarball_path, "External 'tar' executable not found" + ) + + # The external tar utility offers better capabilities than the + # Standard Library package, so run it in a subprocess: extract + # the target member from the specified tar archive and direct it to + # stdout; we expect only one occurrence of the target member, so stop + # processing as soon as we find it instead of looking for additional + # instances of it later in the archive -- this is a huge savings when + # the archive is very large. + tar_command = [ + str(tar_path), + "xf", + tarball_path, + "--to-stdout", + "--occurrence=1", + path, + ] + tarproc = subprocess.Popen( + tar_command, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + # Wait for the tar(1) command to start producing output, but stop + # waiting if the subprocess exits or if it takes too long. + start = time.time() + while not tarproc.stdout.peek(): + if tarproc.poll() is not None: + break + + elapsed = time.time() - start + if elapsed > Tarball.TAR_EXEC_TIMEOUT: + # No signs of life from the subprocess. Kill it to ensure that + # the Python runtime can clean it up after we leave, and report + # the failure. + tarproc.kill() + raise subprocess.TimeoutExpired( + cmd=tar_command, + timeout=elapsed, + output=tarproc.stdout, + stderr=tarproc.stderr, + ) + + time.sleep(Tarball.TAR_EXEC_WAIT) + + # If the return code is None (meaning the command is still running) or + # is zero (meaning it completed successfully), then return the stream + # containing the extracted file to our caller, and return the Popen + # object so that we can clean it up when the Inventory object is closed. + if not tarproc.returncode: + return Inventory(tarproc.stdout, tarproc) + + # The tar command was invoked successfully (otherwise, the Popen() + # constructor would have raised an exception), but it exited with + # an error code. We have to glean what went wrong by looking at + # stderr, which is fragile but the only option. 
Rather than + # relying on looking for specific text, we assume that, if the error + # references the archive member, then it was a bad path; otherwise, it + # was some sort of error unpacking it. + error_text = tarproc.stderr.read().decode() + if path in error_text: + # "tar: missing_member.txt: Not found in archive" + raise CacheExtractBadPath(tarball_path, path) + # "tar: /path/to/bad_tarball.tar.xz: Cannot open: No such file or directory" + raise TarballUnpackError( + tarball_path, f"Unexpected error from {tar_path}: {error_text!r}" + ) def get_inventory(self, path: str) -> Optional[JSONOBJECT]: """Access the file stream of a tarball member file. @@ -551,9 +652,9 @@ def get_inventory(self, path: str) -> Optional[JSONOBJECT]: "stream": Inventory(self.tarball_path.open("rb"), None), } else: - file_path = Path(self.name) / path + file_path = f"{self.name}/{path}" stream = Tarball.extract(self.tarball_path, file_path) - info = {"name": file_path.name, "type": CacheType.FILE, "stream": stream} + info = {"name": file_path, "type": CacheType.FILE, "stream": stream} return info @@ -579,7 +680,10 @@ def _get_metadata(tarball_path: Path) -> Optional[JSONOBJECT]: @staticmethod def subprocess_run( - command: str, working_dir: Path, exception: type[CacheManagerError], ctx: Path + command: str, + working_dir: PathLike, + exception: type[CacheManagerError], + ctx: Path, ): """Runs a command as a subprocess. @@ -629,14 +733,15 @@ def unpack(self): Rather than passing the indexer the root `/srv/pbench/.cache` or trying to update all of the indexer code (which still jumps back and forth between the tarball and the unpacked files), we maintain the "cache" - directory as two paths: self.cache which is the directory we manage + directory as two paths: self.cache which is the directory we manage here and pass to the indexer (/srv/pbench/.cache/) and the actual unpacked root (/srv/pbench/.cache//). """ self.cache.mkdir(parents=True) try: - tar_command = f"tar -x --no-same-owner --delay-directory-restore --force-local --file='{str(self.tarball_path)}'" + tar_command = "tar -x --no-same-owner --delay-directory-restore " + tar_command += f"--force-local --file='{str(self.tarball_path)}'" self.subprocess_run( tar_command, self.cache, TarballUnpackError, self.tarball_path ) @@ -770,18 +875,18 @@ def create( controller_dir.mkdir(exist_ok=True, mode=0o755) return cls(controller_dir, options.CACHE, logger) - def create_tarball(self, tarfile: Path) -> Tarball: + def create_tarball(self, tarfile_path: Path) -> Tarball: """Create a new dataset tarball object under the controller The new tarball object is linked to the controller so we can find it. Args: - tarfile: Path to source tarball file + tarfile_path: Path to source tarball file Returns: Tarball object """ - tarball = Tarball.create(tarfile, self) + tarball = Tarball.create(tarfile_path, self) self.datasets[tarball.resource_id] = tarball self.tarballs[tarball.name] = tarball return tarball @@ -974,7 +1079,7 @@ def _discover_controllers(self): """Build a representation of the ARCHIVE tree Record all controllers (top level directories), and the tarballs that - that represent datasets within them. + represent datasets within them. """ for file in self.archive_root.iterdir(): if file.is_dir() and file.name != CacheManager.TEMPORARY: @@ -1008,12 +1113,12 @@ def find_dataset(self, dataset_id: str) -> Tarball: # The dataset isn't already known; so search for it in the ARCHIVE tree # and (if found) discover the controller containing that dataset. 
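         # (A sketch of the invariant relied on here, per get_tarball_md5()
         # and the lifecycle unit test: a dataset's resource ID is the MD5
         # digest of its tarball, so
         #
         #     get_tarball_md5(file) == dataset_id
         #
         # picks out the tarball belonging to the requested dataset.)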
- for dir in self.archive_root.iterdir(): - if dir.is_dir() and dir.name != self.TEMPORARY: - for file in dir.glob(f"*{Dataset.TARBALL_SUFFIX}"): + for dir_entry in self.archive_root.iterdir(): + if dir_entry.is_dir() and dir_entry.name != self.TEMPORARY: + for file in dir_entry.glob(f"*{Dataset.TARBALL_SUFFIX}"): md5 = get_tarball_md5(file) if md5 == dataset_id: - self._add_controller(dir) + self._add_controller(dir_entry) return self.datasets[dataset_id] raise TarballNotFound(dataset_id) @@ -1037,7 +1142,7 @@ def find_dataset(self, dataset_id: str) -> Tarball: # Remove the tarball and MD5 file from ARCHIVE after uncaching the # unpacked directory tree. - def create(self, tarfile: Path) -> Tarball: + def create(self, tarfile_path: Path) -> Tarball: """Bring a new tarball under cache manager management. Move a dataset tarball and companion MD5 file into the specified @@ -1045,8 +1150,7 @@ def create(self, tarfile: Path) -> Tarball: necessary. Args: - controller: associated controller name - tarfile: dataset tarball path + tarfile_path: dataset tarball path Raises BadDirpath: Failure on extracting the file from tarball @@ -1059,19 +1163,19 @@ def create(self, tarfile: Path) -> Tarball: Tarball object """ try: - metadata = Tarball._get_metadata(tarfile) + metadata = Tarball._get_metadata(tarfile_path) if metadata: controller_name = metadata["run"]["controller"] else: controller_name = "unknown" except Exception as exc: - raise MetadataError(tarfile, exc) + raise MetadataError(tarfile_path, exc) if not controller_name: - raise MetadataError(tarfile, "no controller value") - if not tarfile.is_file(): - raise BadFilename(tarfile) - name = Dataset.stem(tarfile) + raise MetadataError(tarfile_path, ValueError("no controller value")) + if not tarfile_path.is_file(): + raise BadFilename(tarfile_path) + name = Dataset.stem(tarfile_path) if name in self.tarballs: raise DuplicateTarball(name) if controller_name in self.controllers: @@ -1079,7 +1183,7 @@ def create(self, tarfile: Path) -> Tarball: else: controller = Controller.create(controller_name, self.options, self.logger) self.controllers[controller_name] = controller - tarball = controller.create_tarball(tarfile) + tarball = controller.create_tarball(tarfile_path) tarball.metadata = metadata self.tarballs[tarball.name] = tarball self.datasets[tarball.resource_id] = tarball @@ -1122,7 +1226,7 @@ def get_inventory(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]: } Args: - dataset: Dataset resource ID + dataset_id: Dataset resource ID target: relative file path within the tarball Returns: diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py index 2bd59f4c2b..d483fa5153 100644 --- a/lib/pbench/test/unit/server/test_cache_manager.py +++ b/lib/pbench/test/unit/server/test_cache_manager.py @@ -6,8 +6,7 @@ import re import shutil import subprocess -import tarfile -from typing import IO +from typing import Optional import pytest @@ -71,10 +70,16 @@ def selinux_enabled(monkeypatch): monkeypatch.setattr("pbench.common.selinux.restorecon", lambda a: None) -def fake_get_metadata(tb_path): +def fake_get_metadata(_tb_path): return {"pbench": {"date": "2002-05-16T00:00:00"}, "run": {"controller": "ABC"}} +MEMBER_NOT_FOUND_MSG = b"mock-tar: metadata.log: Not found in mock-archive" +CANNOT_OPEN_MSG = ( + b"mock-tar: /mock/result.tar.xz: Cannot open: No such mock-file or mock-directory" +) + + class TestCacheManager: def test_create(self, server_config, make_logger): """ @@ -137,13 +142,13 @@ 
def test_metadata( ): """Test behavior with metadata.log access errors.""" - def fake_metadata(tar_path): + def fake_metadata(_tar_path): return {"pbench": {"date": "2002-05-16T00:00:00"}} - def fake_metadata_run(tar_path): + def fake_metadata_run(_tar_path): return {"pbench": {"date": "2002-05-16T00:00:00"}, "run": {}} - def fake_metadata_controller(tar_path): + def fake_metadata_controller(_tar_path): return { "pbench": {"date": "2002-05-16T00:00:00"}, "run": {"controller": ""}, @@ -168,7 +173,8 @@ def fake_metadata_controller(tar_path): cm.create(source_tarball) assert str(exc.value) == expected_metaerror - expected_metaerror = f"A problem occurred processing metadata.log from {source_tarball!s}: 'no controller value'" + expected_metaerror = "A problem occurred processing metadata.log " + expected_metaerror += f"from {source_tarball!s}: 'no controller value'" with monkeypatch.context() as m: m.setattr(Tarball, "_get_metadata", fake_metadata_controller) with pytest.raises(MetadataError) as exc: @@ -210,7 +216,7 @@ def test_create_bad( cm.create(source_tarball) - # The create will remove the source files, so trying again should + # The create call will remove the source files, so trying again should # result in an error. with pytest.raises(BadFilename) as exc: cm.create(source_md5) @@ -221,10 +227,8 @@ def test_create_bad( tarball = cm.find_dataset(md5) with pytest.raises(DuplicateTarball) as exc: cm.create(tarball.tarball_path) - assert ( - str(exc.value) - == "A dataset tarball named 'pbench-user-benchmark_some + config_2021.05.01T12.42.42' is already present" - ) + msg = "A dataset tarball named 'pbench-user-benchmark_some + config_2021.05.01T12.42.42' is already present" + assert str(exc.value) == msg assert tarball.metadata == fake_get_metadata(tarball.tarball_path) assert exc.value.tarball == tarball.name @@ -253,10 +257,10 @@ def test_duplicate( def test_tarball_subprocess_run_with_exception(self, monkeypatch): """Test to check the subprocess_run functionality of the Tarball when - an Exception occured""" + an Exception occurred""" my_command = "mycommand" - def mock_run(args, **kwargs): + def mock_run(args, **_kwargs): assert args[0] == my_command raise subprocess.TimeoutExpired(my_command, 43) @@ -266,18 +270,18 @@ def mock_run(args, **kwargs): my_dir = "my_dir" with pytest.raises(TarballUnpackError) as exc: - Tarball.subprocess_run(command, my_dir, TarballUnpackError, my_dir) - assert ( - str(exc.value) - == f"An error occurred while unpacking {my_dir}: Command '{my_command}' timed out after 43 seconds" - ) + Tarball.subprocess_run( + command, my_dir, TarballUnpackError, Path(my_dir) + ) + msg = f"An error occurred while unpacking {my_dir}: Command '{my_command}' timed out after 43 seconds" + assert str(exc.value) == msg def test_tarball_subprocess_run_with_returncode(self, monkeypatch): """Test to check the subprocess_run functionality of the Tarball when returncode value is not zero""" my_command = "mycommand" - def mock_run(args, **kwargs): + def mock_run(args, **_kwargs): assert args[0] == my_command return subprocess.CompletedProcess( args, returncode=1, stdout=None, stderr="Some error unpacking tarball\n" @@ -290,10 +294,10 @@ def mock_run(args, **kwargs): with pytest.raises(TarballUnpackError) as exc: Tarball.subprocess_run(command, my_dir, TarballUnpackError, my_dir) - assert ( - str(exc.value) - == f"An error occurred while unpacking {my_dir.name}: {my_command} exited with status 1: 'Some error unpacking tarball'" - ) + + msg = f"An error occurred while unpacking 
{my_dir.name}: {my_command} " + msg += "exited with status 1: 'Some error unpacking tarball'" + assert str(exc.value) == msg def test_tarball_subprocess_run_success(self, monkeypatch): """Test to check the successful run of subprocess_run functionality of the Tarball.""" @@ -340,7 +344,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: subdir11/ subdir12/ f121_sym -> ../../subdir1/subdir15 - f122_sym -> ./bad_subdir/nonexixtent_file.txt + f122_sym -> ./bad_subdir/nonexistent_file.txt subdir13/ f131_sym -> /etc/passwd subdir14/ @@ -437,7 +441,7 @@ def __init__(self, path: Path, controller: Controller): self.cache = controller.cache / "ABC" self.unpacked = None - def test_unpack_tar_subprocess_exception(self, monkeypatch): + def test_unpack_tar_subprocess_exception(self, make_logger, monkeypatch): """Show that, when unpacking of the Tarball fails and raises an Exception it is handled successfully.""" tar = Path("/mock/A.tar.xz") @@ -451,7 +455,7 @@ def mock_rmtree(path: Path, ignore_errors=False): assert ignore_errors assert path == cache / "ABC" - def mock_run(command, dir_path, exception, dir_p): + def mock_run(command, _dir_path, exception, dir_p): verb = "tar" assert command.startswith(verb) raise exception(dir_p, subprocess.TimeoutExpired(verb, 43)) @@ -462,17 +466,15 @@ def mock_run(command, dir_path, exception, dir_p): m.setattr(shutil, "rmtree", mock_rmtree) m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) - tb = Tarball(tar, Controller(Path("/mock/archive"), cache, None)) + tb = Tarball(tar, Controller(Path("/mock/archive"), cache, make_logger)) with pytest.raises(TarballUnpackError) as exc: tb.unpack() - assert ( - str(exc.value) - == f"An error occurred while unpacking {tar}: Command 'tar' timed out after 43 seconds" - ) + msg = f"An error occurred while unpacking {tar}: Command 'tar' timed out after 43 seconds" + assert str(exc.value) == msg assert exc.type == TarballUnpackError assert rmtree_called - def test_unpack_find_subprocess_exception(self, monkeypatch): + def test_unpack_find_subprocess_exception(self, make_logger, monkeypatch): """Show that, when permission change of the Tarball fails and raises an Exception it is handled successfully.""" tar = Path("/mock/A.tar.xz") @@ -486,7 +488,7 @@ def mock_rmtree(path: Path, ignore_errors=False): assert ignore_errors assert path == cache / "ABC" - def mock_run(command, dir_path, exception, dir_p): + def mock_run(command, _dir_path, exception, dir_p): verb = "find" if command.startswith(verb): raise exception(dir_p, subprocess.TimeoutExpired(verb, 43)) @@ -499,24 +501,23 @@ def mock_run(command, dir_path, exception, dir_p): m.setattr(shutil, "rmtree", mock_rmtree) m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) - tb = Tarball(tar, Controller(Path("/mock/archive"), cache, None)) + tb = Tarball(tar, Controller(Path("/mock/archive"), cache, make_logger)) with pytest.raises(TarballModeChangeError) as exc: tb.unpack() - assert ( - str(exc.value) - == f"An error occurred while changing file permissions of {cache / 'ABC'}: Command 'find' timed out after 43 seconds" - ) + msg = "An error occurred while changing file permissions of " + msg += f"{cache / 'ABC'}: Command 'find' timed out after 43 seconds" + assert str(exc.value) == msg assert exc.type == TarballModeChangeError assert rmtree_called - def 
test_unpack_success(self, monkeypatch): + def test_unpack_success(self, make_logger, monkeypatch): """Test to check the unpacking functionality of the CacheManager""" tar = Path("/mock/A.tar.xz") cache = Path("/mock/.cache") call = list() - def mock_run(args, **kwargs): + def mock_run(args, **_kwargs): call.append(args[0]) tar_target = "--file=" + str(tar) @@ -525,18 +526,23 @@ def mock_run(args, **kwargs): args, returncode=0, stdout="Successfully Unpacked!", stderr=None ) + def mock_resolve(_path, _strict=False): + """In this scenario, there are no symlinks, + so resolve() should never be called.""" + raise AssertionError("Unexpected call to Path.resolve()") + with monkeypatch.context() as m: m.setattr(Path, "mkdir", lambda path, parents: None) m.setattr(subprocess, "run", mock_run) - m.setattr(Path, "resolve", lambda path, strict: path) + m.setattr(Path, "resolve", mock_resolve) m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) - tb = Tarball(tar, Controller(Path("/mock/archive"), cache, None)) + tb = Tarball(tar, Controller(Path("/mock/archive"), cache, make_logger)) tb.unpack() assert call == ["tar", "find"] assert tb.unpacked == cache / "ABC" / tb.name - def test_cache_map_success(self, monkeypatch, tmp_path): + def test_cache_map_success(self, make_logger, monkeypatch, tmp_path): """Test to build the cache map of the root directory""" tar = Path("/mock/dir_name.tar.xz") cache = Path("/mock/.cache") @@ -544,28 +550,25 @@ def test_cache_map_success(self, monkeypatch, tmp_path): with monkeypatch.context() as m: m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) - tb = Tarball(tar, Controller(Path("/mock/archive"), cache, None)) + tb = Tarball(tar, Controller(Path("/mock/archive"), cache, make_logger)) tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) tb.cache_map(tar_dir) - assert ( - tb.cachemap["dir_name"]["children"]["subdir1"]["details"].name - == "subdir1" - ) - assert ( - tb.cachemap["dir_name"]["children"]["subdir1"]["children"]["subdir14"][ - "children" - ]["subdir141"]["children"]["f1412_sym"]["details"].type - == CacheType.SYMLINK - ) + + sd1 = tb.cachemap["dir_name"]["children"]["subdir1"] + assert sd1["details"].name == "subdir1" + + sd141 = sd1["children"]["subdir14"]["children"]["subdir141"] + assert sd141["children"]["f1412_sym"]["details"].type == CacheType.SYMLINK @pytest.mark.parametrize( "file_path, expected_msg", [ ( "/dir_name/subdir1/f11.txt", - "The path '/dir_name/subdir1/f11.txt' is an absolute path, we expect relative path to the root directory.", + "The path '/dir_name/subdir1/f11.txt' is an absolute path, " + "we expect relative path to the root directory.", ), ( "dir_name/subdir1/subdir11/../f11.txt", @@ -593,12 +596,13 @@ def test_cache_map_success(self, monkeypatch, tmp_path): ), ( "dir_name/subdir1/subdir14/subdir141/f1412_sym/ne_file", - "Found a file 'f1412_sym' where a directory was expected in path 'dir_name/subdir1/subdir14/subdir141/f1412_sym/ne_file'", + "Found a file 'f1412_sym' where a directory was expected " + "in path 'dir_name/subdir1/subdir14/subdir141/f1412_sym/ne_file'", ), ], ) def test_cache_map_bad_dir_path( - self, monkeypatch, tmp_path, file_path, expected_msg + self, make_logger, monkeypatch, tmp_path, file_path, expected_msg ): """Test to check bad directory or file path""" tar = 
Path("/mock/dir_name.tar.xz") @@ -607,7 +611,7 @@ def test_cache_map_bad_dir_path( with monkeypatch.context() as m: m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) - tb = Tarball(tar, Controller(Path("/mock/archive"), cache, None)) + tb = Tarball(tar, Controller(Path("/mock/archive"), cache, make_logger)) tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) @@ -768,6 +772,7 @@ def test_cache_map_bad_dir_path( ) def test_cache_map_traverse_cmap( self, + make_logger, monkeypatch, tmp_path, file_path, @@ -785,7 +790,7 @@ def test_cache_map_traverse_cmap( with monkeypatch.context() as m: m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) - tb = Tarball(tar, Controller(Path("/mock/archive"), cache, None)) + tb = Tarball(tar, Controller(Path("/mock/archive"), cache, make_logger)) tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) @@ -862,7 +867,7 @@ def test_cache_map_traverse_cmap( ], ) def test_cache_map_get_info_cmap( - self, monkeypatch, tmp_path, file_path, expected_msg + self, make_logger, monkeypatch, tmp_path, file_path, expected_msg ): """Test to check if the info returned by the cachemap is correct""" tar = Path("/mock/dir_name.tar.xz") @@ -871,7 +876,7 @@ def test_cache_map_get_info_cmap( with monkeypatch.context() as m: m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) - tb = Tarball(tar, Controller(Path("/mock/archive"), cache, None)) + tb = Tarball(tar, Controller(Path("/mock/archive"), cache, make_logger)) tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) @@ -891,7 +896,7 @@ def test_cache_map_get_info_cmap( ], ) def test_get_inventory( - self, monkeypatch, tmp_path, file_path, exp_file_type, exp_stream + self, make_logger, monkeypatch, tmp_path, file_path, exp_file_type, exp_stream ): """Test to extract file contents/stream from a file""" tar = Path("/mock/dir_name.tar.xz") @@ -902,7 +907,7 @@ def test_get_inventory( m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) m.setattr(Tarball, "extract", lambda _t, _p: Inventory(exp_stream)) m.setattr(Path, "open", lambda _s, _m="rb": exp_stream) - tb = Tarball(tar, Controller(Path("/mock/archive"), cache, None)) + tb = Tarball(tar, Controller(Path("/mock/archive"), cache, make_logger)) tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) @@ -913,19 +918,19 @@ def test_get_inventory( def test_cm_inventory(self, monkeypatch, server_config, make_logger): """Verify the happy path of the high level get_inventory""" - id = None + dataset_id = None class MockTarball: def get_inventory(self, target: str) -> JSONOBJECT: return { - "name": target, + "name": target if self else None, # Quiet the linter "type": CacheType.FILE, "stream": Inventory(io.BytesIO(b"success")), } - def mock_find_dataset(self, dataset: str) -> MockTarball: - nonlocal id - id = dataset + def mock_find_dataset(_self, dataset: str) -> MockTarball: + nonlocal dataset_id + dataset_id = dataset return MockTarball() @@ -933,84 +938,140 @@ def mock_find_dataset(self, dataset: str) -> MockTarball: m.setattr(CacheManager, "find_dataset", mock_find_dataset) cm = CacheManager(server_config, make_logger) inventory = 
cm.get_inventory("dataset", "target") - assert id == "dataset" + assert dataset_id == "dataset" assert inventory["name"] == "target" assert inventory["stream"].read() == b"success" - def test_tarfile_extract(self, monkeypatch, tmp_path): - """Test to check Tarball.extract success""" - tar = Path("/mock/result.tar.xz") - contents = b"[test]\nfoo=bar\n" - - class MockTarFile: - def extractfile(self, path: str) -> IO[bytes]: - if path == "metadata.log": - return io.BytesIO(contents) - raise Exception("you can't handle exceptions") - - def fake_tarfile_open(tarfile: str, *args): - if str(tarfile) == str(tar): - return MockTarFile() - raise Exception("You didn't see this coming") - - with monkeypatch.context() as m: - m.setattr(tarfile, "open", fake_tarfile_open) - got = Tarball.extract(tar, Path("metadata.log")) - assert isinstance(got, Inventory) - assert got.read() == contents - - def test_tarfile_open_fails(self, monkeypatch, tmp_path): - """Test to check non-existent tarfile""" - tar = Path("/mock/result.tar.xz") - - def fake_tarfile_open(self, path): - raise tarfile.TarError("Invalid Tarfile") - - with monkeypatch.context() as m: - m.setattr(tarfile, "open", fake_tarfile_open) - - expected_error_msg = f"The dataset tarball named '{tar}' is not found" - with pytest.raises(TarballNotFound) as exc: - Tarball.extract(tar, Path("subdir1/f11.txt")) - assert str(exc.value) == expected_error_msg - - def test_tarfile_extractfile_fails(self, monkeypatch, tmp_path): - """Test to check non-existent path in tarfile""" - tar = Path("/mock/result.tar.xz") - path = Path("subdir/f11.txt") - - class MockTarFile: - def extractfile(self, path): - raise Exception("Mr Robot refuses trivial human command") - - def fake_tarfile_open(self, path): - return MockTarFile() - - with monkeypatch.context() as m: - m.setattr(tarfile, "open", fake_tarfile_open) - expected_error_msg = f"Unable to extract {path} from {tar.name}" - with pytest.raises(CacheExtractBadPath) as exc: - Tarball.extract(tar, path) - assert str(exc.value) == expected_error_msg - - def test_tarfile_extractfile_notfile(self, monkeypatch, tmp_path): - """Test to check target that's not a file""" + @pytest.mark.parametrize( + ( + "tar_path", + "popen_fail", + "wait_cnt", + "peek_return", + "poll_return", + "proc_return", + "stderr_contents", + ), + ( + (None, False, 0, b"", None, 2, b""), # No tar executable + ("/usr/bin/tar", True, 0, b"", None, 2, b""), # Popen failure + # Success, output in peek + ("/usr/bin/tar", False, 0, b"[test]", None, 0, b""), + ("/usr/bin/tar", False, 0, b"", 0, 0, b""), # Success, poll() show success + # Loop/sleep twice, then success + ("/usr/bin/tar", False, 2, b"[test]", None, 0, b""), + # Member path failure + ("/usr/bin/tar", False, 0, b"", 1, 1, MEMBER_NOT_FOUND_MSG), + # Archive access failure + ("/usr/bin/tar", False, 0, b"", 1, 1, CANNOT_OPEN_MSG), + # Unexpected failure + ("/usr/bin/tar", False, 0, b"", 1, 1, b"mock-tar: bolt out of the blue!"), + # Hang, never returning output nor an exit code + ("/usr/bin/tar", False, 0, b"", None, None, b""), + ), + ) + def test_tarfile_extract( + self, + monkeypatch, + tmp_path, + tar_path: str, + popen_fail: bool, + wait_cnt: int, + peek_return: Optional[bytes], + poll_return: Optional[int], + proc_return: int, + stderr_contents: Optional[bytes], + ): + """Test to check Tarball.extract behaviors""" tar = Path("/mock/result.tar.xz") - path = Path("subdir/f11.txt") - - class MockTarFile: - def extractfile(self, path): - return None - - def fake_tarfile_open(self, path): - return 
MockTarFile() + path = "metadata.log" + stdout_contents = b"[test]\nfoo=bar\n" + + class MockBufferedReader(io.BufferedReader): + def __init__(self, contents: bytes): + # No effect, other than to quiet the linter + None if True else super().__init__(io.RawIOBase()) + self.contents = contents + self.loop_count = wait_cnt + + def close(self) -> None: + raise AssertionError( + "This test doesn't expect the stream to be closed." + ) + + def peek(self, size=0) -> bytes: + if self.loop_count > 0: + self.loop_count -= 1 + return b"" + return peek_return + + def read(self, _size: int = -1) -> bytes: + return self.contents + + class MockPopen(subprocess.Popen): + def __init__(self, *_args, **_kwargs): + # No effect, other than to quiet the linter + None if True else super().__init__([]) + if popen_fail: + raise ValueError( + "MockPopen pretending it was called with invalid arguments" + ) + self.stdout = MockBufferedReader(stdout_contents) + self.stderr = MockBufferedReader(stderr_contents) + self.returncode = None + self.loop_count = wait_cnt + + def poll(self) -> Optional[int]: + if self.loop_count > 0: + self.loop_count -= 1 + return None + self.returncode = poll_return + return poll_return + + def kill(self) -> None: + pass + + def mock_shutil_which( + cmd: str, _mode: int = os.F_OK | os.X_OK, _path: Optional[str] = None + ): + assert cmd == "tar" + return tar_path with monkeypatch.context() as m: - m.setattr(tarfile, "open", fake_tarfile_open) - expected_error_msg = f"Unable to extract {path} from {tar.name}" - with pytest.raises(CacheExtractBadPath) as exc: - Tarball.extract(tar, path) - assert str(exc.value) == expected_error_msg + m.setattr(shutil, "which", mock_shutil_which) + m.setattr(subprocess, "Popen", MockPopen) + m.setattr(Inventory, "close", MockBufferedReader.close) + + try: + got = Tarball.extract(tar, path) + except CacheExtractBadPath as exc: + assert tar_path + assert not popen_fail + assert stderr_contents == MEMBER_NOT_FOUND_MSG + assert str(exc) == f"Unable to extract {path} from {tar.name}" + except subprocess.TimeoutExpired as exc: + assert tar_path + assert not popen_fail + assert ( + not peek_return and not poll_return + ), f"Unexpected test timeout: {exc}" + except TarballUnpackError as exc: + if tar_path is None: + msg = "External 'tar' executable not found" + else: + assert not popen_fail + msg = f"Unexpected error from {tar_path}: {stderr_contents.decode()!r}" + assert stderr_contents != MEMBER_NOT_FOUND_MSG + assert str(exc) == f"An error occurred while unpacking {tar}: {msg}" + except ValueError: + assert tar_path + assert popen_fail + else: + assert tar_path + assert not popen_fail + assert peek_return or poll_return is not None + assert isinstance(got, Inventory) + assert got.read() == stdout_contents @pytest.mark.parametrize( "tarball,stream", (("hasmetalog.tar.xz", True), ("nometalog.tar.xz", False)) @@ -1018,7 +1079,6 @@ def fake_tarfile_open(self, path): def test_get_metadata(self, monkeypatch, tarball, stream): """Verify access and processing of `metadata.log`""" - @staticmethod def fake_extract(t: Path, f: Path): if str(t) == tarball and str(f) == f"{Dataset.stem(t)}/metadata.log": if stream: @@ -1027,7 +1087,7 @@ def fake_extract(t: Path, f: Path): raise Exception(f"Unexpected mock exception with stream:{stream}: {t}, {f}") with monkeypatch.context() as m: - m.setattr(Tarball, "extract", fake_extract) + m.setattr(Tarball, "extract", staticmethod(fake_extract)) metadata = Tarball._get_metadata(Path(tarball)) if stream: @@ -1035,34 +1095,258 @@ def 
fake_extract(t: Path, f: Path):
         else:
             assert metadata is None
 
-    def test_inventory(self):
-        closed = False
+    def test_inventory_without_subprocess(self):
+        """Test the Inventory class when used without a subprocess
 
-        class MockTarFile:
-            def close(self):
-                nonlocal closed
-                closed = True
+        This tests the Inventory class functions other than close(), which are
+        unaffected by whether a subprocess is driving the stream, and it also
+        tests the behavior of close() when there is no subprocess.
+        """
+        calls = []
+        my_buffer = bytes()
 
-            def __repr__(self) -> str:
-                return "<MockTarFile>"
+        class MockBufferedReader(io.BufferedReader):
+            def __init__(self):
+                # No effect, other than to quiet the linter
+                None if True else super().__init__(io.RawIOBase())
 
-        raw = b"abcde\nfghij\n"
-        stream = Inventory(io.BytesIO(raw), MockTarFile())
-        assert re.match(
-            r"^<Stream <_io\.BytesIO object at 0x[a-z0-9]+> from <MockTarFile>>$",
-            str(stream),
-        )
+            def close(self) -> None:
+                calls.append("close")
+
+            def getbuffer(self):
+                calls.append("getbuffer") if self else None  # Quiet the linter
+                return my_buffer
+
+            def read(self, _size: int = -1) -> bytes:
+                calls.append("read")
+                return b"read"
+
+            def readable(self) -> bool:
+                calls.append("readable")
+                return True
+
+            def readline(self, _size: int = -1) -> bytes:
+                """Return a non-empty byte-string on the first call; return an
+                empty string on subsequent calls."""
+                calls.append("readline")
+                return b"readline" if len(calls) < 2 else b""
+
+            def seek(self, offset: int, _whence: int = io.SEEK_SET) -> int:
+                calls.append("seek")
+                return offset
+
+        # Invoke the CUT
+        stream = Inventory(MockBufferedReader(), None)
+
+        assert stream.subproc is None
+
+        # Test Inventory.getbuffer()
+        calls.clear()
+        assert stream.getbuffer() is my_buffer and calls == ["getbuffer"]
+
+        # Test Inventory.read()
+        calls.clear()
+        assert stream.read() == b"read" and calls == ["read"]
+
+        # Test Inventory.readable()
+        calls.clear()
+        assert stream.readable() and calls == ["readable"]
 
-        assert stream.getbuffer() == raw
-        assert stream.readable()
-        assert stream.read(5) == b"abcde"
-        assert stream.read() == b"\nfghij\n"
-        assert stream.seek(0) == 0
-        assert [b for b in stream] == [b"abcde\n", b"fghij\n"]
+        # Test Inventory.seek()
+        calls.clear()
+        assert stream.seek(12345) == 12345 and calls == ["seek"]
+
+        # Test Inventory.__iter__() and Inventory.__next__()
+        calls.clear()
+        contents = [b for b in stream]
+        assert contents == [b"readline"] and calls == ["readline", "readline"]
+
+        # Test Inventory.__repr__()
+        assert str(stream) == f"<Stream {stream.stream} from None>"
+
+        # Test Inventory.close()
+        calls.clear()
         stream.close()
-        assert closed
-        with pytest.raises(ValueError):
-            stream.read()
+        assert calls == ["close"]
+
+    @pytest.mark.parametrize(
+        ("poll_val", "stdout_size", "stderr_size", "wait_timeout", "exp_calls"),
+        (
+            # The subprocess completed before the close() call
+            (0, 0, None, None, ["poll", "close"]),
+            # The subprocess is still running when close() is called, the wait
+            # does not time out, stdout is empty and there is no stderr.
+            (None, 0, None, False, ["poll", "kill", "stdout", "wait", "close"]),
+            # The subprocess is still running when close() is called, the wait
+            # does not time out, stderr is empty and there is no stdout.
+            (None, None, 0, False, ["poll", "kill", "stderr", "wait", "close"]),
+            # The subprocess is still running when close() is called, the wait
+            # does not time out, both stdout and stderr are present and empty.
+ (None, 0, 0, False, ["poll", "kill", "stdout", "stderr", "wait", "close"]), + # The subprocess is still running when close() is called, the wait + # does not time out, stdout and stderr each require one read to + # drain them (and a second to see that they are empty). + ( + None, + 2000, + 2000, + False, + [ + "poll", + "kill", + "stdout", + "stdout", + "stderr", + "stderr", + "wait", + "close", + ], + ), + # The subprocess is still running when close() is called, the wait + # does not time out, stdout and stderr each require two reads to + # drain them (and a third to see that they are empty). + ( + None, + 6000, + 6000, + False, + [ + "poll", + "kill", + "stdout", + "stdout", + "stdout", + "stderr", + "stderr", + "stderr", + "wait", + "close", + ], + ), + # The subprocess is still running when close() is called, the wait + # does not time out, stdout and stderr each require three reads to + # drain them (and a fourth to see that they are empty). + ( + None, + 9000, + 9000, + False, + [ + "poll", + "kill", + "stdout", + "stdout", + "stdout", + "stdout", + "stderr", + "stderr", + "stderr", + "stderr", + "wait", + "close", + ], + ), + # The subprocess is still running when close() is called, stdout is + # empty, there is no stderr, and the wait times out. + (None, 0, None, True, ["poll", "kill", "stdout", "wait"]), + ), + ) + def test_inventory( + self, poll_val, stdout_size, stderr_size, wait_timeout, exp_calls + ): + """Test the Inventory class when used with a subprocess + + This test focuses on the behavior of the close() function, since the + behavior of the other functions are checked in the previous test. + """ + my_calls = [] + + class MockPopen(subprocess.Popen): + def __init__( + self, + stdout: Optional[io.BufferedReader], + stderr: Optional[io.BufferedReader], + ): + # No effect, other than to quiet the linter. + None if True else super().__init__([]) + self.stdout = stdout + self.stderr = stderr + self.returncode = None + + def kill(self): + my_calls.append("kill") + + def poll(self) -> Optional[int]: + my_calls.append("poll") + assert ( + self.returncode is None + ), "returncode is unexpectedly set...test bug?" + self.returncode = poll_val + return self.returncode + + def wait(self, timeout: Optional[float] = None) -> Optional[int]: + my_calls.append("wait") + assert ( + self.returncode is None + ), "returncode is unexpectedly set...test bug?" 
+                if wait_timeout:
+                    raise subprocess.TimeoutExpired(
+                        cmd="mock_subprocess",
+                        timeout=timeout,
+                        output=b"I'm dead!",
+                        stderr=b"No, really, I'm dead!",
+                    )
+                self.returncode = 0
+                return self.returncode
+
+            def __repr__(self):
+                return self.__class__.__name__
+
+        class MockBufferedReader(io.BufferedReader):
+            def __init__(self, size: int, name: str):
+                # No effect, other than to quiet the linter
+                None if True else super().__init__(io.RawIOBase())
+                self.size = size
+                self.stream_name = name
+
+            def close(self) -> None:
+                my_calls.append("close")
+                pass
+
+            def read(self, size: int = -1) -> bytes:
+                my_calls.append(self.stream_name)
+                if self.size <= 0:
+                    return b""
+                if size < 0 or size >= self.size:
+                    self.size = 0
+                else:
+                    self.size -= size
+                return b"read"
+
+        my_stdout = (
+            None if stdout_size is None else MockBufferedReader(stdout_size, "stdout")
+        )
+        my_stderr = (
+            None if stderr_size is None else MockBufferedReader(stderr_size, "stderr")
+        )
+        my_stream = my_stdout if my_stdout is not None else my_stderr
+        assert my_stream, "Test bug: we need at least one of stdout and stderr"
+
+        # Invoke the CUT
+        stream = Inventory(my_stream, MockPopen(my_stdout, my_stderr))
+
+        # Test Inventory.__repr__()
+        assert str(stream) == f"<Stream {my_stream} from MockPopen>"
+
+        try:
+            stream.close()
+        except subprocess.TimeoutExpired:
+            assert wait_timeout, "wait() timed out unexpectedly"
+        else:
+            assert not wait_timeout, "wait() failed to time out as expected"
+
+        assert stream.subproc is None
+        assert my_calls == exp_calls
 
     def test_find(
         self, selinux_enabled, server_config, make_logger, tarball, monkeypatch
     ):
         """Create a dataset, check the cache manager state, and test that we
         can find it through the various supported methods.
         """
 
-        def mock_run(args, **kwargs):
-            """Prevents the Tarball contents from actually being unpacked"""
-            return subprocess.CompletedProcess(
-                args, returncode=0, stdout="Success!", stderr=None
-            )
-
         monkeypatch.setattr(Tarball, "_get_metadata", fake_get_metadata)
         source_tarball, source_md5, md5 = tarball
         dataset_name = Dataset.stem(source_tarball)
@@ -1096,7 +1374,7 @@ def mock_run(args, **kwargs):
         # Test __getitem__
         assert tarball == cm[md5]
         with pytest.raises(TarballNotFound) as exc:
-            cm["foobar"]
+            _ = cm["foobar"]
         assert str(exc.value) == "The dataset tarball named 'foobar' is not found"
 
         # Test __contains__
@@ -1150,12 +1428,6 @@ def test_lifecycle(
         delete it.
         """
 
-        def mock_run(args, **kwargs):
-            """Prevents the Tarball contents from actually being unpacked"""
-            return subprocess.CompletedProcess(
-                args, returncode=0, stdout="Success!", stderr=None
-            )
-
         source_tarball, source_md5, md5 = tarball
         cm = CacheManager(server_config, make_logger)
         archive = cm.archive_root / "ABC"
@@ -1184,9 +1456,9 @@ def mock_run(args, **kwargs):
         assert md5file.exists()
         assert md5 == md5file.read_text()
 
-        hash = hashlib.md5()
-        hash.update(tarfile.read_bytes())
-        assert md5 == hash.hexdigest()
+        md5_hash = hashlib.md5()
+        md5_hash.update(tarfile.read_bytes())
+        assert md5 == md5_hash.hexdigest()
 
         assert list(cm.controllers.keys()) == ["ABC"]
         dataset_name = source_tarball.name[:-7]