Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
eimrek committed Dec 7, 2023
1 parent c5ecf2f commit 5a20caa
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 57 deletions.
91 changes: 37 additions & 54 deletions disk_objectstore/backup_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,32 +49,33 @@ class BackupManager:
def __init__(
self,
dest: str,
keep: int,
logger: logging.Logger,
exes: dict,
keep: int = 1,
exes: Optional[dict] = None,
) -> None:
self.dest = dest
self.keep = keep
self.logger = logger
self.remote, self.path = split_remote_and_path(dest)

self.exes = exes
if exes is None:
self.exes = {}
else:
self.exes = exes

# make sure rsync gets added so it gets validated
if "rsync" not in self.exes:
self.exes["rsync"] = "rsync"

self.validate()

def check_if_remote_accessible(self) -> bool:
def check_if_remote_accessible(self):
"""Check if remote host is accessible via ssh"""
self.logger.info(f"Checking if '{self.remote}' is accessible...")
success = self.run_cmd(["exit"])[0]
if not success:
self.logger.error(f"Remote '{self.remote}' is not accessible!")
return False
raise BackupError(f"Remote '{self.remote}' is not accessible!")
self.logger.info("Success! '%s' is accessible!", self.remote)
return True

def check_path_exists(self, path: Path) -> bool:
cmd = ["[", "-e", str(path), "]"]
Expand All @@ -92,10 +93,7 @@ def validate(self):
)

if self.remote:
if not self.check_if_remote_accessible():
raise BackupError(
"Input validation failed: keep variable can't be negative!"
)
self.check_if_remote_accessible()

if self.exes:
for _, path in self.exes.items():
Expand Down Expand Up @@ -142,9 +140,8 @@ def call_rsync( # pylint: disable=too-many-arguments
dest: Path,
link_dest: Optional[Path] = None,
src_trailing_slash: bool = False,
dest_trailing_slash: bool = False,
extra_args: Optional[list] = None,
) -> bool:
):
"""Call rsync with specified arguments and handle possible errors & stdout/stderr
:param link_dest:
Expand All @@ -154,10 +151,6 @@ def call_rsync( # pylint: disable=too-many-arguments
Add a trailing slash to the source path. This makes rsync copy the contents
of the folder instead of the folder itself.
:param dest_trailing_slash:
Add a trailing slash to the destination path. This makes rsync interpret the
destination as a folder and create it if it doesn't exists.
:return:
True if successful and False if unsuccessful.
"""
Expand All @@ -182,8 +175,6 @@ def call_rsync( # pylint: disable=too-many-arguments
all_args += [str(src)]

dest_str = str(dest)
if dest_trailing_slash:
dest_str += "/"

if not self.remote:
all_args += [dest_str]
Expand All @@ -202,7 +193,8 @@ def call_rsync( # pylint: disable=too-many-arguments

success = res.returncode == 0

return success
if not success:
raise BackupError(f"rsync failed for: {str(src)} to {str(dest)}")

# ----
# Utilities to manage multiple folders of backups, e.g. hard-linking to previous backup;
Expand All @@ -211,7 +203,7 @@ def call_rsync( # pylint: disable=too-many-arguments

def get_existing_backup_folders(self):
"""Get all folders matching the backup folder name pattern."""
_, stdout = self.run_cmd(
success, stdout = self.run_cmd(
[
"find",
str(self.path),
Expand All @@ -225,6 +217,9 @@ def get_existing_backup_folders(self):
]
)

if not success:
raise BackupError("Existing backups determination failed.")

return stdout.splitlines()

def get_last_backup_folder(self):
Expand Down Expand Up @@ -261,10 +256,7 @@ def backup_auto_folders(self, backup_func: Callable):

live_folder = self.path / "live-backup"

try:
last_folder = self.get_last_backup_folder()
except subprocess.CalledProcessError as exc:
raise BackupError("Couldn't determine last backup.") from exc
last_folder = self.get_last_backup_folder()

if last_folder:
self.logger.info(
Expand Down Expand Up @@ -308,10 +300,20 @@ def backup_auto_folders(self, backup_func: Callable):
else:
self.logger.info(f"Added symlink '{symlink_name}' to '{folder_name}'.")

try:
self.delete_old_backups()
except subprocess.CalledProcessError:
self.logger.warning("Failed to delete old backups.")
self.delete_old_backups()


def _sqlite_backup(src: Path, dst: Path) -> None:
    """Copy the SQLite database at ``src`` into a new database file at ``dst``.

    Safe way to make a backup of the sqlite db, while it might potentially be accessed
    https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup

    :param src: path of the SQLite database to back up.
    :param dst: path of the database file that receives the copy.
    :raises sqlite3.Error: propagated unchanged if connecting or copying fails.
    """
    src_connect = sqlite3.connect(str(src))
    try:
        dst_connect = sqlite3.connect(str(dst))
        try:
            # The connection context manager only commits/rolls back the
            # transaction; it does not close the connection, hence the
            # explicit ``finally`` blocks.
            with dst_connect:
                src_connect.backup(dst_connect)
        finally:
            dst_connect.close()
    finally:
        # Always release the source handle, even when the copy failed.
        src_connect.close()


def backup_container(
Expand Down Expand Up @@ -341,48 +343,33 @@ def backup_container(
# step 1: back up loose files
loose_path_rel = loose_path.relative_to(container_root_path)
prev_backup_loose = prev_backup / loose_path_rel if prev_backup else None
success = manager.call_rsync(loose_path, path, link_dest=prev_backup_loose)
if not success:
raise BackupError(f"rsync failed for: {str(loose_path)} to {str(path)}")

manager.call_rsync(loose_path, path, link_dest=prev_backup_loose)
manager.logger.info(f"Transferred {str(loose_path)} to {str(path)}")

# step 2: back up sqlite db

# make a temporary directory to dump sqlite db locally
with tempfile.TemporaryDirectory() as temp_dir_name:
sqlite_temp_loc = Path(temp_dir_name) / "packs.idx"

# Safe way to make a backup of the sqlite db, while it might potentially be accessed
# https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup
src = sqlite3.connect(str(sqlite_path))
dst = sqlite3.connect(str(sqlite_temp_loc))
with dst:
src.backup(dst)
dst.close()
src.close()
_sqlite_backup(sqlite_path, sqlite_temp_loc)

if sqlite_temp_loc.is_file():
manager.logger.info(f"Dumped the SQLite database to {str(sqlite_temp_loc)}")
else:
raise BackupError(f"'{str(sqlite_temp_loc)}' failed to be created.")

# step 3: transfer the SQLITE database file
success = manager.call_rsync(sqlite_temp_loc, path, link_dest=prev_backup)
if not success:
raise BackupError(
f"rsync failed for: {str(sqlite_temp_loc)} to {str(path)}"
)
manager.call_rsync(sqlite_temp_loc, path, link_dest=prev_backup)
manager.logger.info(f"Transferred SQLite database to {str(path)}")

# step 4: transfer the packed files
packs_path_rel = packs_path.relative_to(container_root_path)
success = manager.call_rsync(packs_path, path, link_dest=prev_backup)
if not success:
raise BackupError(f"rsync failed for: {str(packs_path)} to {str(path)}")
manager.call_rsync(packs_path, path, link_dest=prev_backup)
manager.logger.info(f"Transferred {str(packs_path)} to {str(path)}")

# step 5: transfer anything else in the container folder
success = manager.call_rsync(
manager.call_rsync(
container_root_path,
path,
link_dest=prev_backup,
Expand All @@ -396,7 +383,3 @@ def backup_container(
str(packs_path_rel),
],
)
if not success:
raise BackupError(
f"rsync failed for: {str(container_root_path)} to {str(path)}"
)
5 changes: 4 additions & 1 deletion disk_objectstore/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ def backup(
with dostore.container as container:
try:
backup_manager = backup_utils.BackupManager(
dest, keep, backup_utils.backup_logger, exes={"rsync": rsync_exe}
dest,
backup_utils.backup_logger,
exes={"rsync": rsync_exe},
keep=keep,
)
backup_manager.backup_auto_folders(
lambda path, prev: backup_utils.backup_container(
Expand Down
134 changes: 134 additions & 0 deletions tests/test_backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Test the backup functionality.
"""

import random
import string
from pathlib import Path

import pytest

from disk_objectstore import backup_utils
from disk_objectstore.backup_utils import BackupError, BackupManager


def _random_string(n=10):
    """Return ``n`` random characters taken from lowercase ASCII letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=n)
    return "".join(picked)


def test_invalid_destination():
    """A destination string containing two colons must be rejected."""
    bad_destination = "localhost:/tmp/test:"
    expected_message = "Invalid destination format"
    with pytest.raises(BackupError, match=expected_message):
        BackupManager(bad_destination, backup_utils.backup_logger)


def test_inaccessible_remote():
    """A remote host made of random characters must be reported as not accessible."""
    random_host = f"_{_random_string()}"
    destination = f"{random_host}:/tmp/test"
    with pytest.raises(BackupError, match="is not accessible"):
        BackupManager(destination, backup_utils.backup_logger)


def test_negative_keep():
    """Constructing a manager with a negative ``keep`` must raise ``BackupError``."""
    expected_message = "keep variable can't be negative"
    with pytest.raises(BackupError, match=expected_message):
        BackupManager("/tmp/test", backup_utils.backup_logger, keep=-1)


def test_inaccessible_exe():
    """A randomly named rsync executable must be reported as not accessible."""
    fake_rsync = f"_{_random_string()}"
    executables = {"rsync": fake_rsync}
    with pytest.raises(BackupError, match=f"{fake_rsync} not accessible."):
        BackupManager("/tmp/test", backup_utils.backup_logger, exes=executables)


def test_inaccessible_path():
    """A destination path that cannot be accessed or created must raise."""
    # A random directory directly under the filesystem root is assumed to
    # trigger a permission error when the manager tries to create it.
    random_name = _random_string()
    dest = f"/_{random_name}"
    expected_message = f"Couldn't access/create '{dest}'"
    with pytest.raises(BackupError, match=expected_message):
        BackupManager(dest, backup_utils.backup_logger)


def test_rsync_failure():
    """Check that a failing rsync call is reported as a ``BackupError``."""
    dest = "/tmp/test"
    # Build the manager outside the ``raises`` block: only the rsync call
    # itself is expected to fail, so a setup problem must not be caught here.
    manager = BackupManager(dest, backup_utils.backup_logger)
    # Pick a source that doesn't exist so that rsync is expected to fail.
    missing_src = Path(f"/_{_random_string()}")
    with pytest.raises(BackupError, match="rsync failed"):
        manager.call_rsync(missing_src, Path(dest))


def test_existing_backups_failure():
    """Check that a failed lookup of existing backups raises ``BackupError``."""
    dest = "/tmp/test"
    # Construct the manager outside the ``raises`` block so that only the
    # lookup call is expected to fail.
    manager = BackupManager(dest, backup_utils.backup_logger)
    # Override the path with something that will make the lookup fail; keep
    # it a ``Path`` because the manager uses ``self.path`` as a ``Path``.
    manager.path = Path(f"/_{_random_string()}")
    with pytest.raises(BackupError, match="Existing backups determination failed"):
        manager.get_existing_backup_folders()


def test_mv_failure(monkeypatch, temp_container, temp_dir):
    """Check that a failing ``mv`` command makes the backup raise ``BackupError``.

    ``BackupManager.run_cmd`` is monkeypatched so that any command containing
    ``"mv"`` reports failure, while every other command runs normally.
    """

    # Save a reference to the original run_cmd so other commands still work.
    original_run_cmd = backup_utils.BackupManager.run_cmd

    def mock_run_cmd(self, args):
        # Simulate a failure only for the "mv" command.
        if "mv" in args:
            return False, ""
        return original_run_cmd(self, args)

    monkeypatch.setattr(backup_utils.BackupManager, "run_cmd", mock_run_cmd)

    # Make a container and add a few objects to it.
    temp_container.init_container(clear=True)
    for idx in range(100):
        temp_container.add_object(f"test-{idx}".encode())

    dest = Path(temp_dir) / "backup"
    # Construct the manager outside the ``raises`` block: only the backup
    # run itself is expected to fail.
    manager = BackupManager(str(dest), backup_utils.backup_logger)
    with pytest.raises(BackupError, match="Failed to move"):
        manager.backup_auto_folders(
            lambda path, prev: backup_utils.backup_container(
                manager, temp_container, path, prev
            )
        )


def test_sqlite_failure(monkeypatch, temp_container, temp_dir):
    """Check that a missing SQLite dump file makes the backup raise ``BackupError``.

    ``backup_utils._sqlite_backup`` is replaced by a no-op, so the temporary
    database file is never written.
    """

    def mock_sqlite_backup(src, dst):  # pylint: disable=unused-argument
        # Deliberately do nothing so that no backup file is produced.
        pass

    monkeypatch.setattr(backup_utils, "_sqlite_backup", mock_sqlite_backup)

    # Make a container and add a few objects to it.
    temp_container.init_container(clear=True)
    for idx in range(100):
        temp_container.add_object(f"test-{idx}".encode())

    dest = Path(temp_dir) / "backup"
    # Construct the manager outside the ``raises`` block: only the backup
    # run itself is expected to fail.
    manager = BackupManager(str(dest), backup_utils.backup_logger)
    with pytest.raises(BackupError, match="'.*' failed to be created."):
        manager.backup_auto_folders(
            lambda path, prev: backup_utils.backup_container(
                manager, temp_container, path, prev
            )
        )
10 changes: 8 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,8 @@ def test_backup(temp_container, temp_dir, remote, verbosity):
@pytest.mark.skipif(
platform.system() == "Windows", reason="Backup not supported on Windows"
)
def test_backup_repeated(temp_container, temp_dir):
@pytest.mark.parametrize("remote", [False, True])
def test_backup_repeated(temp_container, temp_dir, remote):
"""Test the backup command repeated 3 times.
Considering --keep 1 is default, the last one should get deleted.
Expand All @@ -268,8 +269,13 @@ def test_backup_repeated(temp_container, temp_dir):

path = Path(temp_dir) / "backup"

if remote:
destination = f"localhost:{str(path)}"
else:
destination = str(path)

for _ in range(3):
result = CliRunner().invoke(cli.backup, [str(path)], obj=obj)
result = CliRunner().invoke(cli.backup, [destination], obj=obj)
assert result.exit_code == 0

assert path.exists()
Expand Down

0 comments on commit 5a20caa

Please sign in to comment.