Improve the logging of the backup_utils module (#166)
A number of changes to improve the logging:

* A base logger `disk_objectstore` is defined, and `backup_utils` uses a
  child logger. This makes it easy to control the logging configuration
  for the entire package (see the usage sketch after this list).
* The `BackupManager` no longer takes a `logger` argument. The base
  logger should be configured instead.
* The logger is now configured only in the CLI, not in the API itself.
* `BackupManager` drops the `exes` argument and now takes only an
  `rsync_exe` argument to specify the `rsync` executable.
* Log messages use lazy interpolation instead of f-string formatting.
* `rsync` is now called with the `--info` flag to show a progress bar. This
  option is not available for rsync older than v3, in which case the
  `--progress` flag is used instead.
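
For illustration, after this change a downstream script might configure the base logger and construct the manager roughly as sketched below; the destination path and `keep` value are made-up examples, not part of this commit.

```python
import logging

from disk_objectstore import backup_utils

# Configure the package-wide base logger once; `backup_utils` logs through a
# child logger, so this single setting controls all backup log output.
logging.basicConfig(format="%(levelname)s:%(message)s")
logging.getLogger("disk_objectstore").setLevel(logging.INFO)

# `BackupManager` no longer accepts `logger` or `exes`; only the rsync
# executable is configurable (input validation runs on construction).
manager = backup_utils.BackupManager(
    "user@example.com:/backups/my-container",  # hypothetical destination
    keep=2,
    rsync_exe="rsync",
)
```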
eimrek authored Feb 22, 2024
1 parent 23c784a commit d23dac4
Showing 4 changed files with 168 additions and 78 deletions.
5 changes: 5 additions & 0 deletions disk_objectstore/__init__.py
@@ -2,8 +2,13 @@
It does not require a server running.
"""

import logging

from .container import CompressMode, Container, ObjectType

LOGGER = logging.getLogger(__name__)

__all__ = ("Container", "ObjectType", "CompressMode")

__version__ = "1.0.0"
129 changes: 80 additions & 49 deletions disk_objectstore/backup_utils.py
@@ -5,6 +5,7 @@
import datetime
import logging
import random
import re
import shutil
import sqlite3
import string
@@ -13,10 +14,10 @@
from pathlib import Path
from typing import Callable, Optional

from disk_objectstore import LOGGER as BASE_LOGGER
from disk_objectstore.container import Container

logging.basicConfig(format="%(levelname)s:%(message)s")
backup_logger = logging.getLogger(__name__)
LOGGER = BASE_LOGGER.getChild(__name__)


class BackupError(Exception):
@@ -49,23 +50,13 @@ class BackupManager:
def __init__(
self,
dest: str,
logger: logging.Logger,
keep: int = 1,
exes: Optional[dict] = None,
rsync_exe: Optional[str] = None,
) -> None:
self.dest = dest
self.keep = keep
self.logger = logger
self.remote, self.path = split_remote_and_path(dest)

if exes is None:
self.exes = {}
else:
self.exes = exes

# make sure rsync gets added so it gets validated
if "rsync" not in self.exes:
self.exes["rsync"] = "rsync"
self.rsync_exe = rsync_exe if rsync_exe is not None else "rsync"

# Validate the backup config inputs

@@ -77,10 +68,10 @@ def __init__(
if self.remote:
self.check_if_remote_accessible()

if self.exes:
for _, path in self.exes.items():
if not is_exe_found(path):
raise ValueError(f"Input validation failed: {path} not accessible.")
if not is_exe_found(self.rsync_exe):
raise ValueError(
f"Input validation failed: {self.rsync_exe} not accessible."
)

if not self.check_path_exists(self.path):
success = self.run_cmd(["mkdir", str(self.path)])[0]
@@ -89,13 +80,15 @@ def __init__(
f"Input validation failed: Couldn't access/create '{str(self.path)}'!"
)

self.rsync_version = self.get_rsync_major_version()

def check_if_remote_accessible(self):
"""Check if remote host is accessible via ssh"""
self.logger.info(f"Checking if '{self.remote}' is accessible...")
LOGGER.info("Checking if '%s' is accessible...", self.remote)
success = self.run_cmd(["exit"])[0]
if not success:
raise BackupError(f"Remote '{self.remote}' is not accessible!")
self.logger.info("Success! '%s' is accessible!", self.remote)
LOGGER.info("Success! '%s' is accessible!", self.remote)

def check_path_exists(self, path: Path) -> bool:
cmd = ["[", "-e", str(path), "]"]
@@ -114,17 +107,35 @@ def run_cmd(

res = subprocess.run(all_args, capture_output=True, text=True, check=False)

self.logger.debug(
f"Command: {all_args}\n"
f" Exit Code: {res.returncode}\n"
f" stdout/stderr: {res.stdout}\n{res.stderr}"
LOGGER.debug(
"Command: %s\n Exit Code: %d\n stdout/stderr: %s\n%s",
str(all_args),
res.returncode,
res.stdout,
res.stderr,
)

success = res.returncode == 0

return success, res.stdout

def call_rsync( # pylint: disable=too-many-arguments
def get_rsync_major_version(self):
"""
Get the rsync major version.
"""
result = subprocess.run(
[self.rsync_exe, "--version"],
capture_output=True,
text=True,
check=False,
)
pattern = r"rsync\s+version\s+(\d+\.\d+\.\d+)"
match = re.search(pattern, result.stdout)
if match:
return int(match.group(1).split(".")[0])
return None

def call_rsync( # pylint: disable=too-many-arguments,too-many-branches
self,
src: Path,
dest: Path,
@@ -145,12 +156,27 @@ def call_rsync( # pylint: disable=too-many-arguments
:param dest_trailing_slash:
Add a trailing slash to the destination path. This makes rsync interpret the
destination as a folder and create it if it doesn't exist.
"""

assert "rsync" in self.exes
all_args = [
self.rsync_exe,
"-azh",
"--no-whole-file",
]

capture_output = True
if LOGGER.isEnabledFor(logging.INFO):
capture_output = False
if self.rsync_version and self.rsync_version >= 3:
# These options show progress in a nicer way but
# they're only available for rsync version 3+
all_args += ["--info=progress2,stats1"]
else:
LOGGER.info("rsync version <3 detected: showing 'legacy' progress.")
all_args += ["--progress"]

all_args = [self.exes["rsync"], "-azh", "-vv", "--no-whole-file"]
if LOGGER.isEnabledFor(logging.DEBUG):
all_args += ["-vv"]
if extra_args:
all_args += extra_args
if link_dest:
@@ -173,16 +199,18 @@ def call_rsync( # pylint: disable=too-many-arguments
else:
all_args += [f"{self.remote}:{dest_str}"]

res = subprocess.run(all_args, capture_output=True, text=True, check=False)
cmd_str = " ".join(all_args)
LOGGER.info("Running '%s'", cmd_str)

self.logger.debug(
"Command: %s\n Exit Code: %s\n stdout/stderr: %s\n%s",
str(all_args),
res.returncode,
res.stdout,
res.stderr,
res = subprocess.run(
all_args, capture_output=capture_output, text=True, check=False
)

info_text = f"rsync completed. Exit Code: {res.returncode}"
if capture_output:
info_text += f"\nstdout/stderr: {res.stdout}\n{res.stderr}"
LOGGER.info(info_text)

if res.returncode != 0:
raise BackupError(f"rsync failed for: {str(src)} to {str(dest)}")

@@ -224,9 +252,9 @@ def delete_old_backups(self):
for folder in to_delete:
success = self.run_cmd(["rm", "-rf", folder])[0]
if success:
self.logger.info(f"Deleted old backup: {folder}")
LOGGER.info("Deleted old backup: %s", folder)
else:
self.logger.warning("Warning: couldn't delete old backup: %s", folder)
LOGGER.warning("Warning: couldn't delete old backup: %s", folder)

def backup_auto_folders(self, backup_func: Callable) -> None:
"""Create a backup, managing live and previous backup folders automatically
@@ -247,11 +275,11 @@ def backup_auto_folders(self, backup_func: Callable) -> None:
last_folder = self.get_last_backup_folder()

if last_folder:
self.logger.info(
f"Last backup is '{str(last_folder)}', using it for rsync --link-dest."
LOGGER.info(
"Last backup is '%s', using it for rsync --link-dest.", str(last_folder)
)
else:
self.logger.info("Couldn't find a previous backup to increment from.")
LOGGER.info("Couldn't find a previous backup to increment from.")

backup_func(
live_folder,
@@ -273,20 +301,23 @@ def backup_auto_folders(self, backup_func: Callable) -> None:
f"Failed to move '{str(live_folder)}' to '{str(self.path / folder_name)}'"
)

self.logger.info(
f"Backup moved from '{str(live_folder)}' to '{str(self.path / folder_name)}'."
LOGGER.info(
"Backup moved from '%s' to '%s'.",
str(live_folder),
str(self.path / folder_name),
)

symlink_name = "last-backup"
success = self.run_cmd(
["ln", "-sfn", str(folder_name), str(self.path / symlink_name)]
)[0]
if not success:
self.logger.warning(
f"Couldn't create symlink '{symlink_name}'. Perhaps the filesystem doesn't support it."
LOGGER.warning(
"Couldn't create symlink '%s'. Perhaps the filesystem doesn't support it.",
symlink_name,
)
else:
self.logger.info(f"Added symlink '{symlink_name}' to '{folder_name}'.")
LOGGER.info("Added symlink '%s' to '%s'.", symlink_name, folder_name)

self.delete_old_backups()

@@ -331,7 +362,7 @@ def backup_container(
prev_backup_loose = prev_backup / loose_path_rel if prev_backup else None

manager.call_rsync(loose_path, path, link_dest=prev_backup_loose)
manager.logger.info(f"Transferred {str(loose_path)} to {str(path)}")
LOGGER.info("Transferred %s to %s", str(loose_path), str(path))

# step 2: back up sqlite db

@@ -341,18 +372,18 @@ def backup_container(
_sqlite_backup(sqlite_path, sqlite_temp_loc)

if sqlite_temp_loc.is_file():
manager.logger.info(f"Dumped the SQLite database to {str(sqlite_temp_loc)}")
LOGGER.info("Dumped the SQLite database to %s", str(sqlite_temp_loc))
else:
raise BackupError(f"'{str(sqlite_temp_loc)}' failed to be created.")

# step 3: transfer the SQLITE database file
manager.call_rsync(sqlite_temp_loc, path, link_dest=prev_backup)
manager.logger.info(f"Transferred SQLite database to {str(path)}")
LOGGER.info("Transferred SQLite database to %s", str(path))

# step 4: transfer the packed files
packs_path_rel = packs_path.relative_to(container_root_path)
manager.call_rsync(packs_path, path, link_dest=prev_backup)
manager.logger.info(f"Transferred {str(packs_path)} to {str(path)}")
LOGGER.info("Transferred %s to %s", str(packs_path), str(path))

# step 5: transfer anything else in the container folder
manager.call_rsync(
15 changes: 9 additions & 6 deletions disk_objectstore/cli.py
@@ -1,4 +1,5 @@
"""A small CLI tool for managing stores."""

import dataclasses
import json
import logging
@@ -228,24 +229,26 @@ def backup(
by OpenSSH, such as adding configuration options to ~/.ssh/config (e.g. to allow for passwordless
login - recommended, since this script might ask multiple times for the password).
NOTE: 'rsync' and other UNIX-specific commands are called, thus the command will not work on
NOTE: 'rsync' and other UNIX-specific commands are called, thus the command will likely not work on
non-UNIX environments.
"""

logging.basicConfig(format="%(levelname)s:%(message)s")
logger = logging.getLogger("disk_objectstore")

if verbosity == "silent":
backup_utils.backup_logger.setLevel(logging.ERROR)
logger.setLevel(logging.ERROR)
elif verbosity == "info":
backup_utils.backup_logger.setLevel(logging.INFO)
logger.setLevel(logging.INFO)
elif verbosity == "debug":
backup_utils.backup_logger.setLevel(logging.DEBUG)
logger.setLevel(logging.DEBUG)

with dostore.container as container:
try:
backup_manager = backup_utils.BackupManager(
dest,
backup_utils.backup_logger,
exes={"rsync": rsync_exe},
keep=keep,
rsync_exe=rsync_exe,
)
backup_manager.backup_auto_folders(
lambda path, prev: backup_utils.backup_container(