Skip to content

Commit

Permalink
Add preliminary backup command
Browse files Browse the repository at this point in the history
  • Loading branch information
eimrek committed Sep 26, 2023
1 parent 4e05dc0 commit 3696d13
Show file tree
Hide file tree
Showing 2 changed files with 278 additions and 1 deletion.
240 changes: 240 additions & 0 deletions disk_objectstore/backup_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
"""
Utilities to back up a container.
"""

import shutil
import sqlite3
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

from disk_objectstore.container import Container


def _log(msg, end="\n"):
    """Print ``msg`` to standard output, terminated by ``end``."""
    print(msg, end=end)

Check warning on line 16 in disk_objectstore/backup_utils.py

View check run for this annotation

Codecov / codecov/patch

disk_objectstore/backup_utils.py#L16

Added line #L16 was not covered by tests


def _is_exe_found(exe) -> bool:
    """Return True if ``shutil.which`` can locate the executable ``exe``."""
    located = shutil.which(exe)
    return located is not None

Check warning on line 20 in disk_objectstore/backup_utils.py

View check run for this annotation

Codecov / codecov/patch

disk_objectstore/backup_utils.py#L20

Added line #L20 was not covered by tests


def _run_cmd(args: list, remote: Optional[str] = None, check: bool = True) -> bool:
    """
    Run a command locally or remotely.

    :param args:
        The command and its arguments, one list item per argument.
    :param remote:
        If given, the command is executed on this host by prefixing it
        with ``ssh <remote>``; otherwise it runs locally.
    :param check:
        Forwarded to ``subprocess.run``. If True, a non-zero exit status is
        logged as an error.
    :return:
        True if the command exited with status 0, False otherwise (including
        when the process could not be started at all).
    """
    all_args = list(args)
    if remote:
        # NOTE(review): ssh hands the arguments to the remote shell unquoted,
        # so paths containing spaces or shell metacharacters are presumably
        # not safe here - confirm before relying on such paths.
        all_args = ["ssh", remote] + all_args

    try:
        res = subprocess.run(all_args, capture_output=True, text=True, check=check)
    except subprocess.CalledProcessError as exc:
        _log("Error: " + str(exc))
        # The output is captured, so without this the actual reason for the
        # failure would never reach the user.
        if exc.stderr:
            _log(f"stderr: {all_args}\n{exc.stderr}")
        return False
    except OSError as exc:
        # Raised when the executable (e.g. ssh) is missing or cannot be run;
        # report it like any other failure instead of propagating a traceback.
        _log("Error: " + str(exc))
        return False

    _log(f"stdout: {all_args}\n{res.stdout}")
    _log(f"stderr: {all_args}\n{res.stderr}")

    return res.returncode == 0

Check warning on line 42 in disk_objectstore/backup_utils.py

View check run for this annotation

Codecov / codecov/patch

disk_objectstore/backup_utils.py#L42

Added line #L42 was not covered by tests


def _check_if_remote_accessible(remote: str) -> bool:
    """Check that ``remote`` can be reached by running ``exit`` on it via ``_run_cmd``.

    :return:
        True if the remote command succeeded, False otherwise.
    """
    _log(f"Checking if '{remote}' is accessible...", end="")
    if _run_cmd(["exit"], remote=remote):
        _log(f"Success! '{remote}' is accessible!")
        return True
    _log(f"Error: Remote '{remote}' is not accessible!")
    return False

Check warning on line 52 in disk_objectstore/backup_utils.py

View check run for this annotation

Codecov / codecov/patch

disk_objectstore/backup_utils.py#L46-L52

Added lines #L46 - L52 were not covered by tests


def _check_path_exists(path: Path, remote: Optional[str] = None) -> bool:
    """Tell whether ``path`` exists, locally or on ``remote``.

    The check runs ``[ -e <path> ]`` through ``_run_cmd`` with ``check=False``,
    so a missing path is reported through the return value rather than logged
    as an error.
    """
    return _run_cmd(["[", "-e", str(path), "]"], remote=remote, check=False)

Check warning on line 57 in disk_objectstore/backup_utils.py

View check run for this annotation

Codecov / codecov/patch

disk_objectstore/backup_utils.py#L56-L57

Added lines #L56 - L57 were not covered by tests


def _call_rsync(  # pylint: disable=too-many-arguments
    args: list,
    src: Path,
    dest: Path,
    link_dest: Optional[Path] = None,
    remote: Optional[str] = None,
    src_trailing_slash: bool = False,
    dest_trailing_slash: bool = False,
) -> bool:
    """Call rsync with specified arguments and handle possible errors & stdout/stderr

    :param args:
        The rsync executable followed by the options shared by the call;
        the list is copied, not modified.
    :param src:
        Source path (always local).
    :param dest:
        Destination path; prefixed with ``<remote>:`` when ``remote`` is given.
    :param link_dest:
        Path to the hardlinked files location (previous backup).
    :param remote:
        ssh remote of the destination; if not given the destination is local.
    :param src_trailing_slash:
        Add a trailing slash to the source path. This makes rsync copy the contents
        of the folder instead of the folder itself.
    :param dest_trailing_slash:
        Add a trailing slash to the destination path. This makes rsync interpret the
        destination as a folder and create it if it doesn't exist.
    :return:
        True if successful and False if unsuccessful.
    """

    all_args = list(args)

    if link_dest:
        if remote:
            # for remote paths, we require absolute paths anyways
            link_dest_str = str(link_dest)
        else:
            # for local paths, use resolve() to get absolute path
            link_dest_str = str(link_dest.resolve())
        all_args.append(f"--link-dest={link_dest_str}")

    src_str = str(src)
    if src_trailing_slash:
        src_str += "/"
    all_args.append(src_str)

    dest_str = str(dest)
    if dest_trailing_slash:
        dest_str += "/"
    all_args.append(f"{remote}:{dest_str}" if remote else dest_str)

    try:
        res = subprocess.run(all_args, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as exc:
        _log(f"Error: {exc}")
        # rsync's own diagnostics are captured, not shown; surface them so the
        # cause of the failure is visible.
        if exc.stderr:
            _log(f"stderr: {all_args}\n{exc.stderr}")
        return False
    except OSError as exc:
        # The executable could not be started (missing, not executable, ...).
        _log(f"Error: {exc}")
        return False

    _log(f"stdout: {all_args}\n{res.stdout}")
    _log(f"stderr: {all_args}\n{res.stderr}")

    return res.returncode == 0

Check warning on line 121 in disk_objectstore/backup_utils.py

View check run for this annotation

Codecov / codecov/patch

disk_objectstore/backup_utils.py#L121

Added line #L121 was not covered by tests


def backup(  # pylint: disable=too-many-return-statements, too-many-branches
    container: Container,
    path: Path,
    remote: Optional[str] = None,
    prev_backup: Optional[Path] = None,
    rsync_exe: str = "rsync",
) -> bool:
    """Create a backup of the disk-objectstore container

    It should be done in the following order:
    1) loose files;
    2) sqlite database;
    3) packed files;
    4) anything else in the container folder.

    :param container:
        The container to back up.
    :param path:
        Destination folder of the backup; it is created with ``mkdir`` if it
        does not exist yet.
    :param remote:
        ssh remote of the backup location; if not given the backup is local.
    :param prev_backup:
        Location of a previous backup, used for rsync's ``--link-dest``.
    :param rsync_exe:
        Name or path of the rsync executable.
    :return:
        True if successful and False if unsuccessful.
    """

    # ------------------
    # input validation:
    if remote:
        if not _check_if_remote_accessible(remote):
            return False

    if not _is_exe_found(rsync_exe):
        _log(f"Error: {rsync_exe} not accessible.")
        return False

    path_exists = _check_path_exists(path, remote)

    if not path_exists:
        success = _run_cmd(["mkdir", str(path)], remote=remote)
        if not success:
            _log(f"Error: Couldn't access/create '{str(path)}'!")
            return False

    if prev_backup:
        if not _check_path_exists(prev_backup, remote):
            _log(f"Error: {str(prev_backup)} not found.")
            return False
    # ------------------

    # subprocess arguments shared by all rsync calls:
    rsync_args = [rsync_exe, "-azh", "-vv", "--no-whole-file"]

    container_root_path = container.get_folder()
    loose_path = container._get_loose_folder()  # pylint: disable=protected-access
    packs_path = container._get_pack_folder()  # pylint: disable=protected-access
    sqlite_path = container._get_pack_index_path()  # pylint: disable=protected-access

    # step 1: back up loose files
    # NOTE(review): the source is passed without a trailing slash, so rsync
    # places the files under <path>/<loose_path_rel>/. Confirm that --link-dest
    # should then be <prev_backup>/<loose_path_rel> rather than <prev_backup>
    # itself (the same question applies to the packs folder in step 4).
    loose_path_rel = loose_path.relative_to(container_root_path)
    prev_backup_loose = prev_backup / loose_path_rel if prev_backup else None
    success = _call_rsync(
        rsync_args, loose_path, path, remote=remote, link_dest=prev_backup_loose
    )
    if not success:
        return False

    # step 2: back up sqlite db

    # make a temporary directory to dump sqlite db locally
    with tempfile.TemporaryDirectory() as temp_dir_name:
        sqlite_temp_loc = Path(temp_dir_name) / "packs.idx"

        # Safe way to make a backup of the sqlite db, while it might potentially be accessed
        # https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup
        # Both connections are closed even if connecting or the backup itself
        # raises, so no handle to the temporary file is left open when the
        # temporary directory gets removed.
        src = sqlite3.connect(str(sqlite_path))
        try:
            dst = sqlite3.connect(str(sqlite_temp_loc))
            try:
                with dst:
                    src.backup(dst)
            finally:
                dst.close()
        finally:
            src.close()

        if sqlite_temp_loc.is_file():
            _log(f"Dumped the SQLite database to {str(sqlite_temp_loc)}")
        else:
            _log(f"Error: '{str(sqlite_temp_loc)}' was not created.")
            return False

        # step 3: transfer the SQLITE database file
        # (must happen while the temporary directory still exists)
        success = _call_rsync(
            rsync_args, sqlite_temp_loc, path, remote=remote, link_dest=prev_backup
        )
        if not success:
            return False

    # step 4: transfer the packed files
    packs_path_rel = packs_path.relative_to(container_root_path)
    prev_backup_packs = prev_backup / packs_path_rel if prev_backup else None
    success = _call_rsync(
        rsync_args, packs_path, path, remote=remote, link_dest=prev_backup_packs
    )
    if not success:
        return False

    # step 5: transfer anything else in the container folder
    # (everything already handled in the previous steps is excluded)
    success = _call_rsync(
        rsync_args
        + [
            "--exclude",
            str(loose_path_rel),
            "--exclude",
            "packs.idx",
            "--exclude",
            str(packs_path_rel),
        ],
        container_root_path,
        path,
        link_dest=prev_backup,
        remote=remote,
        src_trailing_slash=True,
    )
    if not success:
        return False

    return True

Check warning on line 240 in disk_objectstore/backup_utils.py

View check run for this annotation

Codecov / codecov/patch

disk_objectstore/backup_utils.py#L240

Added line #L240 was not covered by tests
39 changes: 38 additions & 1 deletion disk_objectstore/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import click

from disk_objectstore import __version__
from disk_objectstore import __version__, backup_utils
from disk_objectstore.container import Container


Expand Down Expand Up @@ -183,3 +183,40 @@ def optimize(
container.clean_storage(vacuum=vacuum)
size = sum(f.stat().st_size for f in dostore.path.glob("**/*") if f.is_file())
click.echo(f"Final container size: {round(size/1000, 2)} Mb")


@main.command("backup")
@click.argument("path", nargs=1, type=click.Path())
@click.option(
    "--remote",
    default=None,
    help="ssh remote of the backup location.",
)
@click.option(
    "--prev_backup",
    default=None,
    help="Previous backup location for rsync link-dest.",
)
@click.option(
    "--rsync_exe",
    default="rsync",
    help="Specify the 'rsync' executable, if not in PATH.",
)
@pass_dostore
def backup(
    dostore: ContainerContext,
    path: str,
    remote: Optional[str],
    prev_backup: Optional[str],
    rsync_exe: str,
):
    """Create a backup of the container"""
    # PATH and --prev_backup arrive as plain strings and are converted to Path
    # objects, as expected by backup_utils.backup.
    with dostore.container as container:
        success = backup_utils.backup(
            container,
            Path(path),
            remote=remote,
            prev_backup=Path(prev_backup) if prev_backup else None,
            rsync_exe=rsync_exe,
        )

    # backup_utils.backup reports failure through its return value; turn that
    # into a non-zero exit status so scripts can detect a failed backup.
    if not success:
        raise click.ClickException("The backup was not completed successfully.")

0 comments on commit 3696d13

Please sign in to comment.