Commit

fix: archive copy hangs (#161)
ericblanc20 authored Mar 23, 2023
1 parent dfebe77 commit ae8bb92
Showing 44 changed files with 57 additions and 184 deletions.
6 changes: 2 additions & 4 deletions cubi_tk/archive/common.py
@@ -146,11 +146,9 @@ def run_hashdeep(directory, out_file=None, num_threads=4, ref_file=None):
     else:
         cmd += ["-o", "fl", "."]
     # Run hashdeep from the directory, storing the output in f
-    p = subprocess.Popen(cmd, cwd=directory, encoding="utf-8", stdout=f, stderr=subprocess.PIPE)
-    p.communicate()
+    p = subprocess.Popen(cmd, cwd=directory, encoding="utf-8", stdout=f, stderr=None)
+    p.wait()
     # Return hashdeep return value
     if out_file:
         f.close()
     return p.returncode


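For context, a minimal sketch of the pattern the new hunk adopts (the helper name and the example paths below are hypothetical, not cubi-tk code): stdout is redirected straight to a file and stderr is left uncaptured, so no pipe buffer exists that the child process could fill and block on, and a plain wait() suffices to collect the return code.

import subprocess

def run_to_file(cmd, directory, out_path):
    """Run ``cmd`` inside ``directory`` with stdout redirected to ``out_path``."""
    with open(out_path, "wt") as f:
        # stderr stays at its default (inherited from the parent), so the
        # child never blocks waiting for someone to drain a stderr pipe.
        proc = subprocess.Popen(cmd, cwd=directory, encoding="utf-8", stdout=f, stderr=None)
        proc.wait()
    return proc.returncode

# Hypothetical invocation mirroring the hunk above:
# run_to_file(["hashdeep", "-o", "fl", "."], "/path/to/project", "/tmp/hashdeep_report.txt")

If stderr did need to be captured alongside a redirected stdout, communicate() rather than wait() would be the safe call, since it drains the pipe while waiting for the process to finish.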
11 changes: 7 additions & 4 deletions cubi_tk/archive/copy.py
@@ -1,6 +1,7 @@
"""``cubi-tk archive prepare``: Prepare a project for archival"""

import argparse
import atexit
import datetime
import os
import re
@@ -116,6 +117,11 @@ def execute(self) -> typing.Optional[int]:
         rel_symlinks = self._find_relative_symlinks(self.project_dir, rel_symlinks)
         logger.info("Set {} relative symlinks aside".format(len(rel_symlinks)))

+        # Make sure to restore relative symlinks
+        atexit.register(
+            self._restore_relative_symlinks, root=self.project_dir, rel_symlinks=rel_symlinks
+        )
+
         tmpdir = tempfile.TemporaryDirectory()

         status = 0
@@ -167,10 +173,6 @@ def execute(self) -> typing.Optional[int]:
         except Exception as e:
             status = 1
             logger.error(e)
-        finally:
-            # Restore relative symlinks to the original temporary destination
-            logger.info("Restoring relative symlinks")
-            self._restore_relative_symlinks(self.project_dir, rel_symlinks)

         return status

@@ -267,6 +269,7 @@ def _restore_relative_symlinks(self, root, rel_symlinks, add_dangling=True):
             if add_dangling or os.path.exists(os.path.join(symlink_dir, target)):
                 os.makedirs(symlink_dir, mode=488, exist_ok=True)  # 488 is 750 in octal
                 os.symlink(target, symlink_path)
+        logger.info("Restored relative symlinks in {}".format(root))


 def setup_argparse(parser: argparse.ArgumentParser) -> None:
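A minimal, self-contained sketch of the behaviour the new code relies on (the function and the sample data below are hypothetical stand-ins, not the real cubi-tk objects): a callback registered with atexit.register runs at interpreter shutdown, including after an unhandled exception, so the relative symlinks set aside earlier are restored however execute() terminates, whereas the removed try/finally only covered its own block.

import atexit

def restore_relative_symlinks(root, rel_symlinks):
    # Hypothetical stand-in for the restore step shown in the diff above.
    for path, target in rel_symlinks:
        print("would restore {} -> {} under {}".format(path, target, root))

def copy_project(project_dir):
    rel_symlinks = [("pipeline/output/sample1", "../work/sample1")]  # made-up example data
    # Register once; the callback fires at interpreter shutdown even if the
    # copy below raises, so no try/finally wrapper is needed.
    atexit.register(restore_relative_symlinks, root=project_dir, rel_symlinks=rel_symlinks)
    raise RuntimeError("simulated copy failure")

if __name__ == "__main__":
    copy_project("/tmp/project")  # the restore message is still printed on exit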
2 changes: 1 addition & 1 deletion environment.yaml
@@ -1,7 +1,7 @@
 channels:
-  - conda-forge
   - bioconda
   - defaults
+  - conda-forge

 name: cubi-tk

2 changes: 1 addition & 1 deletion requirements/base.txt
@@ -51,7 +51,7 @@ argcomplete
 pyyaml

 # VarFish REST API client.
-varfish-cli >=0.3.5
+varfish-cli >=0.5.1

 # Compact, round-tripable configuration format.
 toml
Binary file added tests/data/archive/final_dest_verif.tar.gz
Binary file removed tests/data/archive/final_dest_verif/.snakemake.tar.gz
20 changes: 0 additions & 20 deletions tests/data/archive/final_dest_verif/1970-01-01_hashdeep_report.txt
This file was deleted.
31 changes: 0 additions & 31 deletions tests/data/archive/final_dest_verif/hashdeep_audit.txt
This file was deleted.
23 changes: 0 additions & 23 deletions tests/data/archive/final_dest_verif/workdir_audit.txt
This file was deleted.
20 changes: 0 additions & 20 deletions tests/data/archive/final_dest_verif/workdir_report.txt
This file was deleted.
Binary file added tests/data/archive/temp_dest_verif.tar.gz
Binary file removed tests/data/archive/temp_dest_verif/.snakemake.tar.gz
20 changes: 0 additions & 20 deletions tests/data/archive/temp_dest_verif/1970-01-01_hashdeep_report.txt
This file was deleted.
29 changes: 0 additions & 29 deletions tests/data/archive/temp_dest_verif/README.md
This file was deleted.
1 change: 0 additions & 1 deletion tests/data/archive/temp_dest_verif/pipeline/output/sample2
This file was deleted.
(Further fixture files under tests/data/archive/final_dest_verif/ and tests/data/archive/temp_dest_verif/ were deleted or emptied; their names are not shown in this view.)

35 changes: 29 additions & 6 deletions tests/test_archive_copy.py
@@ -13,6 +13,7 @@
 import pytest

 from cubi_tk.__main__ import main, setup_argparse
+from cubi_tk.common import execute_shell_commands

 HASHDEEP_TITLES_PATTERN = re.compile("^(%|#).*$")
 IGNORE_FILES_PATTERN = re.compile("^(.*/)?(hashdeep|workdir)_(report|audit)\\.txt$")
@@ -62,14 +63,36 @@ def sort_hashdeep_title_and_body(filename):


 def test_run_archive_copy_smoke_test(mocker):
+    base_path = os.path.join(os.path.dirname(__file__), "data", "archive")
     with tempfile.TemporaryDirectory() as tmp_dir:
-        repo_dir = os.path.join(os.path.dirname(__file__), "data", "archive")
+        execute_shell_commands(
+            [
+                [
+                    "tar",
+                    "-zxf",
+                    os.path.join(base_path, "temp_dest_verif.tar.gz"),
+                    "--directory",
+                    tmp_dir,
+                ]
+            ]
+        )
+        execute_shell_commands(
+            [
+                [
+                    "tar",
+                    "-zxf",
+                    os.path.join(base_path, "final_dest_verif.tar.gz"),
+                    "--directory",
+                    tmp_dir,
+                ]
+            ]
+        )

         argv = [
             "archive",
             "copy",
             "--keep-workdir-hashdeep",
-            os.path.join(repo_dir, "temp_dest_verif"),
+            os.path.join(tmp_dir, "temp_dest_verif"),
             os.path.join(tmp_dir, "final_dest"),
         ]
         setup_argparse()
@@ -88,22 +111,22 @@ def test_run_archive_copy_smoke_test(mocker):

         # --- check report
         (repo_titles, repo_body) = sort_hashdeep_title_and_body(
-            os.path.join(repo_dir, "final_dest_verif", "workdir_report.txt")
+            os.path.join(tmp_dir, "final_dest_verif", "workdir_report.txt")
         )
         (tmp_titles, tmp_body) = sort_hashdeep_title_and_body(
             os.path.join(tmp_dir, "final_dest", "workdir_report.txt")
         )

         # --- check audits
         for fn in ["hashdeep_audit", "workdir_audit"]:
-            with open(os.path.join(repo_dir, "final_dest_verif", fn + ".txt"), "r") as f:
+            with open(os.path.join(tmp_dir, "final_dest_verif", fn + ".txt"), "r") as f:
                 repo = sorted(f.readlines())
             with open(os.path.join(tmp_dir, "final_dest", fn + ".txt"), "r") as f:
                 tmp = sorted(f.readlines())
             assert repo == tmp

         # --- test all copied files, except the hashdeep report & audit, that can differ by line order
-        prefix = os.path.join(repo_dir, "final_dest_verif")
+        prefix = os.path.join(tmp_dir, "final_dest_verif")
         ref_fns = [
             os.path.relpath(x, start=prefix)
             for x in filter(
@@ -123,7 +146,7 @@ def test_run_archive_copy_smoke_test(mocker):
         test_fns = filter(lambda x: not IGNORE_FILES_PATTERN.match(x), test_fns)

         matches, mismatches, errors = filecmp.cmpfiles(
-            os.path.join(repo_dir, "final_dest_verif"),
+            os.path.join(tmp_dir, "final_dest_verif"),
             os.path.join(tmp_dir, "final_dest"),
             common=ref_fns,
             shallow=False,
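A side note on the fixture change above: the final_dest_verif and temp_dest_verif trees are now shipped as tarballs and unpacked into the test's temporary directory, so the smoke test neither reads from nor risks mutating checked-in directories. A rough standard-library equivalent of the unpacking step, assuming the same fixture names, might look like this:

import os
import tarfile
import tempfile

def unpack_fixtures(base_path, names=("temp_dest_verif.tar.gz", "final_dest_verif.tar.gz")):
    """Extract the named fixture tarballs into a fresh temporary directory."""
    tmp_dir = tempfile.mkdtemp()
    for name in names:
        with tarfile.open(os.path.join(base_path, name), "r:gz") as tar:
            tar.extractall(path=tmp_dir)  # recreates the verification trees under tmp_dir
    return tmp_dir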
