Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve portability of reproducible tarballs by replacing external tar command with tarfile module #4660

Open
wants to merge 17 commits into
base: 5.0.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
ec1ede9
use more portable --date argument for touch command used in reproduci…
lexming Sep 27, 2024
e7f3bbd
stop reproducible tarball generation command on any failure in the pipe
lexming Sep 27, 2024
ca09f4e
move command to make reproducible archives to its own generator metho…
lexming Sep 27, 2024
d0a55ba
replace hardcoded pattern for reproducible archives command for call t…
lexming Sep 27, 2024
d7195c7
use tarfile module instead of executing external shell commands to cr…
lexming Oct 7, 2024
87b733a
add required flag to filetools.find_extension() method
lexming Oct 8, 2024
dd28095
add support for extended_dry_run mode to filetools.make_archive()
lexming Oct 8, 2024
a37af5a
add unit test for filetools.make_archive()
lexming Oct 8, 2024
f2296de
make test for github_get_source_tarball_from_git compatible with make…
lexming Oct 8, 2024
980f618
improve reliability of bit-wise operations setting file mode in repro…
lexming Oct 8, 2024
ddb9cae
Merge branch '5.0.x' into reprod-tarballs-mac
lexming Nov 5, 2024
a26c71e
set reproducible flag of make_archives from a specific variable
lexming Nov 5, 2024
3936a6e
simplify logic in EasyBlock.get_checksum_for and improve its logging
lexming Nov 5, 2024
6188cc7
ignore checksums of sources from git repos prior to Python 3.9
lexming Nov 5, 2024
ee772f7
only run checksum assertions in test_make_archive on Python 3.9+
lexming Nov 5, 2024
27ea1d0
add test_fetch_sources_git to easyblock suite
lexming Nov 5, 2024
ad47cac
push deprecation of checksum check of git repo with Python < 3.9 to Ea…
lexming Nov 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2772,15 +2772,32 @@ def get_source_tarball_from_git(filename, target_dir, git_config):

if keep_git_dir:
# create archive of git repo including .git directory
tar_cmd = ['tar', 'cfvz', archive_path, repo_name]
tar_cmd = f"tar cfvz {archive_path} {repo_name}"
else:
# create reproducible archive
# see https://reproducible-builds.org/docs/archives/
tar_cmd = [
tar_cmd = reproducible_archive_cmd(repo_name, archive_path)

run_shell_cmd(tar_cmd, work_dir=tmpdir, hidden=True, verbose_dry_run=True)

# cleanup (repo_name dir does not exist in dry run mode)
remove(tmpdir)

return archive_path


def reproducible_archive_cmd(dir_name, archive_name):
"""
Return string with command to make reproducible archive from a given directory
see https://reproducible-builds.org/docs/archives/
"""
try:
cmd_pipe = [
# stop on failure of any command in the pipe
'set', '-eo pipefail', ';',
# print names of all files and folders excluding .git directory
'find', repo_name, '-name ".git"', '-prune', '-o', '-print0',
# reset access and modification timestamps to epoch 0 (equivalent to --mtime in GNU tar)
'-exec', 'touch', '--date=@0', '{}', r'\;',
'find', str(dir_name), '-name ".git"', '-prune', '-o', '-print0',
# reset access and modification timestamps to epoch 0
'-exec', 'touch', '--date=1970-01-01T00:00:00.00Z', '{}', r'\;',
# reset file permissions of cloned repo (equivalent to --mode in GNU tar)
'-exec', 'chmod', '"go+u,go-w"', '{}', r'\;', '|',
# sort file list (equivalent to --sort in GNU tar)
Expand All @@ -2789,14 +2806,12 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
'tar', '--create', '--no-recursion', '--owner=0', '--group=0', '--numeric-owner',
'--format=gnu', '--null', '--files-from', '-', '|',
# compress tarball with gzip without original file name and timestamp
'gzip', '--no-name', '>', archive_path
'gzip', '--no-name', '>', str(archive_name)
]
run_shell_cmd(' '.join(tar_cmd), work_dir=tmpdir, hidden=True, verbose_dry_run=True)
except TypeError as err:
raise EasyBuildError("reproducible_archive_cmd: wrong directory or archive name given") from err

# cleanup (repo_name dir does not exist in dry run mode)
remove(tmpdir)

return archive_path
return " ".join(cmd_pipe)


def move_file(path, target_path, force_in_dry_run=False):
Expand Down
21 changes: 8 additions & 13 deletions test/framework/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2939,16 +2939,11 @@ def run_check():
'git_repo': '[email protected]:easybuilders/testrepository.git',
'test_prefix': self.test_prefix,
}
reprod_tar_cmd_pattern = (
r' running shell command "find {} -name \".git\" -prune -o -print0 -exec touch -t 197001010100 {{}} \; |'
r' LC_ALL=C sort --zero-terminated | tar --create --no-recursion --owner=0 --group=0 --numeric-owner'
r' --format=gnu --null --files-from - | gzip --no-name > %(test_prefix)s/target/test.tar.gz'
)

expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
lexming marked this conversation as resolved.
Show resolved Hide resolved
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -2957,7 +2952,7 @@ def run_check():
expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s test123"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("test123"),
ft.reproducible_archive_cmd("test123", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -2967,7 +2962,7 @@ def run_check():
expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive %(git_repo)s"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -2977,7 +2972,7 @@ def run_check():
' running shell command "git clone --depth 1 --branch tag_for_tests --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -2991,7 +2986,7 @@ def run_check():
+ ' clone --depth 1 --branch tag_for_tests --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -3015,7 +3010,7 @@ def run_check():
r" \(in .*/tmp.*\)",
r' running shell command "git checkout 8456f86 && git submodule update --init --recursive"',
r" \(in testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -3026,7 +3021,7 @@ def run_check():
r" \(in .*/tmp.*\)",
r' running shell command "git checkout 8456f86"',
r" \(in testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -3038,7 +3033,7 @@ def run_check():
r" \(in /.*\)",
r' running shell command "git checkout 8456f86"',
r" \(in /.*/testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in /.*\)",
]) % string_args
run_check()
Expand Down
Loading