diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py index cfe0220202..77cd6278ba 100644 --- a/easybuild/framework/easyblock.py +++ b/easybuild/framework/easyblock.py @@ -49,6 +49,7 @@ import os import re import stat +import sys import tempfile import time import traceback @@ -358,34 +359,49 @@ def get_checksum_for(self, checksums, filename=None, index=None): :param filename: name of the file to obtain checksum for :param index: index of file in list """ - checksum = None - - # sometimes, filename are specified as a dict + chksum_input = filename + chksum_input_git = None + # if filename is provided as dict, take 'filename' key if isinstance(filename, dict): - filename = filename['filename'] + chksum_input = filename.get('filename', None) + chksum_input_git = filename.get('git_config', None) + # early return if no filename given + if chksum_input is None: + self.log.debug("Cannot get checksum without a file name") + return None + + if sys.version_info[0] >= 3 and sys.version_info[1] < 9: + # ignore any checksum for given filename due to changes in https://github.com/python/cpython/issues/90021 + # checksums of tarballs made by EB of git repos cannot be reliably checked prior to Python 3.9 + if chksum_input_git is not None: + self.log.deprecated( + "Reproducible tarballs of Git repos are only possible when using Python 3.9+ to run EasyBuild. 
" + f"Checksum of {chksum_input} cannot be verified.", + '6.0' + ) + return None + checksum = None # if checksums are provided as a dict, lookup by source filename as key if isinstance(checksums, dict): - if filename is not None and filename in checksums: - checksum = checksums[filename] - else: - checksum = None - elif isinstance(checksums, (list, tuple)): - if index is not None and index < len(checksums) and (index >= 0 or abs(index) <= len(checksums)): + try: + checksum = checksums[chksum_input] + except KeyError: + self.log.debug("Checksum not found for file: %s", chksum_input) + elif isinstance(checksums, (list, tuple)) and index is not None: + try: checksum = checksums[index] - else: - checksum = None - elif checksums is None: - checksum = None - else: + except IndexError: + self.log.debug("Checksum not found for index list: %s", index) + elif checksums is not None: raise EasyBuildError("Invalid type for checksums (%s), should be dict, list, tuple or None.", type(checksums)) if checksum is None or build_option("checksum_priority") == CHECKSUM_PRIORITY_JSON: json_checksums = self.get_checksums_from_json() - return json_checksums.get(filename, None) - else: - return checksum + return json_checksums.get(chksum_input, None) + + return checksum def get_checksums_from_json(self, always_read=False): """ diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py index 89613e0f03..28c644320b 100644 --- a/easybuild/tools/filetools.py +++ b/easybuild/tools/filetools.py @@ -48,6 +48,7 @@ import inspect import itertools import os +import pathlib import platform import re import shutil @@ -55,6 +56,7 @@ import stat import ssl import sys +import tarfile import tempfile import time import zlib @@ -1408,13 +1410,12 @@ def find_extension(filename): suffixes = sorted(EXTRACT_CMDS.keys(), key=len, reverse=True) pat = r'(?P%s)$' % '|'.join([s.replace('.', '\\.') for s in suffixes]) res = re.search(pat, filename, flags=re.IGNORECASE) + if res: - ext = 
res.group('ext') + return res.group('ext') else: raise EasyBuildError("%s has unknown file extension", filename) - return ext - def extract_cmd(filepath, overwrite=False): """ @@ -2644,7 +2645,7 @@ def get_source_tarball_from_git(filename, target_dir, git_config): """ Downloads a git repository, at a specific tag or commit, recursively or not, and make an archive with it - :param filename: name of the archive to save the code to (must be .tar.gz) + :param filename: name of the archive file to save the code to (including extension) :param target_dir: target directory where to save the archive to :param git_config: dictionary containing url, repo_name, recursive, and one of tag or commit """ @@ -2680,9 +2681,6 @@ def get_source_tarball_from_git(filename, target_dir, git_config): if not url: raise EasyBuildError("url not specified in git_config parameter") - if not filename.endswith('.tar.gz'): - raise EasyBuildError("git_config currently only supports filename ending in .tar.gz") - # prepare target directory and clone repository mkdir(target_dir, parents=True) @@ -2768,30 +2766,9 @@ def get_source_tarball_from_git(filename, target_dir, git_config): run_shell_cmd(cmd, work_dir=work_dir, hidden=True, verbose_dry_run=True) # Create archive - archive_path = os.path.join(target_dir, filename) - - if keep_git_dir: - # create archive of git repo including .git directory - tar_cmd = ['tar', 'cfvz', archive_path, repo_name] - else: - # create reproducible archive - # see https://reproducible-builds.org/docs/archives/ - tar_cmd = [ - # print names of all files and folders excluding .git directory - 'find', repo_name, '-name ".git"', '-prune', '-o', '-print0', - # reset access and modification timestamps to epoch 0 (equivalent to --mtime in GNU tar) - '-exec', 'touch', '--date=@0', '{}', r'\;', - # reset file permissions of cloned repo (equivalent to --mode in GNU tar) - '-exec', 'chmod', '"go+u,go-w"', '{}', r'\;', '|', - # sort file list (equivalent to --sort in GNU tar) - 
'LC_ALL=C', 'sort', '--zero-terminated', '|', - # create tarball in GNU format with ownership and permissions reset - 'tar', '--create', '--no-recursion', '--owner=0', '--group=0', '--numeric-owner', - '--format=gnu', '--null', '--files-from', '-', '|', - # compress tarball with gzip without original file name and timestamp - 'gzip', '--no-name', '>', archive_path - ] - run_shell_cmd(' '.join(tar_cmd), work_dir=tmpdir, hidden=True, verbose_dry_run=True) + repo_path = os.path.join(tmpdir, repo_name) + reproducible = not keep_git_dir # presence of .git directory renders repo unreproducible + archive_path = make_archive(repo_path, archive_file=filename, archive_dir=target_dir, reproducible=reproducible) # cleanup (repo_name dir does not exist in dry run mode) remove(tmpdir) @@ -2799,6 +2776,126 @@ def get_source_tarball_from_git(filename, target_dir, git_config): return archive_path +def make_archive(source_dir, archive_file=None, archive_dir=None, reproducible=True): + """ + Create an archive file of the given directory + The format of the tarball is defined by the extension of the archive file name + + :source_dir: string with path to directory to be archived + :archive_file: string with filename of archive + :archive_dir: string with path to directory to place the archive + :reproducible: make a tarball that is reproducible across systems + - see https://reproducible-builds.org/docs/archives/ + - requires uncompressed or LZMA compressed archive images + - gzip is currently not supported due to non-deterministic data injected in its headers + see https://github.com/python/cpython/issues/112346 + + Default behaviour: reproducible tarball in .tar.xz + """ + def reproducible_filter(tarinfo): + "Filter out system-dependent data from tarball" + # contents of '.git' subdir are inherently system dependent + if "/.git/" in tarinfo.name or tarinfo.name.endswith("/.git"): + return None + # set timestamp to epoch 0 + tarinfo.mtime = 0 + # reset file permissions by applying 
go+u,go-w + user_mode = tarinfo.mode & stat.S_IRWXU + group_mode = (user_mode >> 3) & ~stat.S_IWGRP # user mode without write + other_mode = group_mode >> 3 # same as group mode + tarinfo.mode = (tarinfo.mode & ~0o77) | group_mode | other_mode + # reset ownership to numeric UID/GID 0 + # equivalent in GNU tar to 'tar --owner=0 --group=0 --numeric-owner' + tarinfo.uid = tarinfo.gid = 0 + tarinfo.uname = tarinfo.gname = "" + return tarinfo + + compression = { + # taken from EXTRACT_CMDS + '.gtgz': 'gz', + '.tar.gz': 'gz', + '.tgz': 'gz', + '.tar.bz2': 'bz2', + '.tb2': 'bz2', + '.tbz': 'bz2', + '.tbz2': 'bz2', + '.tar.xz': 'xz', + '.txz': 'xz', + '.tar': '', + } + reproducible_compression = ["", "xz"] + default_ext = ".tar.xz" + + if archive_file is None: + archive_file = os.path.basename(source_dir) + default_ext + + try: + archive_ext = find_extension(archive_file) + except EasyBuildError: + if "." in archive_file: + # archive filename has unknown extension (set for raise) + archive_ext = "" + else: + # archive filename has no extension, use default one + archive_ext = default_ext + archive_file += archive_ext + + if archive_ext not in compression: + # archive filename has unsupported extension + raise EasyBuildError( + f"Unsupported archive format: {archive_file}. 
Supported tarball extensions: {', '.join(compression)}" + ) + _log.debug(f"Archive extension and compression: {archive_ext} in {compression[archive_ext]}") + + archive_path = archive_file if archive_dir is None else os.path.join(archive_dir, archive_file) + + archive = { + 'name': archive_path, + 'mode': f"w:{compression[archive_ext]}", + 'format': tarfile.GNU_FORMAT, + 'encoding': "utf-8", + } + + if reproducible: + if compression[archive_ext] == "xz": + # ensure a consistent compression level in reproducible tarballs with XZ + archive["preset"] = 6 + elif compression[archive_ext] not in reproducible_compression: + # requested archive compression cannot be made reproducible + print_warning( + f"Requested reproducible archive with unsupported file compression ({compression[archive_ext]}). " + "Please use XZ instead." + ) + reproducible = False + + archive_filter = reproducible_filter if reproducible else None + + if build_option('extended_dry_run'): + # early return in dry run mode + dry_run_msg("Archiving '%s' into '%s'...", source_dir, archive_path) + return archive_path + _log.info("Archiving '%s' into '%s'...", source_dir, archive_path) + + # TODO: replace with TarFile.add(recursive=True) when support for Python 3.6 drops + # since Python v3.7 tarfile automatically orders the list of files added to the archive + # see Tarfile.add documentation: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.add + source_files = [source_dir] + # pathlib's glob includes hidden files + source_files.extend([str(filepath) for filepath in pathlib.Path(source_dir).glob("**/*")]) + source_files.sort() # independent of locale + + with tarfile.open(**archive) as tar_archive: + for filepath in source_files: + # archive with target directory in its top level, remove any prefix in path + file_name = os.path.relpath(filepath, start=os.path.dirname(source_dir)) + tar_archive.add(filepath, arcname=file_name, recursive=False, filter=archive_filter) + _log.debug("File/folder 
added to archive '%s': %s", archive_file, filepath) + + _log.info("Archive '%s' created successfully", archive_file) + + return archive_path + + def move_file(path, target_path, force_in_dry_run=False): """ Move a file from path to target_path diff --git a/test/framework/easyblock.py b/test/framework/easyblock.py index 35da26fc62..11bb3bf9fb 100644 --- a/test/framework/easyblock.py +++ b/test/framework/easyblock.py @@ -35,6 +35,7 @@ import sys import tempfile from inspect import cleandoc +from test.framework.github import requires_github_access from test.framework.utilities import EnhancedTestCase, TestLoaderFiltered, init_config from unittest import TextTestRunner @@ -1618,6 +1619,40 @@ def test_fetch_sources(self): error_pattern = "Found one or more unexpected keys in 'sources' specification: {'nosuchkey': 'foobar'}" self.assertErrorRegex(EasyBuildError, error_pattern, eb.fetch_sources, sources, checksums=[]) + @requires_github_access() + def test_fetch_sources_git(self): + """Test fetch_sources method from git repo.""" + testdir = os.path.abspath(os.path.dirname(__file__)) + ec = process_easyconfig(os.path.join(testdir, 'easyconfigs', 'test_ecs', 't', 'toy', 'toy-0.0.eb'))[0] + eb = get_easyblock_instance(ec) + eb.src = [] + sources = [ + { + 'filename': 'testrepository.tar.xz', + 'git_config': { + 'repo_name': 'testrepository', + 'url': 'https://github.com/easybuilders', + 'tag': 'branch_tag_for_test', + } + } + ] + checksums = ["00000000"] + with self.mocked_stdout_stderr(): + eb.fetch_sources(sources, checksums=checksums) + + self.assertEqual(len(eb.src), 1) + self.assertEqual(eb.src[0]['name'], "testrepository.tar.xz") + self.assertExists(eb.src[0]['path']) + self.assertEqual(eb.src[0]['cmd'], None) + + reference_checksum = "00000000" + if sys.version_info[0] >= 3 and sys.version_info[1] < 9: + # checksums of tarballs made by EB cannot be reliably checked prior to Python 3.9 + # due to changes introduced in python/cpython#90021 + reference_checksum = None + 
+ self.assertEqual(eb.src[0]['checksum'], reference_checksum) + def test_download_instructions(self): """Test use of download_instructions easyconfig parameter.""" diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 911dc858ce..c0770e33c4 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -2920,7 +2920,7 @@ def test_github_get_source_tarball_from_git(self): def run_check(): """Helper function to run get_source_tarball_from_git & check dry run output""" with self.mocked_stdout_stderr(): - res = ft.get_source_tarball_from_git('test.tar.gz', target_dir, git_config) + res = ft.get_source_tarball_from_git('test', target_dir, git_config) stdout = self.get_stdout() stderr = self.get_stderr() self.assertEqual(stderr, '') @@ -2928,7 +2928,7 @@ def run_check(): self.assertTrue(regex.search(stdout), "Pattern '%s' found in: %s" % (regex.pattern, stdout)) self.assertEqual(os.path.dirname(res), target_dir) - self.assertEqual(os.path.basename(res), 'test.tar.gz') + self.assertEqual(os.path.basename(res), 'test.tar.xz') git_config = { 'repo_name': 'testrepository', @@ -2939,47 +2939,38 @@ def run_check(): 'git_repo': 'git@github.com:easybuilders/testrepository.git', 'test_prefix': self.test_prefix, } - reprod_tar_cmd_pattern = ( - r' running shell command "find {} -name \".git\" -prune -o -print0 -exec touch -t 197001010100 {{}} \; |' - r' LC_ALL=C sort --zero-terminated | tar --create --no-recursion --owner=0 --group=0 --numeric-owner' - r' --format=gnu --null --files-from - | gzip --no-name > %(test_prefix)s/target/test.tar.gz' - ) expected = '\n'.join([ - r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s"', + r' running shell command "git clone --depth 1 --branch tag_for_tests {git_repo}"', r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("testrepository"), - r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + 
]).format(**string_args, repo_name='testrepository') run_check() git_config['clone_into'] = 'test123' expected = '\n'.join([ - r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s test123"', - r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("test123"), + r' running shell command "git clone --depth 1 --branch tag_for_tests {git_repo} test123"', r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='test123') run_check() del git_config['clone_into'] git_config['recursive'] = True expected = '\n'.join([ - r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive %(git_repo)s"', + r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive {git_repo}"', r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("testrepository"), - r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite'] expected = '\n'.join([ ' running shell command "git clone --depth 1 --branch tag_for_tests --recursive' - + ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"', - r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("testrepository"), + + ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' {git_repo}"', r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() git_config['extra_config_params'] = [ @@ -2989,60 +2980,72 @@ def run_check(): expected = '\n'.join([ ' running shell command "git -c submodule."fastahack".active=false -c submodule."sha1".active=false' + ' clone --depth 1 --branch tag_for_tests --recursive' - + ' --recurse-submodules=\'!vcflib\' 
--recurse-submodules=\'!sdsl-lite\' %(git_repo)s"', + + ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' {git_repo}"', r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("testrepository"), - r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() del git_config['recurse_submodules'] del git_config['extra_config_params'] git_config['keep_git_dir'] = True expected = '\n'.join([ - r' running shell command "git clone --branch tag_for_tests --recursive %(git_repo)s"', - r" \(in .*/tmp.*\)", - r' running shell command "tar cfvz .*/target/test.tar.gz testrepository"', + r' running shell command "git clone --branch tag_for_tests --recursive {git_repo}"', r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() del git_config['keep_git_dir'] del git_config['tag'] git_config['commit'] = '8456f86' expected = '\n'.join([ - r' running shell command "git clone --no-checkout %(git_repo)s"', + r' running shell command "git clone --no-checkout {git_repo}"', r" \(in .*/tmp.*\)", r' running shell command "git checkout 8456f86 && git submodule update --init --recursive"', - r" \(in testrepository\)", - reprod_tar_cmd_pattern.format("testrepository"), - r" \(in .*/tmp.*\)", - ]) % string_args + r" \(in .*/testrepository\)", + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite'] expected = '\n'.join([ - r' running shell command "git clone --no-checkout %(git_repo)s"', + r' running shell command "git clone --no-checkout {git_repo}"', r" \(in .*/tmp.*\)", - r' running shell command "git checkout 8456f86"', - r" \(in testrepository\)", - 
reprod_tar_cmd_pattern.format("testrepository"), - r" \(in .*/tmp.*\)", - ]) % string_args + r' running shell command "git checkout 8456f86 && git submodule update --init ' + r"--recursive --recurse-submodules='!vcflib' --recurse-submodules='!sdsl-lite'\"", + r" \(in .*/testrepository\)", + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() del git_config['recursive'] del git_config['recurse_submodules'] expected = '\n'.join([ - r' running shell command "git clone --no-checkout %(git_repo)s"', + r' running shell command "git clone --no-checkout {git_repo}"', r" \(in /.*\)", r' running shell command "git checkout 8456f86"', r" \(in /.*/testrepository\)", - reprod_tar_cmd_pattern.format("testrepository"), - r" \(in /.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() + # files with a recognizable extension have those extensions automatically removed with a warning + bad_filenames = ['test.tar', 'test.tar.gz', 'test.tar.xz', 'test.zip'] + # files with arbitrary extensions are taken as is and get extra 'tar.xz' extension appended + good_filenames = ['test', 'test-1.2.3', 'test.txt', 'test-1.2.3.txt'] + for test_filename in bad_filenames + good_filenames: + with self.mocked_stdout_stderr(): + res = ft.get_source_tarball_from_git(test_filename, target_dir, git_config) + stderr = self.get_stderr() + regex = re.compile("Ignoring extension of filename.*set in git_config parameter") + if test_filename in bad_filenames: + self.assertTrue(regex.search(stderr), "Pattern '%s' found in: %s" % (regex.pattern, stderr)) + self.assertTrue(res.endswith("test.tar.xz")) + else: + self.assertFalse(regex.search(stderr), "Pattern '%s' found in: %s" % (regex.pattern, stderr)) + self.assertTrue(res.endswith(test_filename + ".tar.xz")) + # Test with real data. 
init_config() git_config = { @@ -3052,13 +3055,13 @@ def run_check(): } try: - res = ft.get_source_tarball_from_git('test.tar.gz', target_dir, git_config) + res = ft.get_source_tarball_from_git('test', target_dir, git_config) # (only) tarball is created in specified target dir - test_file = os.path.join(target_dir, 'test.tar.gz') + test_file = os.path.join(target_dir, 'test.tar.xz') self.assertEqual(res, test_file) self.assertTrue(os.path.isfile(test_file)) - test_tar_gzs = [os.path.basename(test_file)] - self.assertEqual(os.listdir(target_dir), ['test.tar.gz']) + test_tar_files = [os.path.basename(test_file)] + self.assertEqual(os.listdir(target_dir), ['test.tar.xz']) # Check that we indeed downloaded the right tag extracted_dir = tempfile.mkdtemp(prefix='extracted_dir') with self.mocked_stdout_stderr(): @@ -3069,7 +3072,7 @@ def run_check(): # use a tag that clashes with a branch name and make sure this is handled correctly git_config['tag'] = 'tag_for_tests' with self.mocked_stdout_stderr(): - res = ft.get_source_tarball_from_git('test.tar.gz', target_dir, git_config) + res = ft.get_source_tarball_from_git('test', target_dir, git_config) stderr = self.get_stderr() self.assertIn('Tag tag_for_tests was not downloaded in the first try', stderr) self.assertEqual(res, test_file) @@ -3082,20 +3085,20 @@ def run_check(): del git_config['tag'] git_config['commit'] = '90366ea' - res = ft.get_source_tarball_from_git('test2.tar.gz', target_dir, git_config) - test_file = os.path.join(target_dir, 'test2.tar.gz') + res = ft.get_source_tarball_from_git('test2', target_dir, git_config) + test_file = os.path.join(target_dir, 'test2.tar.xz') self.assertEqual(res, test_file) self.assertTrue(os.path.isfile(test_file)) - test_tar_gzs.append(os.path.basename(test_file)) - self.assertEqual(sorted(os.listdir(target_dir)), test_tar_gzs) + test_tar_files.append(os.path.basename(test_file)) + self.assertEqual(sorted(os.listdir(target_dir)), test_tar_files) git_config['keep_git_dir'] = 
True - res = ft.get_source_tarball_from_git('test3.tar.gz', target_dir, git_config) - test_file = os.path.join(target_dir, 'test3.tar.gz') + res = ft.get_source_tarball_from_git('test3', target_dir, git_config) + test_file = os.path.join(target_dir, 'test3.tar.xz') self.assertEqual(res, test_file) self.assertTrue(os.path.isfile(test_file)) - test_tar_gzs.append(os.path.basename(test_file)) - self.assertEqual(sorted(os.listdir(target_dir)), test_tar_gzs) + test_tar_files.append(os.path.basename(test_file)) + self.assertEqual(sorted(os.listdir(target_dir)), test_tar_files) except EasyBuildError as err: if "Network is down" in str(err): @@ -3108,7 +3111,7 @@ def run_check(): 'url': 'git@github.com:easybuilders', 'tag': 'tag_for_tests', } - args = ['test.tar.gz', self.test_prefix, git_config] + args = ['test', self.test_prefix, git_config] for key in ['repo_name', 'url', 'tag']: orig_value = git_config.pop(key) @@ -3129,10 +3132,91 @@ def run_check(): self.assertErrorRegex(EasyBuildError, error_pattern, ft.get_source_tarball_from_git, *args) del git_config['unknown'] - args[0] = 'test.txt' - error_pattern = "git_config currently only supports filename ending in .tar.gz" - self.assertErrorRegex(EasyBuildError, error_pattern, ft.get_source_tarball_from_git, *args) - args[0] = 'test.tar.gz' + def test_make_archive(self): + """Test for make_archive method""" + # create fake directories and files to be archived + tmpdir = tempfile.mkdtemp() + tardir = os.path.join(tmpdir, "test_archive") + os.mkdir(tardir) + for path in ('bin', 'lib', 'include'): + os.mkdir(os.path.join(tardir, path)) + ft.write_file(os.path.join(tardir, 'README'), 'Dummy readme') + ft.write_file(os.path.join(tardir, 'bin', 'executable'), 'Dummy binary') + ft.write_file(os.path.join(tardir, 'lib', 'lib.so'), 'Dummy library') + ft.write_file(os.path.join(tardir, 'include', 'header.h'), 'Dummy header') + + # default behaviour + unreprod_txz = ft.make_archive(tardir, reproducible=False) + unreprod_txz_chksum = 
ft.compute_checksum(unreprod_txz, checksum_type="sha256") + self.assertEqual(unreprod_txz, "test_archive.tar.xz") + self.assertExists(unreprod_txz) + os.remove(unreprod_txz) + reprod_txz = ft.make_archive(tardir, reproducible=True) + reprod_txz_chksum = ft.compute_checksum(reprod_txz, checksum_type="sha256") + self.assertEqual(reprod_txz, "test_archive.tar.xz") + self.assertExists(reprod_txz) + os.remove(reprod_txz) + # custom filenames + custom_txz = ft.make_archive(tardir, archive_file="custom_name", reproducible=True) + custom_txz_chksum = ft.compute_checksum(custom_txz, checksum_type="sha256") + self.assertEqual(custom_txz, "custom_name.tar.xz") + self.assertExists(custom_txz) + os.remove(custom_txz) + customdir_txz = ft.make_archive(tardir, archive_file="custom_name", archive_dir=tmpdir, reproducible=True) + customdir_txz_chksum = ft.compute_checksum(customdir_txz, checksum_type="sha256") + self.assertEqual(customdir_txz, os.path.join(tmpdir, "custom_name.tar.xz")) + self.assertExists(customdir_txz) + os.remove(customdir_txz) + # custom .tar + reprod_tar = ft.make_archive(tardir, archive_file="custom_name.tar", reproducible=True) + reprod_tar_chksum = ft.compute_checksum(reprod_tar, checksum_type="sha256") + self.assertEqual(reprod_tar, "custom_name.tar") + self.assertExists(reprod_tar) + os.remove(reprod_tar) + unreprod_tar = ft.make_archive(tardir, archive_file="custom_name.tar", reproducible=False) + unreprod_tar_chksum = ft.compute_checksum(unreprod_tar, checksum_type="sha256") + self.assertEqual(unreprod_tar, "custom_name.tar") + self.assertExists(unreprod_tar) + os.remove(unreprod_tar) + # custom .tar.gz + self.mock_stdout(True) + self.mock_stderr(True) + custom_tgz = ft.make_archive(tardir, archive_file="custom_name.tar.gz", reproducible=True) + stderr = self.get_stderr() + self.mock_stdout(False) + self.mock_stderr(False) + self.assertIn("WARNING: Requested reproducible archive with unsupported file compression (gz)", stderr) + custom_tgz_chksum = 
ft.compute_checksum(custom_tgz, checksum_type="sha256") + self.assertEqual(custom_tgz, "custom_name.tar.gz") + self.assertExists(custom_tgz) + os.remove(custom_tgz) + self.mock_stdout(True) + self.mock_stderr(True) + custom_tgz = ft.make_archive(tardir, archive_file="custom_name.tar.gz", reproducible=False) + stderr = self.get_stderr() + self.mock_stdout(False) + self.mock_stderr(False) + self.assertNotIn("WARNING: Requested reproducible archive with unsupported file compression (gz)", stderr) + custom_tgz_chksum = ft.compute_checksum(custom_tgz, checksum_type="sha256") + self.assertEqual(custom_tgz, "custom_name.tar.gz") + self.assertExists(custom_tgz) + os.remove(custom_tgz) + + self.assertErrorRegex(EasyBuildError, "Unsupported archive format.*", ft.make_archive, tardir, "unknown.ext") + + reference_checksum_txz = "ec0f91a462c2743b19b428f4c177d7109d2ccc018dcdedc12570d9d735d6fb1b" + reference_checksum_tar = "6e902e77925ab2faeef8377722434d4482f1fcc74af958c984c3f22509ae5084" + + if sys.version_info[0] >= 3 and sys.version_info[1] >= 9: + # checksums of tarballs made by EB cannot be reliably checked prior to Python 3.9 + # due to changes introduced in python/cpython#90021 + self.assertNotEqual(unreprod_txz_chksum, reference_checksum_txz) + self.assertEqual(reprod_txz_chksum, reference_checksum_txz) + self.assertEqual(custom_txz_chksum, reference_checksum_txz) + self.assertEqual(customdir_txz_chksum, reference_checksum_txz) + self.assertNotEqual(unreprod_tar_chksum, reference_checksum_tar) + self.assertEqual(reprod_tar_chksum, reference_checksum_tar) + self.assertNotEqual(custom_tgz_chksum, reference_checksum_txz) def test_is_sha256_checksum(self): """Test for is_sha256_checksum function."""