Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance cargo easyblock for sources from git repositories #3483

Merged
merged 2 commits into from
Nov 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 166 additions & 51 deletions easybuild/easyblocks/generic/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

@author: Mikael Oehman (Chalmers University of Technology)
@author: Alex Domingo (Vrije Universiteit Brussel)
@author: Alexander Grund (TU Dresden)
"""

import os
Expand All @@ -37,10 +38,10 @@
from easybuild.tools.build_log import EasyBuildError, print_warning
from easybuild.framework.easyconfig import CUSTOM
from easybuild.framework.extensioneasyblock import ExtensionEasyBlock
from easybuild.tools.filetools import extract_file, change_dir
from easybuild.tools.filetools import extract_file
from easybuild.tools.run import run_cmd
from easybuild.tools.config import build_option
from easybuild.tools.filetools import compute_checksum, mkdir, write_file
from easybuild.tools.filetools import compute_checksum, mkdir, move_file, read_file, write_file, CHECKSUM_TYPE_SHA256
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC

CRATESIO_SOURCE = "https://crates.io/api/v1/crates"
Expand All @@ -54,14 +55,74 @@

"""

CONFIG_TOML_PATCH_GIT = """
[patch."{repo}"]
{crates}
CONFIG_TOML_SOURCE_GIT = """
[source."{url}?rev={rev}"]
git = "{url}"
rev = "{rev}"
replace-with = "vendored-sources"

"""
CONFIG_TOML_PATCH_GIT_CRATES = """{0} = {{ path = "{1}" }}

CONFIG_TOML_SOURCE_GIT_WORKSPACE = """
[source."real-{url}?rev={rev}"]
directory = "{workspace_dir}"

[source."{url}?rev={rev}"]
git = "{url}"
rev = "{rev}"
replace-with = "real-{url}?rev={rev}"

"""

CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{chksum}"}}'
CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{checksum}"}}'


def get_workspace_members(crate_dir):
"""Find all members of a cargo workspace in crate_dir.

(Minimally) parse the Cargo.toml file.
If it is a workspace return all members (subfolder names).
Otherwise return None.
"""
cargo_toml = os.path.join(crate_dir, 'Cargo.toml')

# We are looking for this:
# [workspace]
# members = [
# "reqwest-middleware",
# "reqwest-tracing",
# "reqwest-retry",
# ]

lines = [line.strip() for line in read_file(cargo_toml).splitlines()]
try:
start_idx = lines.index('[workspace]')
except ValueError:
return None
# Find "members = [" and concatenate the value, stop at end of section or file
member_str = None
for line in lines[start_idx + 1:]:
if line.startswith('#'):
continue # Skip comments
if re.match(r'\[\w+\]', line):
break
if member_str is None:
m = re.match(r'members\s+=\s+\[', line)
if m:
member_str = line[m.end():]
elif line.endswith(']'):
member_str += line[:-1].strip()
break
else:
member_str += line
# Split at commas after removing possibly trailing ones and remove the quotes
members = [member.strip().strip('"') for member in member_str.rstrip(',').split(',')]
# Sanity check that we didn't pick up anything unexpected
invalid_members = [member for member in members if not re.match(r'(\w|-)+', member)]
if invalid_members:
raise EasyBuildError('Failed to parse %s: Found seemingly invalid members: %s',
cargo_toml, ', '.join(invalid_members))
return [os.path.join(crate_dir, m) for m in members]


class Cargo(ExtensionEasyBlock):
Expand All @@ -81,12 +142,15 @@ def extra_options(extra_vars=None):
return extra_vars

@staticmethod
def crate_src_filename(pkg_name, pkg_version, *args):
"""Crate tarball filename based on package name and version"""
return "{0}-{1}.tar.gz".format(pkg_name, pkg_version)
def crate_src_filename(pkg_name, pkg_version, _=None, rev=None):
"""Crate tarball filename based on package name, version and optionally git revision"""
parts = [pkg_name, pkg_version]
if rev is not None:
parts.append(rev)
return '-'.join(parts) + ".tar.gz"

@staticmethod
def crate_download_filename(pkg_name, pkg_version, *args):
def crate_download_filename(pkg_name, pkg_version):
"""Crate download filename based on package name and version"""
return "{0}/{1}/download".format(pkg_name, pkg_version)

Expand Down Expand Up @@ -122,7 +186,6 @@ def __init__(self, *args, **kwargs):
"""Constructor for Cargo easyblock."""
super(Cargo, self).__init__(*args, **kwargs)
self.cargo_home = os.path.join(self.builddir, '.cargo')
self.vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
env.setvar('CARGO_HOME', self.cargo_home)
env.setvar('RUSTC', 'rustc')
env.setvar('RUSTDOC', 'rustdoc')
Expand All @@ -148,7 +211,7 @@ def __init__(self, *args, **kwargs):
repo_name = repo_name[:-4]
sources.append({
'git_config': {'url': url, 'repo_name': repo_name, 'commit': rev},
'filename': self.crate_src_filename(crate, version),
'filename': self.crate_src_filename(crate, version, rev=rev),
})

# copy EasyConfig instance before we make changes to it
Expand All @@ -165,67 +228,119 @@ def extract_step(self):
"""
Unpack the source files and populate them with required .cargo-checksum.json if offline
"""
mkdir(self.vendor_dir)
vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
mkdir(vendor_dir)
# Sources from git repositories might contain multiple crates/folders in a so-called "workspace".
# If we put such a workspace into the vendor folder, cargo fails with
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest".
# Hence we put those in a separate folder and only move "regular" crates into the vendor folder.
git_vendor_dir = os.path.join(self.builddir, 'easybuild_vendor_git')
mkdir(git_vendor_dir)

vendor_crates = {self.crate_src_filename(*crate): crate for crate in self.crates}
git_sources = {crate[2]: [] for crate in self.crates if len(crate) == 4}
# Track git sources for building the cargo config and avoiding duplicated folders
git_sources = {}

for src in self.src:
extraction_dir = self.builddir
# Check for git crates, `git_key` will be set to a true-ish value for those
try:
crate_name, _, git_repo, rev = vendor_crates[src['name']]
except (ValueError, KeyError):
git_key = None
else:
git_key = (git_repo, rev)
self.log.debug("Sources of %s(%s) belong to git repo: %s rev %s",
crate_name, src['name'], git_repo, rev)
# Do a sanity check that sources for the same repo and revision are the same
try:
previous_source = git_sources[git_key]
except KeyError:
git_sources[git_key] = src
else:
previous_checksum = previous_source['checksum']
current_checksum = src['checksum']
if previous_checksum and current_checksum and previous_checksum != current_checksum:
raise EasyBuildError("Sources for the same git repository need to be identical."
"Mismatch found for %s rev %s in %s vs %s",
git_repo, rev, previous_source['name'], src['name'])
self.log.info("Source %s already extracted to %s by %s. Skipping extraction.",
src['name'], previous_source['finalpath'], previous_source['name'])
src['finalpath'] = previous_source['finalpath']
continue

is_vendor_crate = src['name'] in vendor_crates
# Extract dependency crates into vendor subdirectory, separate from sources of main package
if src['name'] in vendor_crates:
extraction_dir = self.vendor_dir
if is_vendor_crate:
extraction_dir = git_vendor_dir if git_key else vendor_dir
else:
extraction_dir = self.builddir

self.log.info("Unpacking source of %s", src['name'])
existing_dirs = set(os.listdir(extraction_dir))
crate_dir = None
src_dir = extract_file(src['path'], extraction_dir, cmd=src['cmd'],
extra_options=self.cfg['unpack_options'], change_into_dir=False)
new_extracted_dirs = set(os.listdir(extraction_dir)) - existing_dirs

if len(new_extracted_dirs) == 1:
# Expected crate tarball with 1 folder
crate_dir = new_extracted_dirs.pop()
src_dir = os.path.join(extraction_dir, crate_dir)
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)
elif len(new_extracted_dirs) == 0:
if len(new_extracted_dirs) == 0:
# Extraction went wrong
raise EasyBuildError("Unpacking sources of '%s' failed", src['name'])
# Expected crate tarball with 1 folder
# TODO: properly handle case with multiple extracted folders
# this is currently in a grey area, might still be used by cargo
if len(new_extracted_dirs) == 1:
src_dir = os.path.join(extraction_dir, new_extracted_dirs.pop())
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)

change_dir(src_dir)
self.src[self.src.index(src)]['finalpath'] = src_dir

if self.cfg['offline'] and crate_dir:
# Create checksum file for extracted sources required by vendored crates.io sources
self.log.info('creating .cargo-checksums.json file for : %s', crate_dir)
chksum = compute_checksum(src['path'], checksum_type='sha256')
chkfile = os.path.join(extraction_dir, crate_dir, '.cargo-checksum.json')
write_file(chkfile, CARGO_CHECKSUM_JSON.format(chksum=chksum))
# Add path to extracted sources for any crate from a git repo
try:
crate_name, _, crate_repo, _ = vendor_crates[src['name']]
except (ValueError, KeyError):
pass
else:
self.log.debug("Sources of %s belong to git repo: %s", src['name'], crate_repo)
git_src_dir = (crate_name, src_dir)
git_sources[crate_repo].append(git_src_dir)
if is_vendor_crate and self.cfg['offline']:
# Create checksum file for extracted sources required by vendored crates

# By default there is only a single crate
crate_dirs = [src_dir]
# For git sources determine the folders that contain crates by taking workspaces into account
if git_key:
member_dirs = get_workspace_members(src_dir)
if member_dirs:
crate_dirs = member_dirs

try:
checksum = src[CHECKSUM_TYPE_SHA256]
except KeyError:
checksum = compute_checksum(src['path'], checksum_type=CHECKSUM_TYPE_SHA256)
for crate_dir in crate_dirs:
self.log.info('creating .cargo-checksums.json file for %s', os.path.basename(crate_dir))
chkfile = os.path.join(src_dir, crate_dir, '.cargo-checksum.json')
write_file(chkfile, CARGO_CHECKSUM_JSON.format(checksum=checksum))
# Move non-workspace git crates to the vendor folder
if git_key and member_dirs is None:
src_dir = os.path.join(vendor_dir, os.path.basename(crate_dirs[0]))
move_file(crate_dirs[0], src_dir)

src['finalpath'] = src_dir

if self.cfg['offline']:
self.log.info("Setting vendored crates dir for offline operation")
config_toml = os.path.join(self.cargo_home, 'config.toml')
# Replace crates-io with vendored sources using build dir wide toml file in CARGO_HOME
# because the rust source subdirectories might differ with python packages
self.log.debug("Writting config.toml entry for vendored crates from crate.io")
write_file(config_toml, CONFIG_TOML_SOURCE_VENDOR.format(vendor_dir=self.vendor_dir), append=True)

# also vendor sources from other git sources (could be many crates for one git source)
for git_repo, repo_crates in git_sources.items():
self.log.debug("Writting config.toml entry for git repo: %s", git_repo)
config_crates = ''.join([CONFIG_TOML_PATCH_GIT_CRATES.format(*crate) for crate in repo_crates])
write_file(config_toml, CONFIG_TOML_PATCH_GIT.format(repo=git_repo, crates=config_crates), append=True)
write_file(config_toml, CONFIG_TOML_SOURCE_VENDOR.format(vendor_dir=vendor_dir), append=True)

# Tell cargo about the vendored git sources to avoid it failing with:
# Unable to update https://github.com/[...]
# can't checkout from 'https://github.com/[...]]': you are in the offline mode (--offline)
for (git_repo, rev), src in git_sources.items():
self.log.debug("Writting config.toml entry for git repo: %s rev %s", git_repo, rev)
src_dir = src['finalpath']
if os.path.dirname(src_dir) == vendor_dir:
# Non-workspace sources are in vendor_dir
write_file(config_toml,
CONFIG_TOML_SOURCE_GIT.format(url=git_repo, rev=rev),
append=True)
else:
# Workspace sources stay in their own separate folder.
# We cannot have a `directory = "<dir>"` entry where a folder containing a workspace is inside
write_file(config_toml,
CONFIG_TOML_SOURCE_GIT_WORKSPACE.format(url=git_repo, rev=rev, workspace_dir=src_dir),
append=True)

# Use environment variable since it would also be passed along to builds triggered via python packages
env.setvar('CARGO_NET_OFFLINE', 'true')
Expand Down
Loading