From 5ced1b7c49337a55bfebcd13c0acd5bd6bbb8eb6 Mon Sep 17 00:00:00 2001 From: Moritz Sanft <58110325+msanft@users.noreply.github.com> Date: Thu, 19 Dec 2024 10:17:58 +0100 Subject: [PATCH 1/3] packages/libnvidia-container-custom: init This adds a custom implementation of libnvidia-container that can resolve binaries (like `nvidia-smi`) correctly on NixOS. This needs to know the location of the binaries. Unfortunately, it is not possible to supply the package build with the correct paths from the Nix store, as the binaries are built in the driver package, which is specific to the host (i.e. the kernel), and thus is only known in the NixOS build, where we already need to have the package, creating a circular dependency. This adds a vendored version of said package with a workaround that consists of just having it search in the NixOS PATH (i.e. `/run/current-system/sw`), which is fine, since we only use this package in NixOS scenarios, but can't be upstreamed since it's incompatible with other distributions. 
--- .../fix-library-resolving.patch | 147 ++++++++++++++++++ .../inline-c-struct.patch | 14 ++ .../libnvidia-container-custom/modprobe.patch | 30 ++++ .../libnvidia-container-custom/package.nix | 140 +++++++++++++++++ 4 files changed, 331 insertions(+) create mode 100644 packages/by-name/libnvidia-container-custom/fix-library-resolving.patch create mode 100644 packages/by-name/libnvidia-container-custom/inline-c-struct.patch create mode 100644 packages/by-name/libnvidia-container-custom/modprobe.patch create mode 100644 packages/by-name/libnvidia-container-custom/package.nix diff --git a/packages/by-name/libnvidia-container-custom/fix-library-resolving.patch b/packages/by-name/libnvidia-container-custom/fix-library-resolving.patch new file mode 100644 index 000000000..616c97932 --- /dev/null +++ b/packages/by-name/libnvidia-container-custom/fix-library-resolving.patch @@ -0,0 +1,147 @@ +From 8799541f99785d2bd881561386676fb0985e939e Mon Sep 17 00:00:00 2001 +From: Moritz Sanft <58110325+msanft@users.noreply.github.com> +Date: Thu, 10 Oct 2024 14:32:42 +0200 +Subject: [PATCH] fix library resolving + +Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com> +--- + src/ldcache.c | 46 +++++++++++++++++----------------------------- + src/ldcache.h | 2 +- + src/nvc_info.c | 10 +++------- + src/nvc_ldcache.c | 2 +- + 4 files changed, 22 insertions(+), 38 deletions(-) + +diff --git a/src/ldcache.c b/src/ldcache.c +index 38bab055..8cd30a0f 100644 +--- a/src/ldcache.c ++++ b/src/ldcache.c +@@ -108,40 +108,28 @@ ldcache_close(struct ldcache *ctx) + + int + ldcache_resolve(struct ldcache *ctx, uint32_t arch, const char *root, const char * const libs[], +- char *paths[], size_t size, ldcache_select_fn select, void *select_ctx) ++ char *paths[], size_t size, const char* version) + { + char path[PATH_MAX]; +- struct header_libc6 *h; +- int override; ++ char dir[PATH_MAX]; ++ char lib[PATH_MAX]; + +- h = (struct header_libc6 *)ctx->ptr; + memset(paths, 0, size * 
sizeof(*paths)); + +- for (uint32_t i = 0; i < h->nlibs; ++i) { +- int32_t flags = h->libs[i].flags; +- char *key = (char *)ctx->ptr + h->libs[i].key; +- char *value = (char *)ctx->ptr + h->libs[i].value; +- +- if (!(flags & LD_ELF) || (flags & LD_ARCH_MASK) != arch) +- continue; +- +- for (size_t j = 0; j < size; ++j) { +- if (!str_has_prefix(key, libs[j])) +- continue; +- if (path_resolve(ctx->err, path, root, value) < 0) +- return (-1); +- if (paths[j] != NULL && str_equal(paths[j], path)) +- continue; +- if ((override = select(ctx->err, select_ctx, root, paths[j], path)) < 0) +- return (-1); +- if (override) { +- free(paths[j]); +- paths[j] = xstrdup(ctx->err, path); +- if (paths[j] == NULL) +- return (-1); +- } +- break; +- } ++ for (size_t j = 0; j < size; ++j) { ++ snprintf(dir, 100, "@driverLink@/lib"); ++ ++ if (!strncmp(libs[j], "libvdpau_nvidia.so", 100)) ++ strcat(dir, "/vdpau"); ++ snprintf(lib, 100, "%s/%s.%s", dir, libs[j], version); ++ if (path_resolve_full(ctx->err, path, "/", lib) < 0) ++ return (-1); ++ if (!file_exists(ctx->err, path)) ++ continue; ++ ++ paths[j] = xstrdup(ctx->err, path); ++ if (paths[j] == NULL) ++ return (-1); + } + return (0); + } +diff --git a/src/ldcache.h b/src/ldcache.h +index 33d78dd7..2b087dbc 100644 +--- a/src/ldcache.h ++++ b/src/ldcache.h +@@ -50,6 +50,6 @@ void ldcache_init(struct ldcache *, struct error *, const char *); + int ldcache_open(struct ldcache *); + int ldcache_close(struct ldcache *); + int ldcache_resolve(struct ldcache *, uint32_t, const char *, const char * const [], +- char *[], size_t, ldcache_select_fn, void *); ++ char *[], size_t, const char*); + + #endif /* HEADER_LDCACHE_H */ +diff --git a/src/nvc_info.c b/src/nvc_info.c +index b7b8adfa..d42f2beb 100644 +--- a/src/nvc_info.c ++++ b/src/nvc_info.c +@@ -217,15 +217,13 @@ find_library_paths(struct error *err, struct dxcore_context *dxcore, struct nvc_ + if (path_resolve_full(err, path, root, ldcache) < 0) + return (-1); + ldcache_init(&ld, err, 
path); +- if (ldcache_open(&ld) < 0) +- return (-1); + + info->nlibs = size; + info->libs = array_new(err, size); + if (info->libs == NULL) + goto fail; + if (ldcache_resolve(&ld, LIB_ARCH, root, libs, +- info->libs, info->nlibs, select_libraries_fn, info) < 0) ++ info->libs, info->nlibs, info->nvrm_version) < 0) + goto fail; + + info->nlibs32 = size; +@@ -233,13 +231,11 @@ find_library_paths(struct error *err, struct dxcore_context *dxcore, struct nvc_ + if (info->libs32 == NULL) + goto fail; + if (ldcache_resolve(&ld, LIB32_ARCH, root, libs, +- info->libs32, info->nlibs32, select_libraries_fn, info) < 0) ++ info->libs32, info->nlibs32, info->nvrm_version) < 0) + goto fail; + rv = 0; + + fail: +- if (ldcache_close(&ld) < 0) +- return (-1); + return (rv); + } + +@@ -253,7 +249,7 @@ find_binary_paths(struct error *err, struct dxcore_context* dxcore, struct nvc_d + char path[PATH_MAX]; + int rv = -1; + +- if ((env = secure_getenv("PATH")) == NULL) { ++ if ((env = "@binaryPath@") == NULL) { + error_setx(err, "environment variable PATH not found"); + return (-1); + } +diff --git a/src/nvc_ldcache.c b/src/nvc_ldcache.c +index db3b2f69..ae5def43 100644 +--- a/src/nvc_ldcache.c ++++ b/src/nvc_ldcache.c +@@ -367,7 +367,7 @@ nvc_ldcache_update(struct nvc_context *ctx, const struct nvc_container *cnt) + if (validate_args(ctx, cnt != NULL) < 0) + return (-1); + +- argv = (char * []){cnt->cfg.ldconfig, "-f", "/etc/ld.so.conf", "-C", "/etc/ld.so.cache", cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL}; ++ argv = (char * []){cnt->cfg.ldconfig, "-f", "/tmp/ld.so.conf.nvidia-host", "-C", "/tmp/ld.so.cache.nvidia-host", cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL}; + if (*argv[0] == '@') { + /* + * We treat this path specially to be relative to the host filesystem. 
+-- +2.46.0 diff --git a/packages/by-name/libnvidia-container-custom/inline-c-struct.patch b/packages/by-name/libnvidia-container-custom/inline-c-struct.patch new file mode 100644 index 000000000..8cc16f33b --- /dev/null +++ b/packages/by-name/libnvidia-container-custom/inline-c-struct.patch @@ -0,0 +1,14 @@ +diff --git a/src/nvcgo.c b/src/nvcgo.c +index 98789a3..47ad02b 100644 +--- a/src/nvcgo.c ++++ b/src/nvcgo.c +@@ -33,7 +33,8 @@ + void nvcgo_program_1(struct svc_req *, register SVCXPRT *); + + static struct nvcgo_ext { +- struct nvcgo; ++ struct rpc rpc; ++ struct libnvcgo api; + bool initialized; + void *dl_handle; + } global_nvcgo_context; diff --git a/packages/by-name/libnvidia-container-custom/modprobe.patch b/packages/by-name/libnvidia-container-custom/modprobe.patch new file mode 100644 index 000000000..1e0f8bddd --- /dev/null +++ b/packages/by-name/libnvidia-container-custom/modprobe.patch @@ -0,0 +1,30 @@ +diff -ruN nvidia-modprobe-@modprobeVersion@/modprobe-utils/nvidia-modprobe-utils.c nvidia-modprobe-@modprobeVersion@/modprobe-utils/nvidia-modprobe-utils.c +--- nvidia-modprobe-@modprobeVersion@/modprobe-utils/nvidia-modprobe-utils.c 2021-11-13 14:36:58.096684602 +0000 ++++ nvidia-modprobe-@modprobeVersion@-patched/modprobe-utils/nvidia-modprobe-utils.c 2021-11-13 14:43:40.965146390 +0000 +@@ -959,10 +959,10 @@ + return mknod_helper(major, minor_num, vgpu_dev_name, NV_PROC_REGISTRY_PATH); + } + +-static int nvidia_cap_get_device_file_attrs(const char* cap_file_path, +- int *major, +- int *minor, +- char *name) ++int nvidia_cap_get_device_file_attrs(const char* cap_file_path, ++ int *major, ++ int *minor, ++ char *name) + { + char field[32]; + FILE *fp; +diff -ruN nvidia-modprobe-@modprobeVersion@/modprobe-utils/nvidia-modprobe-utils.h nvidia-modprobe-@modprobeVersion@/modprobe-utils/nvidia-modprobe-utils.h +--- nvidia-modprobe-@modprobeVersion@/modprobe-utils/nvidia-modprobe-utils.h 2021-11-13 14:36:58.096684602 +0000 ++++ 
nvidia-modprobe-@modprobeVersion@-patched/modprobe-utils/nvidia-modprobe-utils.h 2021-11-13 14:38:34.078700961 +0000 +@@ -87,6 +87,7 @@ + int nvidia_nvswitch_get_file_state(int minor); + int nvidia_cap_mknod(const char* cap_file_path, int *minor); + int nvidia_cap_get_file_state(const char* cap_file_path); ++int nvidia_cap_get_device_file_attrs(const char* cap_file_path, int *major, int *minor, char *name); + int nvidia_cap_imex_channel_mknod(int minor); + int nvidia_cap_imex_channel_file_state(int minor); + int nvidia_get_chardev_major(const char *name); + int nvidia_msr_modprobe(void); diff --git a/packages/by-name/libnvidia-container-custom/package.nix b/packages/by-name/libnvidia-container-custom/package.nix new file mode 100644 index 000000000..d52cfb0a7 --- /dev/null +++ b/packages/by-name/libnvidia-container-custom/package.nix @@ -0,0 +1,140 @@ +# Copyright 2024 Edgeless Systems GmbH +# SPDX-License-Identifier: AGPL-3.0-only + +# Upstream package from https://github.com/NixOS/nixpkgs/blob/nixos-24.11/pkgs/by-name/li/libnvidia-container/package.nix#L145 +# Adapted to use custom paths for binary resolving specialized to the NixOS image we use this in. As this is incompatible with +# non-NixOS deployments, this cannot be upstreamed. + +{ + stdenv, + lib, + addDriverRunpath, + fetchFromGitHub, + pkg-config, + elfutils, + libcap, + libseccomp, + rpcsvc-proto, + libtirpc, + makeWrapper, + substituteAll, + removeReferencesTo, + replaceVars, + go, + binaryPaths ? 
[ + "/run/current-system/sw" + "/run/opengl-driver/lib" + ], +}: +let + modprobeVersion = "550.54.14"; + nvidia-modprobe = fetchFromGitHub { + owner = "NVIDIA"; + repo = "nvidia-modprobe"; + rev = modprobeVersion; + sha256 = "sha256-iBRMkvOXacs/llTtvc/ZC5i/q9gc8lMuUHxMbu8A+Kg="; + }; + modprobePatch = substituteAll { + src = ./modprobe.patch; + inherit modprobeVersion; + }; +in +stdenv.mkDerivation rec { + pname = "libnvidia-container-custom"; + version = "1.16.2"; + + src = fetchFromGitHub { + owner = "NVIDIA"; + repo = "libnvidia-container"; + rev = "v${version}"; + sha256 = "sha256-hX+2B+0kHiAC2lyo6kwe7DctPLJWgRdbhlc316OO3r8="; + }; + + patches = [ + (replaceVars ./fix-library-resolving.patch { + inherit (addDriverRunpath) driverLink; + binaryPath = lib.makeBinPath binaryPaths; + }) + + ./inline-c-struct.patch + ]; + + postPatch = '' + sed -i \ + -e 's/^REVISION ?=.*/REVISION = ${src.rev}/' \ + -e 's/^COMPILER :=.*/COMPILER = $(CC)/' \ + mk/common.mk + sed -i \ + -e 's/^GIT_TAG ?=.*/GIT_TAG = ${version}/' \ + -e 's/^GIT_COMMIT ?=.*/GIT_COMMIT = ${src.rev}/' \ + versions.mk + mkdir -p deps/src/nvidia-modprobe-${modprobeVersion} + cp -r ${nvidia-modprobe}/* deps/src/nvidia-modprobe-${modprobeVersion} + chmod -R u+w deps/src + pushd deps/src + patch -p0 < ${modprobePatch} + touch nvidia-modprobe-${modprobeVersion}/.download_stamp + popd + # 1. replace DESTDIR=$(DEPS_DIR) with empty strings to prevent copying + # things into deps/src/nix/store + # 2. similarly, remove any paths prefixed with DEPS_DIR + # 3. prevent building static libraries because we don't build static + # libtirpc (for now) + # 4. prevent installation of static libraries because of step 3 + # 5. 
prevent installation of libnvidia-container-go.so twice + sed -i Makefile \ + -e 's#DESTDIR=\$(DEPS_DIR)#DESTDIR=""#g' \ + -e 's#\$(DEPS_DIR)\$#\$#g' \ + -e 's#all: shared static tools#all: shared tools#g' \ + -e '/$(INSTALL) -m 644 $(LIB_STATIC) $(DESTDIR)$(libdir)/d' \ + -e '/$(INSTALL) -m 755 $(libdir)\/$(LIBGO_SHARED) $(DESTDIR)$(libdir)/d' + ''; + + enableParallelBuilding = true; + + preBuild = '' + HOME="$(mktemp -d)" + ''; + + env.NIX_CFLAGS_COMPILE = toString [ "-I${lib.getInclude libtirpc}/include/tirpc" ]; + NIX_LDFLAGS = [ + "-L${lib.getLib libtirpc}/lib" + "-ltirpc" + ]; + + nativeBuildInputs = [ + pkg-config + go + rpcsvc-proto + makeWrapper + removeReferencesTo + ]; + + buildInputs = [ + elfutils + libcap + libseccomp + libtirpc + ]; + + makeFlags = [ + "WITH_LIBELF=yes" + "prefix=$(out)" + "CFLAGS=-DWITH_TIRPC" + ]; + + postInstall = + let + inherit (addDriverRunpath) driverLink; + libraryPath = lib.makeLibraryPath [ + "$out" + driverLink + "${driverLink}-32" + ]; + in + '' + remove-references-to -t "${go}" $out/lib/libnvidia-container-go.so.${version} + wrapProgram $out/bin/nvidia-container-cli --prefix LD_LIBRARY_PATH : ${libraryPath} + ''; + disallowedReferences = [ go ]; +} From 57dd8edaef02cbac991cfb041e20b96683252db9 Mon Sep 17 00:00:00 2001 From: Moritz Sanft <58110325+msanft@users.noreply.github.com> Date: Thu, 19 Dec 2024 10:18:19 +0100 Subject: [PATCH 2/3] packages/nvidia-ctk-with-config: use libnvidia-container-custom --- packages/by-name/nvidia-ctk-with-config/package.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/by-name/nvidia-ctk-with-config/package.nix b/packages/by-name/nvidia-ctk-with-config/package.nix index 5f136e0c3..17fd18189 100644 --- a/packages/by-name/nvidia-ctk-with-config/package.nix +++ b/packages/by-name/nvidia-ctk-with-config/package.nix @@ -6,14 +6,14 @@ { nvidia-container-toolkit, - libnvidia-container, + libnvidia-container-custom, replaceVars, glibc, lib, }: 
nvidia-container-toolkit.override { configTemplatePath = replaceVars ./config.toml { - "nvidia-container-cli" = "${lib.getExe' libnvidia-container "nvidia-container-cli"}"; + "nvidia-container-cli" = "${lib.getExe' libnvidia-container-custom "nvidia-container-cli"}"; "nvidia-container-runtime-hook" = "${lib.getExe' nvidia-container-toolkit "nvidia-container-runtime-hook"}"; "nvidia-ctk" = "${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}"; From b1e2cff67ddc64ca2ab1935ba5f1fb52efe402f5 Mon Sep 17 00:00:00 2001 From: Moritz Sanft <58110325+msanft@users.noreply.github.com> Date: Thu, 19 Dec 2024 10:18:56 +0100 Subject: [PATCH 3/3] packages/nixos: use libnvidia-container-custom Make `libnvidia-container-custom` the default `libnvidia-container` in our NixOS build. --- packages/by-name/mkNixosConfig/package.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/by-name/mkNixosConfig/package.nix b/packages/by-name/mkNixosConfig/package.nix index a96255bcc..85c7f66fe 100644 --- a/packages/by-name/mkNixosConfig/package.nix +++ b/packages/by-name/mkNixosConfig/package.nix @@ -49,6 +49,7 @@ lib.makeOverridable ( kata-runtime kata-kernel-uvm ; + libnvidia-container = outerPkgs.libnvidia-container-custom; }) ];