# Patch summary (free text before the first "diff --git" header):
# - mkNixosConfig/package.nix: adds nvidia-ctk-oci-hook and nvidia-ctk-with-config to the name list that
#   ends just before `inherit (outerPkgs.kata) kata-agent;` (the start of that list is outside the hunk).
# - nvidia-ctk-oci-hook/package.nix (new): a writeShellApplication that runs nvidia-container-runtime-hook
#   from nvidia-ctk-with-config with `-config <pkg>/etc/nvidia-container-runtime/config.toml -debug "$@"`
#   and redirects stdout and stderr to /var/log/nvidia-hook.log; `>` replaces the file on every run.
# - nvidia-ctk-with-config/config.toml (new): config template with @name@ placeholders. In the ldconfig
#   value the first `@` is outside the @glibcbin@ placeholder, so it stays in front of the substituted
#   path -- presumably intentional; confirm against the nvidia-container-toolkit documentation.
# - nvidia-ctk-with-config/package.nix (new): overrides nvidia-container-toolkit's configTemplatePath
#   with replaceVars ./config.toml, filling in the four placeholders.
# - nixos/gpu.nix: adds the `pkgs` module argument and sets the "10-root" repart contents entry
#   /usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh to lib.getExe pkgs.nvidia-ctk-oci-hook.
# NOTE(review): line breaks and indentation appear collapsed in this copy of the patch; hunk headers
# carry exact line counts, so restore the original layout before attempting to apply it.
diff --git a/packages/by-name/mkNixosConfig/package.nix b/packages/by-name/mkNixosConfig/package.nix index 1e00b77a2c..3b08d93d54 100644 --- a/packages/by-name/mkNixosConfig/package.nix +++ b/packages/by-name/mkNixosConfig/package.nix @@ -41,6 +41,8 @@ lib.makeOverridable ( cloud-api-adaptor kernel-podvm-azure pause-bundle + nvidia-ctk-oci-hook + nvidia-ctk-with-config ; inherit (outerPkgs.kata) kata-agent; }) diff --git a/packages/by-name/nvidia-ctk-oci-hook/package.nix b/packages/by-name/nvidia-ctk-oci-hook/package.nix new file mode 100644 index 0000000000..dfd5d0457d --- /dev/null +++ b/packages/by-name/nvidia-ctk-oci-hook/package.nix @@ -0,0 +1,27 @@ +# Copyright 2024 Edgeless Systems GmbH +# SPDX-License-Identifier: AGPL-3.0-only + +{ + writeShellApplication, + nvidia-ctk-with-config, + lib, +}: +writeShellApplication { + name = "nvidia-ctk-oci-hook"; + + text = '' + # Log the o/p of the hook to a file + ${lib.getExe' nvidia-ctk-with-config "nvidia-container-runtime-hook"} \ + -config ${nvidia-ctk-with-config}/etc/nvidia-container-runtime/config.toml \ + -debug "$@" > /var/log/nvidia-hook.log 2>&1 + ''; + + meta = { + description = "OCI hook for nvidia-container-runtime"; + longDescription = '' + This is an OCI hook (prestart) for the nvidia-container-runtime. It is used to + facilitate GPU containers in peer pods with the necessary drivers, libraries, + and binaries to access the GPU. 
+ ''; + }; +} diff --git a/packages/by-name/nvidia-ctk-with-config/config.toml b/packages/by-name/nvidia-ctk-with-config/config.toml new file mode 100644 index 0000000000..1ae191dd61 --- /dev/null +++ b/packages/by-name/nvidia-ctk-with-config/config.toml @@ -0,0 +1,40 @@ +#accept-nvidia-visible-devices-as-volume-mounts = false +#accept-nvidia-visible-devices-envvar-when-unprivileged = true +disable-require = true +supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video" +#swarm-resource = "DOCKER_RESOURCE_GPU" + +[nvidia-container-cli] +no-pivot = true +debug = "/var/log/nvidia-kata-container/nvidia-container-toolkit.log" +environment = [] +ldcache = "/tmp/ld.so.cache" +ldconfig = "@@glibcbin@/bin/ldconfig" +load-kmods = true +no-cgroups = true +path = "@nvidia-container-cli@" +#root = "/run/nvidia/driver" +#user = "root:video" + +[nvidia-container-runtime] +debug = "/var/log/nvidia-kata-container/nvidia-container-runtime.log" +log-level = "debug" +mode = "cdi" +runtimes = ["docker-runc", "runc", "crun"] + +[nvidia-container-runtime.modes] + +[nvidia-container-runtime.modes.cdi] +annotation-prefixes = ["cdi.k8s.io/"] +default-kind = "nvidia.com/gpu" +spec-dirs = ["/var/run/cdi"] + +[nvidia-container-runtime.modes.csv] +mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d" + +[nvidia-container-runtime-hook] +path = "@nvidia-container-runtime-hook@" +skip-mode-detection = true + +[nvidia-ctk] +path = "@nvidia-ctk@" diff --git a/packages/by-name/nvidia-ctk-with-config/package.nix b/packages/by-name/nvidia-ctk-with-config/package.nix new file mode 100644 index 0000000000..bdf57a1910 --- /dev/null +++ b/packages/by-name/nvidia-ctk-with-config/package.nix @@ -0,0 +1,21 @@ +# Copyright 2024 Edgeless Systems GmbH +# SPDX-License-Identifier: AGPL-3.0-only + +# Builds nvidia-container-toolkit with its config template overridden by ./config.toml, whose +# placeholders are replaced with Nix store paths below; for use in peer pods GPU containers. 
+ +{ + nvidia-container-toolkit, + libnvidia-container, + replaceVars, + glibc, + lib, +}: +nvidia-container-toolkit.override { + configTemplatePath = replaceVars ./config.toml { + "nvidia-container-cli" = "${lib.getExe' libnvidia-container "nvidia-container-cli"}"; + "nvidia-container-runtime-hook" = "${lib.getExe' nvidia-container-toolkit "nvidia-container-runtime-hook"}"; + "nvidia-ctk" = "${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}"; + "glibcbin" = "${lib.getBin glibc}"; + }; +} diff --git a/packages/nixos/gpu.nix b/packages/nixos/gpu.nix index 021174e341..790afcf7e4 100644 --- a/packages/nixos/gpu.nix +++ b/packages/nixos/gpu.nix @@ -1,7 +1,12 @@ # Copyright 2024 Edgeless Systems GmbH # SPDX-License-Identifier: AGPL-3.0-only -{ config, lib, ... }: +{ + config, + lib, + pkgs, + ... +}: let cfg = config.contrast.gpu; @@ -24,6 +29,8 @@ in }; hardware.nvidia-container-toolkit.enable = true; + image.repart.partitions."10-root".contents."/usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh".source = lib.getExe pkgs.nvidia-ctk-oci-hook; + boot.initrd.kernelModules = [ # Extra kernel modules required to talk to the GPU in CC-Mode. "ecdsa_generic"