diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5b393db1db..6719f3716a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -298,7 +298,7 @@ jobs: - name: Create coordinator resource definitions run: | mkdir -p workspace - for platform in aks-clh-snp k3s-qemu-tdx k3s-qemu-snp k3s-qemu-snp-gpu rke2-qemu-tdx; do + for platform in aks-clh-snp k3s-qemu-tdx k3s-qemu-snp k3s-qemu-snp-gpu rke2-qemu-tdx metal-qemu-snp-gpu; do nix run .#scripts.write-coordinator-yaml -- "${coordinatorImgTagged}" "${platform}" > workspace/coordinator-$platform.yml echo -n "${platform} " >> workspace/coordinator-policy.hash yq < workspace/coordinator-$platform.yml \ diff --git a/cli/genpolicy/config.go b/cli/genpolicy/config.go index 327f520f6b..ab74fe1daa 100644 --- a/cli/genpolicy/config.go +++ b/cli/genpolicy/config.go @@ -43,7 +43,9 @@ func NewConfig(platform platforms.Platform) *Config { Settings: aksSettings, Bin: aksGenpolicyBin, } - case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX: + case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, + platforms.K3sQEMUSNPGPU, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX, + platforms.MetalQEMUSNPGPU: return &Config{ Rules: kataRules, Settings: kataSettings, diff --git a/cli/main.go b/cli/main.go index e2a2c61833..6a22a22a20 100644 --- a/cli/main.go +++ b/cli/main.go @@ -105,7 +105,9 @@ func buildVersionString() (string, error) { switch platform { case platforms.AKSCloudHypervisorSNP: fmt.Fprintf(versionsWriter, "\tgenpolicy version:\t%s\n", constants.MicrosoftGenpolicyVersion) - case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, platforms.K3sQEMUTDX, platforms.K3sQEMUSNPGPU, platforms.RKE2QEMUTDX: + case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, + platforms.K3sQEMUTDX, platforms.K3sQEMUSNPGPU, platforms.RKE2QEMUTDX, + platforms.MetalQEMUSNPGPU: fmt.Fprintf(versionsWriter, "\tgenpolicy version:\t%s\n", constants.KataGenpolicyVersion) } } diff --git a/e2e/internal/contrasttest/contrasttest.go b/e2e/internal/contrasttest/contrasttest.go index 7ed77bc29e..d9d35bf3ea 100644 --- a/e2e/internal/contrasttest/contrasttest.go +++ b/e2e/internal/contrasttest/contrasttest.go @@ -202,7 +202,8 @@ func (ct *ContrastTest) patchReferenceValues(t *testing.T, platform platforms.Pl SNPVersion: toPtr(manifest.SVN(255)), MicrocodeVersion: toPtr(manifest.SVN(255)), } - case platforms.MetalQEMUSNP, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU: + case platforms.MetalQEMUSNP, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU, + platforms.MetalQEMUSNPGPU: // The generate command doesn't fill in all required fields when // generating a manifest for baremetal SNP. Do that now. for i, snp := range m.ReferenceValues.SNP { @@ -372,7 +373,9 @@ func (ct *ContrastTest) FactorPlatformTimeout(timeout time.Duration) time.Durati switch ct.Platform { case platforms.AKSCloudHypervisorSNP: // AKS defined is the baseline return timeout - case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX: + case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, + platforms.K3sQEMUSNPGPU, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX, + platforms.MetalQEMUSNPGPU: return 2 * timeout default: return timeout diff --git a/internal/kuberesource/parts.go b/internal/kuberesource/parts.go index a47754263c..1eba2a8556 100644 --- a/internal/kuberesource/parts.go +++ b/internal/kuberesource/parts.go @@ -126,7 +126,7 @@ func NodeInstaller(namespace string, platform platforms.Platform) (*NodeInstalle nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-microsoft:latest" snapshotter = tardevSnapshotter snapshotterVolumes = tardevSnapshotterVolumes - case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX: + case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.MetalQEMUSNPGPU: nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-kata:latest" snapshotter = nydusSnapshotter nydusSnapshotterVolumes = append(nydusSnapshotterVolumes, Volume(). diff --git a/internal/platforms/platforms.go b/internal/platforms/platforms.go index 3c73f20e8b..dd32910c6e 100644 --- a/internal/platforms/platforms.go +++ b/internal/platforms/platforms.go @@ -30,11 +30,13 @@ const ( MetalQEMUTDX // K3sQEMUSNPGPU represents a deployment with QEMU on bare-metal SNP K3s with GPU passthrough. K3sQEMUSNPGPU + // MetalQEMUSNPGPU is the generic platform for bare-metal SNP deployments with GPU passthrough. + MetalQEMUSNPGPU ) // All returns a list of all available platforms. func All() []Platform { - return []Platform{AKSCloudHypervisorSNP, K3sQEMUTDX, K3sQEMUSNP, RKE2QEMUTDX, MetalQEMUSNP, MetalQEMUTDX, K3sQEMUSNPGPU} + return []Platform{AKSCloudHypervisorSNP, K3sQEMUTDX, K3sQEMUSNP, RKE2QEMUTDX, MetalQEMUSNP, MetalQEMUTDX, K3sQEMUSNPGPU, MetalQEMUSNPGPU} } // AllStrings returns a list of all available platforms as strings. @@ -61,6 +63,8 @@ func (p Platform) String() string { return "RKE2-QEMU-TDX" case MetalQEMUSNP: return "Metal-QEMU-SNP" + case MetalQEMUSNPGPU: + return "Metal-QEMU-SNP-GPU" case MetalQEMUTDX: return "Metal-QEMU-TDX" default: @@ -83,6 +87,8 @@ func FromString(s string) (Platform, error) { return RKE2QEMUTDX, nil case "metal-qemu-snp": return MetalQEMUSNP, nil + case "metal-qemu-snp-gpu": + return MetalQEMUSNPGPU, nil case "metal-qemu-tdx": return MetalQEMUTDX, nil default: diff --git a/justfile b/justfile index 5f070c61ff..53cd67dedb 100644 --- a/justfile +++ b/justfile @@ -47,7 +47,7 @@ node-installer platform=default_platform: just push "tardev-snapshotter" just push "node-installer-microsoft" ;; - "Metal-QEMU-SNP"|"Metal-QEMU-TDX"|"K3s-QEMU-SNP"|"K3s-QEMU-SNP-GPU"|"K3s-QEMU-TDX"|"RKE2-QEMU-TDX") + "Metal-QEMU-SNP"|"Metal-QEMU-TDX"|"Metal-QEMU-SNP-GPU"|"K3s-QEMU-SNP"|"K3s-QEMU-SNP-GPU"|"K3s-QEMU-TDX"|"RKE2-QEMU-TDX") just push "nydus-snapshotter" just push "node-installer-kata" ;; @@ -117,7 +117,7 @@ generate cli=default_cli platform=default_platform: # On baremetal SNP, we don't have default values for MinimumTCB, so we need to set some here. case {{ platform }} in - "Metal-QEMU-SNP"|"K3s-QEMU-SNP") + "Metal-QEMU-SNP"|"Metal-QEMU-SNP-GPU"|"K3s-QEMU-SNP"|"K3s-QEMU-SNP-GPU") yq --inplace \ '.ReferenceValues.snp.[].MinimumTCB = {"BootloaderVersion":0,"TEEVersion":0,"SNPVersion":0,"MicrocodeVersion":0}' \ {{ workspace_dir }}/manifest.json @@ -186,7 +186,7 @@ create-pre platform=default_platform: # TODO(burgerdev): this should create the resource group for consistency : ;; - "Metal-QEMU-SNP"|"Metal-QEMU-TDX"|"K3s-QEMU-SNP"|"K3s-QEMU-SNP-GPU"|"K3s-QEMU-TDX"|"RKE2-QEMU-TDX") + "Metal-QEMU-SNP"|"Metal-QEMU-TDX"|"Metal-QEMU-SNP-GPU"|"K3s-QEMU-SNP"|"K3s-QEMU-SNP-GPU"|"K3s-QEMU-TDX"|"RKE2-QEMU-TDX") : ;; "AKS-PEER-SNP") @@ -215,7 +215,7 @@ create platform=default_platform: "AKS-CLH-SNP") nix run -L .#scripts.create-coco-aks -- --name="$azure_resource_group" --location="$azure_location" ;; - "Metal-QEMU-SNP"|"Metal-QEMU-TDX"|"K3s-QEMU-SNP"|"K3s-QEMU-SNP-GPU"|"K3s-QEMU-TDX"|"RKE2-QEMU-TDX") + "Metal-QEMU-SNP"|"Metal-QEMU-TDX"|"Metal-QEMU-SNP-GPU"|"K3s-QEMU-SNP"|"K3s-QEMU-SNP-GPU"|"K3s-QEMU-TDX"|"RKE2-QEMU-TDX") : ;; "AKS-PEER-SNP") diff --git a/nodeinstaller/internal/config/kata_runtime_test.go b/nodeinstaller/internal/config/kata_runtime_test.go index bfef090fb3..91ff64b795 100644 --- a/nodeinstaller/internal/config/kata_runtime_test.go +++ b/nodeinstaller/internal/config/kata_runtime_test.go @@ -28,7 +28,9 @@ func TestKataConfig(t *testing.T) { assert.Contains(string(configBytes), "[Runtime]") switch platform { - case platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU, platforms.K3sQEMUTDX, platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.RKE2QEMUTDX: + case platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU, platforms.K3sQEMUTDX, + platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.RKE2QEMUTDX, + platforms.MetalQEMUSNPGPU: assert.Contains(string(configBytes), "[Hypervisor.qemu]") case platforms.AKSCloudHypervisorSNP: assert.Contains(string(configBytes), "[Hypervisor.clh]") diff --git a/nodeinstaller/internal/constants/constants.go b/nodeinstaller/internal/constants/constants.go index a245b8f43a..58a50c556a 100644 --- a/nodeinstaller/internal/constants/constants.go +++ b/nodeinstaller/internal/constants/constants.go @@ -75,7 +75,8 @@ func KataRuntimeConfig(baseDir string, platform platforms.Platform, qemuExtraKer if debug { config.Hypervisor["qemu"]["enable_debug"] = true } - case platforms.MetalQEMUSNP, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU: + case platforms.MetalQEMUSNP, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU, + platforms.MetalQEMUSNPGPU: if err := toml.Unmarshal([]byte(kataBareMetalQEMUSNPBaseConfig), &config); err != nil { return nil, fmt.Errorf("failed to unmarshal kata runtime configuration: %w", err) } @@ -95,7 +96,7 @@ func KataRuntimeConfig(baseDir string, platform platforms.Platform, qemuExtraKer config.Hypervisor["qemu"]["enable_debug"] = true } // GPU-specific settings - if platform == platforms.K3sQEMUSNPGPU { + if platform == platforms.K3sQEMUSNPGPU || platform == platforms.MetalQEMUSNPGPU { config.Hypervisor["qemu"]["guest_hook_path"] = "/usr/share/oci/hooks" config.Hypervisor["qemu"]["cold_plug_vfio"] = "root-port" // GPU images tend to be larger, so give a better default timeout that @@ -141,12 +142,13 @@ func ContainerdRuntimeConfigFragment(baseDir, snapshotter string, platform platf cfg.Options = map[string]any{ "ConfigPath": filepath.Join(baseDir, "etc", "configuration-qemu-tdx.toml"), } - case platforms.MetalQEMUSNP, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU: + case platforms.MetalQEMUSNP, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU, + platforms.MetalQEMUSNPGPU: cfg.Options = map[string]any{ "ConfigPath": filepath.Join(baseDir, "etc", "configuration-qemu-snp.toml"), } // For GPU support, we need to pass through the CDI annotations. - if platform == platforms.K3sQEMUSNPGPU { + if platform == platforms.K3sQEMUSNPGPU || platform == platforms.MetalQEMUSNPGPU { cfg.PodAnnotations = append(cfg.PodAnnotations, "cdi.k8s.io/*") } default: diff --git a/nodeinstaller/node-installer.go b/nodeinstaller/node-installer.go index d4d2c52e25..bdeb200e72 100644 --- a/nodeinstaller/node-installer.go +++ b/nodeinstaller/node-installer.go @@ -107,7 +107,7 @@ func run(ctx context.Context, fetcher assetFetcher, platform platforms.Platform, case platforms.AKSCloudHypervisorSNP: kataConfigPath = filepath.Join(kataConfigPath, "configuration-clh-snp.toml") containerdConfigPath = filepath.Join(hostMount, "etc", "containerd", "config.toml") - case platforms.MetalQEMUSNP: + case platforms.MetalQEMUSNP, platforms.MetalQEMUSNPGPU: kataConfigPath = filepath.Join(kataConfigPath, "configuration-qemu-snp.toml") containerdConfigPath = filepath.Join(hostMount, "etc", "containerd", "config.toml") case platforms.MetalQEMUTDX: @@ -145,7 +145,8 @@ func run(ctx context.Context, fetcher assetFetcher, platform platforms.Platform, } switch platform { - case platforms.AKSCloudHypervisorSNP, platforms.MetalQEMUSNP, platforms.MetalQEMUTDX: + case platforms.AKSCloudHypervisorSNP, platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, + platforms.MetalQEMUSNPGPU: return restartHostContainerd(containerdConfigPath, "containerd") case platforms.K3sQEMUTDX, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU: if hostServiceExists("k3s") { @@ -212,7 +213,9 @@ func patchContainerdConfig(runtimeHandler, basePath, configPath string, platform case platforms.AKSCloudHypervisorSNP: snapshotterName = fmt.Sprintf("tardev-%s", runtimeHandler) socketName = fmt.Sprintf("/run/containerd/tardev-snapshotter-%s.sock", runtimeHandler) - case platforms.MetalQEMUTDX, platforms.MetalQEMUSNP, platforms.K3sQEMUTDX, platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU, platforms.RKE2QEMUTDX: + case platforms.MetalQEMUTDX, platforms.MetalQEMUSNP, platforms.K3sQEMUTDX, + platforms.K3sQEMUSNP, platforms.K3sQEMUSNPGPU, platforms.RKE2QEMUTDX, + platforms.MetalQEMUSNPGPU: snapshotterName = fmt.Sprintf("nydus-%s", runtimeHandler) socketName = fmt.Sprintf("/run/containerd/containerd-nydus-grpc-%s.sock", runtimeHandler) diff --git a/packages/by-name/contrast/package.nix b/packages/by-name/contrast/package.nix index 8fdfa6ec79..a598f381b6 100644 --- a/packages/by-name/contrast/package.nix +++ b/packages/by-name/contrast/package.nix @@ -56,9 +56,9 @@ let k3s-qemu-tdx-handler = runtimeHandler "k3s-qemu-tdx" kata.contrast-node-installer-image.runtimeHash; rke2-qemu-tdx-handler = runtimeHandler "rke2-qemu-tdx" kata.contrast-node-installer-image.runtimeHash; metal-qemu-snp-handler = runtimeHandler "metal-qemu-snp" kata.contrast-node-installer-image.runtimeHash; + metal-qemu-snp-gpu-handler = runtimeHandler "metal-qemu-snp-gpu" kata.contrast-node-installer-image.runtimeHash; k3s-qemu-snp-handler = runtimeHandler "k3s-qemu-snp" kata.contrast-node-installer-image.runtimeHash; k3s-qemu-snp-gpu-handler = runtimeHandler "k3s-qemu-snp-gpu" kata.contrast-node-installer-image.runtimeHash; - aksRefVals = { snp = [ { @@ -135,6 +135,7 @@ let "${k3s-qemu-tdx-handler}" = tdxRefVals; "${rke2-qemu-tdx-handler}" = tdxRefVals; "${metal-qemu-snp-handler}" = snpRefVals; + "${metal-qemu-snp-gpu-handler}" = snpRefVals; "${k3s-qemu-snp-handler}" = snpRefVals; "${k3s-qemu-snp-gpu-handler}" = snpRefVals; }