Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

caa: support peerpods in node-installer (again) #1043

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
ff35b2a
Reapply "platforms: add AKS-PEER-SNP"
burgerdev Nov 27, 2024
7b0769d
Reapply "genpolicy: use kata genpolicy for AKS-PEER-SNP"
burgerdev Nov 27, 2024
02b2d61
Reapply "node-installer: add kata config for AKS-PEER-SNP"
burgerdev Nov 27, 2024
ae8c559
Reapply "node-installer: support AKS-PEER-SNP when patching container…
burgerdev Nov 27, 2024
63b147d
Reapply "kuberesource: use different containerd path for AKS-PEER-SNP"
burgerdev Nov 27, 2024
d1038e0
Reapply "kuberesource: include CAA in node-installer pod on AKS-PEER-…
burgerdev Nov 27, 2024
7ad0a5d
Reapply "contrast: add empty reference values for AKS-PEER-SNP"
burgerdev Nov 27, 2024
074c57e
Reapply "kuberesource: use our own CAA image"
burgerdev Nov 27, 2024
65adf73
Reapply "infra/azure-peerpods: create k8s resources for configuring CAA"
burgerdev Nov 27, 2024
ad9a365
Reapply "justfile: use node-installer instead of deploy-caa"
burgerdev Nov 27, 2024
2966494
Reapply "justfile: push cloud-api-adaptor"
burgerdev Nov 27, 2024
4090ea4
caa: adjust peer-pods test for node-installer-based CAA
burgerdev Nov 29, 2024
43f064a
ci: add skip-undeploy to peerpods test
burgerdev Dec 6, 2024
3a4a1c1
Revert "Reapply "kuberesource: use different containerd path for AKS-…
burgerdev Dec 10, 2024
6438e14
fixup! Reapply "kuberesource: include CAA in node-installer pod on AK…
burgerdev Dec 10, 2024
7773a77
fixup! Reapply "node-installer: support AKS-PEER-SNP when patching co…
burgerdev Dec 10, 2024
d1b8911
fixup! Reapply "justfile: push cloud-api-adaptor"
burgerdev Dec 10, 2024
27e38a9
fixup! caa: adjust peer-pods test for node-installer-based CAA
burgerdev Dec 11, 2024
066c727
Revert "packages/nixos: add IMDS setup script"
burgerdev Dec 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion .github/workflows/e2e_peerpods.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ on:
image-id:
description: "ID of the guest VM image to test (default: build a fresh image)"
required: false
skip-undeploy:
description: "Skip undeploy"
required: false
type: boolean
default: false
pull_request:
paths:
- .github/workflows/e2e_peerpods.yml
Expand All @@ -18,8 +23,17 @@ on:
jobs:
test:
runs-on: ubuntu-22.04
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Log in to ghcr.io Container registry
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/actions/setup_nix
with:
githubToken: ${{ secrets.GITHUB_TOKEN }}
Expand All @@ -34,7 +48,9 @@ jobs:
azure_image_id: ${{ inputs.image-id }}
azure_resource_group: contrast-ci
azure_location: germanywestcentral
container_registry: ghcr.io/edgelesssys
CONTRAST_CACHE_DIR: "./workspace.cache"
DO_NOT_TRACK: 1
run: |
ssh-keygen -t rsa -f ./infra/azure-peerpods/id_rsa -N ""
cat >infra/azure-peerpods/iam.auto.tfvars <<EOF
Expand All @@ -45,6 +61,6 @@ jobs:
EOF
nix run .#scripts.test-peerpods
- name: Terminate cluster
if: always()
if: always() && !inputs.skip-undeploy
run: |
nix run -L .#terraform -- -chdir=infra/azure-peerpods destroy --auto-approve
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ terraform.tfstate*
id_rsa*
kube.conf
out.env
infra/**/peer-pods-config.yaml
infra/**/kustomization.yaml
uplosi.conf*
2 changes: 1 addition & 1 deletion cli/genpolicy/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func NewConfig(platform platforms.Platform) *Config {
Settings: aksSettings,
Bin: aksGenpolicyBin,
}
case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
case platforms.AKSPeerSNP, platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
return &Config{
Rules: kataRules,
Settings: kataSettings,
Expand Down
2 changes: 1 addition & 1 deletion cli/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func buildVersionString() (string, error) {
switch platform {
case platforms.AKSCloudHypervisorSNP:
fmt.Fprintf(versionsWriter, "\tgenpolicy version:\t%s\n", constants.MicrosoftGenpolicyVersion)
case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
case platforms.AKSPeerSNP, platforms.MetalQEMUSNP, platforms.MetalQEMUTDX, platforms.K3sQEMUSNP, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
fmt.Fprintf(versionsWriter, "\tgenpolicy version:\t%s\n", constants.KataGenpolicyVersion)
}
}
Expand Down
44 changes: 44 additions & 0 deletions infra/azure-peerpods/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,47 @@ secretGenerator:
- id_rsa.pub
EOF
}


data "local_file" "id_rsa" {
filename = "id_rsa.pub"
}

# Renders the Kubernetes objects that configure the cloud-api-adaptor into
# ./peer-pods-config.yaml:
#   - ConfigMap "peer-pods-cm": Azure settings (IDs, region, subnet, image).
#   - Secret "azure-client-secret": the base64-encoded client secret.
#   - Secret "ssh-key-secret": the base64-encoded id_rsa.pub.
# Each document must carry its own `type: Opaque` *before* the `---` separator;
# otherwise the key ends up duplicated in the following document.
# NOTE(review): the rendered file contains the client secret but is created
# with mode 0777 — consider tightening the permission.
resource "local_file" "peer-pods-config" {
  filename        = "./peer-pods-config.yaml"
  file_permission = "0777"
  content         = <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
  name: peer-pods-cm
data:
  AZURE_CLIENT_ID: ${var.client_id}
  AZURE_TENANT_ID: ${data.azurerm_subscription.current.tenant_id}
  AZURE_AUTHORITY_HOST: https://login.microsoftonline.com/
  AZURE_IMAGE_ID: ${var.image_id}
  AZURE_INSTANCE_SIZE: Standard_DC2as_v5
  AZURE_REGION: ${data.azurerm_resource_group.rg.location}
  AZURE_RESOURCE_GROUP: ${data.azurerm_resource_group.rg.name}
  AZURE_SUBNET_ID: ${one(azurerm_virtual_network.main.subnet.*.id)}
  AZURE_SUBSCRIPTION_ID: ${data.azurerm_subscription.current.subscription_id}
  CLOUD_PROVIDER: azure
  DISABLECVM: "false"
---
apiVersion: v1
data:
  AZURE_CLIENT_SECRET: ${base64encode(var.client_secret)}
kind: Secret
metadata:
  name: azure-client-secret
type: Opaque
---
apiVersion: v1
data:
  id_rsa.pub: ${data.local_file.id_rsa.content_base64}
kind: Secret
metadata:
  name: ssh-key-secret
type: Opaque
EOF
}
101 changes: 91 additions & 10 deletions internal/kuberesource/parts.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,34 +118,112 @@ func NodeInstaller(namespace string, platform platforms.Platform) (*NodeInstalle
),
}

cloudAPIAdaptor := Container().
WithName("cloud-api-adaptor").
WithImage("ghcr.io/edgelesssys/contrast/cloud-api-adaptor:latest").
WithVolumeMounts(
VolumeMount().
WithName("ssh").
WithMountPath("/.ssh/").
WithReadOnly(true),
VolumeMount().
WithName("pods-dir").
WithMountPath("/run/peerpod"),
VolumeMount().
WithName("netns").
WithMountPath("/run/netns").
WithMountPropagation(corev1.MountPropagationHostToContainer),
VolumeMount().
WithName("netns").
WithMountPath("/var/run/netns").
WithMountPropagation(corev1.MountPropagationHostToContainer),
).
WithEnv(
NewEnvVar("optionals", fmt.Sprintf("-socket /run/peerpod/hypervisor-%s.sock ", runtimeHandler)),
applycorev1.EnvVar().WithName("NODE_NAME").WithValueFrom(applycorev1.EnvVarSource().WithFieldRef(applycorev1.ObjectFieldSelector().WithFieldPath("spec.nodeName"))),
).
WithEnvFrom(
applycorev1.EnvFromSource().
WithConfigMapRef(
applycorev1.ConfigMapEnvSource().
WithName("peer-pods-cm"),
),
applycorev1.EnvFromSource().
WithSecretRef(applycorev1.SecretEnvSource().
WithName("azure-client-secret"),
),
).
WithSecurityContext(
applycorev1.SecurityContext().
WithCapabilities(
applycorev1.Capabilities().
WithAdd(
corev1.Capability("NET_ADMIN"),
corev1.Capability("SYS_ADMIN"),
),
),
)
cloudAPIAdaptorVolumes := []*applycorev1.VolumeApplyConfiguration{
Volume().
WithName("pods-dir").
WithHostPath(HostPathVolumeSource().
WithPath("/run/peerpod").
WithType(corev1.HostPathDirectoryOrCreate),
),
Volume().
WithName("netns").
WithHostPath(HostPathVolumeSource().
WithPath("/run/netns").
WithType(corev1.HostPathDirectory),
),
Volume().
WithName("ssh").
WithSecret(applycorev1.SecretVolumeSource().
WithDefaultMode(0o600).
WithSecretName("ssh-key-secret"),
),
}

var nodeInstallerImageURL string
var snapshotter *applycorev1.ContainerApplyConfiguration
var snapshotterVolumes []*applycorev1.VolumeApplyConfiguration
var serviceAccount string
var containers []*applycorev1.ContainerApplyConfiguration
var volumes []*applycorev1.VolumeApplyConfiguration
switch platform {
case platforms.AKSCloudHypervisorSNP:
nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-microsoft:latest"
snapshotter = tardevSnapshotter
snapshotterVolumes = tardevSnapshotterVolumes
containers = append(containers, tardevSnapshotter)
volumes = tardevSnapshotterVolumes
case platforms.MetalQEMUSNP, platforms.MetalQEMUTDX:
nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-kata:latest"
snapshotter = nydusSnapshotter
containers = append(containers, nydusSnapshotter)
nydusSnapshotterVolumes = append(nydusSnapshotterVolumes, Volume().
WithName("var-lib-containerd").
WithHostPath(HostPathVolumeSource().
WithPath("/var/lib/containerd").
WithType(corev1.HostPathDirectory),
))
snapshotterVolumes = nydusSnapshotterVolumes
volumes = nydusSnapshotterVolumes
case platforms.K3sQEMUTDX, platforms.K3sQEMUSNP, platforms.RKE2QEMUTDX:
nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-kata:latest"
snapshotter = nydusSnapshotter
containers = append(containers, nydusSnapshotter)
nydusSnapshotterVolumes = append(nydusSnapshotterVolumes, Volume().
WithName("var-lib-containerd").
WithHostPath(HostPathVolumeSource().
WithPath("/var/lib/rancher/k3s/agent/containerd").
WithType(corev1.HostPathDirectory),
))
snapshotterVolumes = nydusSnapshotterVolumes
volumes = nydusSnapshotterVolumes
case platforms.AKSPeerSNP:
nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-kata:latest"
containers = append(containers, nydusSnapshotter, cloudAPIAdaptor)
nydusSnapshotterVolumes = append(nydusSnapshotterVolumes, Volume().
WithName("var-lib-containerd").
WithHostPath(HostPathVolumeSource().
WithPath("/var/lib/containerd").
WithType(corev1.HostPathDirectory),
))
volumes = append(nydusSnapshotterVolumes, cloudAPIAdaptorVolumes...)
serviceAccount = "cloud-api-adaptor"
default:
return nil, fmt.Errorf("unsupported platform %q", platform)
}
Expand All @@ -164,6 +242,7 @@ func NodeInstaller(namespace string, platform platforms.Platform) (*NodeInstalle
}).
WithSpec(PodSpec().
WithHostPID(true).
WithHostNetwork(true).
WithInitContainers(Container().
WithName("installer").
WithImage(nodeInstallerImageURL).
Expand All @@ -176,11 +255,13 @@ func NodeInstaller(namespace string, platform platforms.Platform) (*NodeInstalle
WithMountPath("/host")).
WithCommand("/bin/node-installer", platform.String()),
).
WithServiceAccountName(serviceAccount).
WithAutomountServiceAccountToken(true).
WithContainers(
snapshotter,
containers...,
).
WithVolumes(append(
snapshotterVolumes,
volumes,
Volume().
WithName("host-mount").
WithHostPath(HostPathVolumeSource().
Expand Down
8 changes: 7 additions & 1 deletion internal/platforms/platforms.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ const (
Unknown Platform = iota
// AKSCloudHypervisorSNP represents a deployment with Cloud-Hypervisor on SEV-SNP AKS.
AKSCloudHypervisorSNP
// AKSPeerSNP represents a deployment with peer-pods on SEV-SNP AKS.
AKSPeerSNP
// K3sQEMUTDX represents a deployment with QEMU on bare-metal TDX K3s.
K3sQEMUTDX
// K3sQEMUSNP represents a deployment with QEMU on bare-metal SNP K3s.
Expand All @@ -32,7 +34,7 @@ const (

// All returns a list of all available platforms.
func All() []Platform {
return []Platform{AKSCloudHypervisorSNP, K3sQEMUTDX, K3sQEMUSNP, RKE2QEMUTDX, MetalQEMUSNP, MetalQEMUTDX}
return []Platform{AKSCloudHypervisorSNP, AKSPeerSNP, K3sQEMUTDX, K3sQEMUSNP, RKE2QEMUTDX, MetalQEMUSNP, MetalQEMUTDX}
}

// AllStrings returns a list of all available platforms as strings.
Expand All @@ -49,6 +51,8 @@ func (p Platform) String() string {
switch p {
case AKSCloudHypervisorSNP:
return "AKS-CLH-SNP"
case AKSPeerSNP:
return "AKS-PEER-SNP"
case K3sQEMUTDX:
return "K3s-QEMU-TDX"
case K3sQEMUSNP:
Expand All @@ -69,6 +73,8 @@ func FromString(s string) (Platform, error) {
switch strings.ToLower(s) {
case "aks-clh-snp":
return AKSCloudHypervisorSNP, nil
case "aks-peer-snp":
return AKSPeerSNP, nil
case "k3s-qemu-tdx":
return K3sQEMUTDX, nil
case "k3s-qemu-snp":
Expand Down
22 changes: 14 additions & 8 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ node-installer platform=default_platform:
just push "node-installer-kata"
;;
"AKS-PEER-SNP")
nix run -L .#scripts.deploy-caa -- \
--kustomization=./infra/azure-peerpods/kustomization.yaml \
--pub-key=./infra/azure-peerpods/id_rsa.pub
just push "nydus-snapshotter"
just push "node-installer-kata"
just push "cloud-api-adaptor"
;;
*)
echo "Unsupported platform: {{ platform }}"
Expand All @@ -76,7 +76,7 @@ e2e target=default_deploy_target platform=default_platform: soft-clean coordinat
--namespace-suffix=${namespace_suffix-}

# Generate policies, apply Kubernetes manifests.
deploy target=default_deploy_target cli=default_cli platform=default_platform: (runtime target platform) (apply "runtime") (populate target platform) (generate cli platform) (apply target)
deploy target=default_deploy_target cli=default_cli platform=default_platform: (runtime target platform) (apply-runtime target platform) (populate target platform) (generate cli platform) (apply target)

# Populate the workspace with a runtime class deployment
runtime target=default_deploy_target platform=default_platform:
Expand Down Expand Up @@ -130,15 +130,21 @@ generate cli=default_cli platform=default_platform:
;;
esac

# Apply the runtime.
apply-runtime target=default_deploy_target platform=default_platform:
#!/usr/bin/env bash
set -euo pipefail
kubectl apply -f ./{{ workspace_dir }}/runtime
if [[ {{ platform }} == "AKS-PEER-SNP" ]]; then
kubectl apply -f ./infra/azure-peerpods/peer-pods-config.yaml --namespace {{ target }}${namespace_suffix-}
nix run .#scripts.deploy-caa-rbac {{ target }}${namespace_suffix-}
fi

# Apply Kubernetes manifests from /deployment
apply target=default_deploy_target:
#!/usr/bin/env bash
set -euo pipefail
case {{ target }} in
"runtime")
kubectl apply -f ./{{ workspace_dir }}/runtime
exit 0
;;
"openssl" | "emojivoto" | "volume-stateful-set")
:
;;
Expand Down
7 changes: 7 additions & 0 deletions nodeinstaller/internal/config/kata_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

package config

import "github.com/pelletier/go-toml/v2"

// KataRuntimeConfig is the configuration for the Kata runtime.
// Source: https://github.com/kata-containers/kata-containers/blob/4029d154ba0c26fcf4a8f9371275f802e3ef522c/src/runtime/pkg/katautils/config.go
// This is a simplified version of the actual configuration.
Expand All @@ -14,6 +16,11 @@ type KataRuntimeConfig struct {
Runtime KataRuntime
}

// Marshal serializes the runtime configuration into its TOML representation.
// It returns the encoded bytes, or the error reported by the TOML encoder.
func (k *KataRuntimeConfig) Marshal() ([]byte, error) {
	encoded, err := toml.Marshal(k)
	return encoded, err
}

// Image is the configuration for the image.
type Image map[string]any

Expand Down
28 changes: 28 additions & 0 deletions nodeinstaller/internal/config/kata_runtime_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright 2024 Edgeless Systems GmbH
// SPDX-License-Identifier: AGPL-3.0-only

package config_test

import (
"testing"

"github.com/edgelesssys/contrast/internal/platforms"
"github.com/edgelesssys/contrast/nodeinstaller/internal/constants"
"github.com/stretchr/testify/require"
)

// TestConfigHasKataSection verifies, for every known platform, that the
// marshalled Kata runtime configuration contains the `[Agent.kata]` section.
func TestConfigHasKataSection(t *testing.T) {
	// This is a regression test that ensures the `agent.kata` section is not optimized away. Empty
	// section and no section are handled differently by Kata, so we make sure that this section is
	// always present.
	for _, platform := range platforms.All() {
		t.Run(platform.String(), func(t *testing.T) {
			require := require.New(t)
			// Build the config for the platform of this subtest. Previously this was
			// hard-coded to AKSPeerSNP, so every subtest checked the same platform.
			cfg, err := constants.KataRuntimeConfig("/", platform, "", false)
			require.NoError(err)
			configBytes, err := cfg.Marshal()
			require.NoError(err)
			require.Contains(string(configBytes), "[Agent.kata]")
		})
	}
}
Loading
Loading