Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add peerpods support for the node-installer #959

Merged
merged 11 commits into from
Nov 4, 2024
Merged
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,5 @@ terraform.tfstate*
id_rsa*
kube.conf
out.env
infra/**/kustomization.yaml
infra/**/workload-identity.yaml
infra/**/peer-pods-config.yaml
uplosi.conf*
2 changes: 1 addition & 1 deletion cli/genpolicy/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func NewConfig(platform platforms.Platform) *Config {
Settings: aksSettings,
Bin: aksGenpolicyBin,
}
case platforms.K3sQEMUSNP, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
case platforms.AKSPeerSNP, platforms.K3sQEMUSNP, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
return &Config{
Rules: kataRules,
Settings: kataSettings,
Expand Down
2 changes: 1 addition & 1 deletion cli/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func buildVersionString() (string, error) {
switch platform {
case platforms.AKSCloudHypervisorSNP:
fmt.Fprintf(versionsWriter, "\tgenpolicy version:\t%s\n", constants.MicrosoftGenpolicyVersion)
case platforms.K3sQEMUSNP, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
case platforms.AKSPeerSNP, platforms.K3sQEMUSNP, platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
fmt.Fprintf(versionsWriter, "\tgenpolicy version:\t%s\n", constants.KataGenpolicyVersion)
}
}
Expand Down
96 changes: 34 additions & 62 deletions infra/azure-peerpods/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,6 @@ resource "azurerm_role_assignment" "ra_network_contributor" {
principal_id = azuread_service_principal.sp.object_id
}

resource "azuread_application_federated_identity_credential" "federated_credentials" {
display_name = local.name
application_id = azuread_application.app.id
issuer = azurerm_kubernetes_cluster.cluster.oidc_issuer_url
subject = "system:serviceaccount:confidential-containers-system:cloud-api-adaptor"
audiences = ["api://AzureADTokenExchange"]
}

resource "azuread_application_password" "cred" {
application_id = azuread_application.app.id
}
Expand Down Expand Up @@ -128,65 +120,45 @@ resource "local_file" "kubeconfig" {
content = azurerm_kubernetes_cluster.cluster.kube_config_raw
}

resource "local_file" "workload_identity" {
filename = "./workload-identity.yaml"
data "local_file" "id_rsa" {
filename = "id_rsa.pub"
}

resource "local_file" "peer-pods-config" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nothing that needs to be done now, but if we still want to have a "working peerpods cluster" through a single terraform apply, using the Kubernetes Terraform provider might be cleaner here.

filename = "./peer-pods-config.yaml"
file_permission = "0777"
content = <<EOF
apiVersion: apps/v1
kind: DaemonSet
apiVersion: v1
kind: ConfigMap
metadata:
name: cloud-api-adaptor-daemonset
namespace: confidential-containers-system
spec:
template:
metadata:
labels:
azure.workload.identity/use: "true"
name: peer-pods-cm
data:
AZURE_CLIENT_ID: ${azuread_application.app.client_id}
AZURE_TENANT_ID: ${data.azurerm_subscription.current.tenant_id}
AZURE_AUTHORITY_HOST: https://login.microsoftonline.com/
AZURE_IMAGE_ID: ${var.image_id}
AZURE_INSTANCE_SIZE: Standard_DC2as_v5
AZURE_REGION: ${data.azurerm_resource_group.rg.location}
AZURE_RESOURCE_GROUP: ${data.azurerm_resource_group.rg.name}
AZURE_SUBNET_ID: ${one(azurerm_virtual_network.main.subnet.*.id)}
AZURE_SUBSCRIPTION_ID: ${data.azurerm_subscription.current.subscription_id}
CLOUD_PROVIDER: azure
DISABLECVM: "false"
---
apiVersion: v1
kind: ServiceAccount
data:
AZURE_CLIENT_SECRET: ${base64encode(azuread_application_password.cred.value)}
kind: Secret
metadata:
name: cloud-api-adaptor
namespace: confidential-containers-system
annotations:
azure.workload.identity/client-id: ${azuread_application.app.client_id}
EOF
}

resource "local_file" "kustomization" {
filename = "./kustomization.yaml"
file_permission = "0777"
content = <<EOF
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
bases:
- ../../yamls
images:
- name: cloud-api-adaptor
newName: quay.io/confidential-containers/cloud-api-adaptor
newTag: v0.9.0-amd64
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- name: peer-pods-cm
namespace: confidential-containers-system
literals:
- CLOUD_PROVIDER=azure
- AZURE_SUBSCRIPTION_ID=${data.azurerm_subscription.current.subscription_id}
- AZURE_REGION=${data.azurerm_resource_group.rg.location}
- AZURE_INSTANCE_SIZE=Standard_DC2as_v5
- AZURE_RESOURCE_GROUP=${data.azurerm_resource_group.rg.name}
- AZURE_SUBNET_ID=${one(azurerm_virtual_network.main.subnet.*.id)}
- AZURE_IMAGE_ID=${var.image_id}
- DISABLECVM=false
secretGenerator:
- name: peer-pods-secret
namespace: confidential-containers-system
- name: ssh-key-secret
namespace: confidential-containers-system
files:
- id_rsa.pub
patchesStrategicMerge:
- workload-identity.yaml
name: azure-client-secret
---
type: Opaque
apiVersion: v1
data:
id_rsa.pub: ${data.local_file.id_rsa.content_base64}
kind: Secret
metadata:
name: ssh-key-secret
type: Opaque
EOF
}
92 changes: 83 additions & 9 deletions internal/kuberesource/parts.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ func NodeInstaller(namespace string, platform platforms.Platform) (*NodeInstalle
),
}

containerdPath := "/var/lib/rancher/k3s/agent/containerd"
if platform == platforms.AKSPeerSNP {
containerdPath = "/var/lib/containerd"
}

nydusSnapshotter := Container().
WithName("nydus-snapshotter").
WithImage("ghcr.io/edgelesssys/contrast/nydus-snapshotter:latest").
Expand Down Expand Up @@ -113,7 +118,7 @@ func NodeInstaller(namespace string, platform platforms.Platform) (*NodeInstalle
Volume().
WithName("var-lib-containerd").
WithHostPath(HostPathVolumeSource().
WithPath("/var/lib/rancher/k3s/agent/containerd").
WithPath(containerdPath).
WithType(corev1.HostPathDirectory),
),
Volume().
Expand All @@ -124,18 +129,87 @@ func NodeInstaller(namespace string, platform platforms.Platform) (*NodeInstalle
),
}

cloudAPIAdaptor := Container().
WithName("cloud-api-adaptor").
// TODO(freax13): Don't hard-code this
WithImage("quay.io/confidential-containers/cloud-api-adaptor:v0.9.0-amd64").
WithVolumeMounts(
VolumeMount().
WithName("ssh").
WithMountPath("/root/.ssh/").
WithReadOnly(true),
VolumeMount().
WithName("pods-dir").
WithMountPath("/run/peerpod"),
VolumeMount().
WithName("netns").
WithMountPath("/run/netns").
WithMountPropagation(corev1.MountPropagationHostToContainer),
).
WithArgs(
"/usr/local/bin/entrypoint.sh",
).
WithEnv(
NewEnvVar("optionals", fmt.Sprintf("-socket /run/peerpod/hypervisor-%s.sock ", runtimeHandler)),
).
WithEnvFrom(
applycorev1.EnvFromSource().
WithConfigMapRef(
applycorev1.ConfigMapEnvSource().
WithName("peer-pods-cm"),
),
applycorev1.EnvFromSource().
WithSecretRef(applycorev1.SecretEnvSource().
WithName("azure-client-secret"),
),
).
WithSecurityContext(
applycorev1.SecurityContext().
WithCapabilities(
applycorev1.Capabilities().
WithAdd(
corev1.Capability("NET_ADMIN"),
corev1.Capability("SYS_ADMIN"),
),
),
)
cloudAPIAdaptorVolumes := []*applycorev1.VolumeApplyConfiguration{
Volume().
WithName("pods-dir").
WithHostPath(HostPathVolumeSource().
WithPath("/run/peerpod").
WithType(corev1.HostPathDirectoryOrCreate),
),
Volume().
WithName("netns").
WithHostPath(HostPathVolumeSource().
WithPath("/run/netns").
WithType(corev1.HostPathDirectory),
),
Volume().
WithName("ssh").
WithSecret(applycorev1.SecretVolumeSource().
WithDefaultMode(0o600).
WithSecretName("ssh-key-secret"),
),
}

var nodeInstallerImageURL string
var snapshotter *applycorev1.ContainerApplyConfiguration
var snapshotterVolumes []*applycorev1.VolumeApplyConfiguration
var containers []*applycorev1.ContainerApplyConfiguration
var volumes []*applycorev1.VolumeApplyConfiguration
switch platform {
case platforms.AKSCloudHypervisorSNP:
nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-microsoft:latest"
snapshotter = tardevSnapshotter
snapshotterVolumes = tardevSnapshotterVolumes
containers = []*applycorev1.ContainerApplyConfiguration{tardevSnapshotter}
volumes = tardevSnapshotterVolumes
case platforms.K3sQEMUTDX, platforms.K3sQEMUSNP, platforms.RKE2QEMUTDX:
nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-kata:latest"
snapshotter = nydusSnapshotter
snapshotterVolumes = nydusSnapshotterVolumes
containers = []*applycorev1.ContainerApplyConfiguration{nydusSnapshotter}
volumes = nydusSnapshotterVolumes
case platforms.AKSPeerSNP:
nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-kata:latest"
containers = []*applycorev1.ContainerApplyConfiguration{nydusSnapshotter, cloudAPIAdaptor}
volumes = append(nydusSnapshotterVolumes, cloudAPIAdaptorVolumes...)
default:
return nil, fmt.Errorf("unsupported platform %q", platform)
}
Expand Down Expand Up @@ -167,10 +241,10 @@ func NodeInstaller(namespace string, platform platforms.Platform) (*NodeInstalle
WithCommand("/bin/node-installer", platform.String()),
).
WithContainers(
snapshotter,
containers...,
).
WithVolumes(append(
snapshotterVolumes,
volumes,
Volume().
WithName("host-mount").
WithHostPath(HostPathVolumeSource().
Expand Down
8 changes: 7 additions & 1 deletion internal/platforms/platforms.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ const (
Unknown Platform = iota
// AKSCloudHypervisorSNP represents a deployment with Cloud-Hypervisor on SEV-SNP AKS.
AKSCloudHypervisorSNP
// AKSPeerSNP represents a deployment with peer-pods on SEV-SNP AKS.
AKSPeerSNP
// K3sQEMUTDX represents a deployment with QEMU on bare-metal TDX K3s.
K3sQEMUTDX
// K3sQEMUSNP represents a deployment with QEMU on bare-metal SNP K3s.
Expand All @@ -28,7 +30,7 @@ const (

// All returns a list of all available platforms.
func All() []Platform {
return []Platform{AKSCloudHypervisorSNP, K3sQEMUTDX, K3sQEMUSNP, RKE2QEMUTDX}
return []Platform{AKSCloudHypervisorSNP, AKSPeerSNP, K3sQEMUTDX, K3sQEMUSNP, RKE2QEMUTDX}
}

// AllStrings returns a list of all available platforms as strings.
Expand All @@ -45,6 +47,8 @@ func (p Platform) String() string {
switch p {
case AKSCloudHypervisorSNP:
return "AKS-CLH-SNP"
case AKSPeerSNP:
return "AKS-PEER-SNP"
case K3sQEMUTDX:
return "K3s-QEMU-TDX"
case K3sQEMUSNP:
Expand All @@ -61,6 +65,8 @@ func FromString(s string) (Platform, error) {
switch strings.ToLower(s) {
case "aks-clh-snp":
return AKSCloudHypervisorSNP, nil
case "aks-peer-snp":
return AKSPeerSNP, nil
case "k3s-qemu-tdx":
return K3sQEMUTDX, nil
case "k3s-qemu-snp":
Expand Down
23 changes: 11 additions & 12 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,10 @@ node-installer platform=default_platform:
just push "tardev-snapshotter"
just push "node-installer-microsoft"
;;
"K3s-QEMU-SNP"|"K3s-QEMU-TDX"|"RKE2-QEMU-TDX")
"AKS-PEER-SNP"|"K3s-QEMU-SNP"|"K3s-QEMU-TDX"|"RKE2-QEMU-TDX")
just push "nydus-snapshotter"
just push "node-installer-kata"
;;
"AKS-PEER-SNP")
nix run -L .#scripts.deploy-caa -- \
--kustomization=./infra/azure-peerpods/kustomization.yaml \
--workload-identity=./infra/azure-peerpods/workload-identity.yaml \
--pub-key=./infra/azure-peerpods/id_rsa.pub
;;
*)
echo "Unsupported platform: {{ platform }}"
exit 1
Expand All @@ -73,7 +67,7 @@ e2e target=default_deploy_target platform=default_platform: soft-clean coordinat
--skip-undeploy=true

# Generate policies, apply Kubernetes manifests.
deploy target=default_deploy_target cli=default_cli platform=default_platform: (runtime target platform) (apply "runtime") (populate target platform) (generate cli platform) (apply target)
deploy target=default_deploy_target cli=default_cli platform=default_platform: (runtime target platform) (apply-runtime target platform) (populate target platform) (generate cli platform) (apply target)

# Populate the workspace with a runtime class deployment
runtime target=default_deploy_target platform=default_platform:
Expand Down Expand Up @@ -127,15 +121,20 @@ generate cli=default_cli platform=default_platform:
;;
esac

# Apply the runtime.
apply-runtime target=default_deploy_target platform=default_platform:
#!/usr/bin/env bash
set -euo pipefail
kubectl apply -f ./{{ workspace_dir }}/runtime
if [[ {{ platform }} == "AKS-PEER-SNP" ]]; then
kubectl apply -f ./infra/azure-peerpods/peer-pods-config.yaml --namespace {{ target }}${namespace_suffix-}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are still unable to deploy the runtime without knowing the target namespace, right? Please create a ticket with a detailed issue description.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this has been fixed. Instead of using federated credentials, we now use an application password credential and directly pass a client ID and secret to the cloud-api-adaptor. Conventionally, we already had an application password credential set up:
https://github.com/edgelesssys/contrast/blob/0362d8ccc7892311a799dfca717ac84cc595cea9/infra/azure-peerpods/main.tf#L80C11-L82

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Who is supposed to place peer-pods-config.yaml in that directory?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Terraform:

resource "local_file" "peer-pods-config" {
filename = "./peer-pods-config.yaml"
file_permission = "0777"
content = <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
name: peer-pods-cm
data:
AZURE_CLIENT_ID: ${azuread_application.app.client_id}
AZURE_TENANT_ID: ${data.azurerm_subscription.current.tenant_id}
AZURE_AUTHORITY_HOST: https://login.microsoftonline.com/
AZURE_IMAGE_ID: ${var.image_id}
AZURE_INSTANCE_SIZE: Standard_DC2as_v5
AZURE_REGION: ${data.azurerm_resource_group.rg.location}
AZURE_RESOURCE_GROUP: ${data.azurerm_resource_group.rg.name}
AZURE_SUBNET_ID: ${one(azurerm_virtual_network.main.subnet.*.id)}
AZURE_SUBSCRIPTION_ID: ${data.azurerm_subscription.current.subscription_id}
CLOUD_PROVIDER: azure
DISABLECVM: "false"
---
apiVersion: v1
data:
AZURE_CLIENT_SECRET: ${base64encode(azuread_application_password.cred.value)}
kind: Secret
metadata:
name: azure-client-secret
---
type: Opaque
apiVersion: v1
data:
id_rsa.pub: ${data.local_file.id_rsa.content_base64}
kind: Secret
metadata:
name: ssh-key-secret
type: Opaque
EOF
}

fi

# Apply Kubernetes manifests from /deployment
apply target=default_deploy_target:
#!/usr/bin/env bash
set -euo pipefail
case {{ target }} in
"runtime")
kubectl apply -f ./{{ workspace_dir }}/runtime
exit 0
;;
"openssl" | "emojivoto" | "volume-stateful-set")
:
;;
Expand Down
23 changes: 23 additions & 0 deletions nodeinstaller/internal/constants/configuration-peerpod.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# upstream source: https://github.com/kata-containers/kata-containers/blob/51bc71b8d96874cf4a555e1084ee07e948bff957/src/runtime/config/configuration-remote.toml.in
# Kata runtime configuration for the peer-pod ("remote" hypervisor) setup,
# derived from the upstream template referenced above.
[hypervisor.remote]
# Socket the runtime uses to reach the remote hypervisor.
# NOTE(review): the cloud-api-adaptor container in the node-installer is started
# with "-socket /run/peerpod/hypervisor-<runtimeHandler>.sock"; presumably this
# value is rewritten per runtime handler at install time — confirm.
remote_hypervisor_socket = "/run/peerpod/hypervisor.sock"
# NOTE(review): unit is presumably seconds — confirm against the upstream template.
remote_hypervisor_timeout = 600
# Hypervisor settings listed here may be set through annotations.
enable_annotations = ["machine_type", "default_memory", "default_vcpus"]
firmware = ""
default_bridges = 1
disable_selinux = false
disable_guest_selinux = true

# No agent options are set in this file.
[agent.kata]

[runtime]
internetworking_model = "none"
disable_guest_seccomp = true
disable_new_netns = true
sandbox_cgroup_only = false
static_sandbox_resource_mgmt = true
vfio_mode = "guest-kernel"
disable_guest_empty_dir = false
experimental = []
# NOTE(review): unit is presumably seconds — confirm against the upstream template.
create_container_timeout = 60
dan_conf = "/run/edgeless/kata-containers/dans"
Loading