From 51487f348f8027d3e7cc369ee4991014871fb032 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Wei=C3=9Fe?=
Date: Tue, 15 Oct 2024 17:28:36 +0200
Subject: [PATCH] tools: periodically clean up bare-metal servers

Add a cleanup-bm script that deletes contrast-cc runtime classes which
are neither referenced by a workload nor named in the arguments of a
contrast-node-installer pod. For every deleted runtime class the script
also removes its directory below $OPTEDGELESS, its directory below
/var/lib/$SNAPSHOTTER-snapshotter and its entries in the containerd
configuration at $CONFIG.

The script is packaged as a container image and scheduled by a CronJob
every Sunday at 00:00.

---
 packages/cleanup-bm.sh                      | 92 +++++++++++++++++++++
 packages/containers.nix                     |  8 ++
 packages/scripts.nix                        | 10 +++
 tools/bm-maintenance/deployment_tdx_snp.yml | 78 +++++++++++++++++
 4 files changed, 188 insertions(+)
 create mode 100755 packages/cleanup-bm.sh
 create mode 100644 tools/bm-maintenance/deployment_tdx_snp.yml

diff --git a/packages/cleanup-bm.sh b/packages/cleanup-bm.sh
new file mode 100755
index 0000000000..5293c91512
--- /dev/null
+++ b/packages/cleanup-bm.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+# Copyright 2024 Edgeless Systems GmbH
+# SPDX-License-Identifier: AGPL-3.0-only
+
+# Removes contrast-cc runtime classes that are not referenced anymore, along
+# with their files on this host and their entries in the containerd config.
+#
+# Required environment variables:
+#   OPTEDGELESS  directory whose <runtime class> subdirectories are removed
+#   SNAPSHOTTER  snapshotter name, data lives in /var/lib/<name>-snapshotter
+#   CONFIG       containerd configuration file (TOML) to prune
+
+set -euo pipefail
+
+# Fail before anything is deleted if the configuration is incomplete. Under
+# `set -u` an unset variable would otherwise abort the loop below right after
+# the first runtime class has already been removed from the cluster.
+: "${OPTEDGELESS:?must be set}"
+: "${SNAPSHOTTER:?must be set}"
+: "${CONFIG:?must be set}"
+
+echo "Starting cleanup"
+
+resourcesToCheck=(
+  "pods"
+  "deployments"
+  "statefulsets"
+  "daemonsets"
+  "replicasets"
+  "jobs"
+  "cronjobs"
+)
+
+# Start from an empty list so entries of an earlier run do not count as used.
+: >usedRuntimeClasses
+for resource in "${resourcesToCheck[@]}"; do
+  # Only pods have runtimeClassName directly in their spec. Controllers embed
+  # the pod spec as a template, cron jobs wrap that in a job template.
+  case "${resource}" in
+  pods) podSpec=".spec" ;;
+  cronjobs) podSpec=".spec.jobTemplate.spec.template.spec" ;;
+  *) podSpec=".spec.template.spec" ;;
+  esac
+  kubectl get "${resource}" --all-namespaces -o jsonpath="{.items[*]${podSpec}.runtimeClassName}" 2>/dev/null |
+    tr ' ' '\n' >>usedRuntimeClasses
+  # kubectl does not end jsonpath output with a newline. Terminate the last
+  # entry so it is not glued to the first entry appended next.
+  echo >>usedRuntimeClasses
+done
+
+# Names found in the second argument of contrast-node-installer containers
+# count as used, too. grep exits with 1 if nothing matches, which must not
+# abort the script under `set -o pipefail`; real grep errors still do.
+kubectl get pods --all-namespaces -o jsonpath='{.items[?(@.metadata.annotations.contrast\.edgeless\.systems/pod-role=="contrast-node-installer")].spec.containers[0].args[1]}' |
+  tr ' ' '\n' |
+  { grep -o "contrast-cc-.\+" || [[ $? -eq 1 ]]; } >>usedRuntimeClasses
+sort -u usedRuntimeClasses -o usedRuntimeClasses
+
+# Everything that only shows up in the list of existing contrast-cc runtime
+# classes (second input of comm) is unused.
+mapfile -t unusedRuntimeClasses < <(
+  comm -13 usedRuntimeClasses <(
+    kubectl get runtimeclass -o jsonpath='{.items[*].metadata.name}' |
+      tr ' ' '\n' |
+      grep '^contrast-cc' |
+      sort -u
+  )
+)
+
+for runtimeClass in "${unusedRuntimeClasses[@]}"; do
+  # Delete unused runtime classes
+  echo "Deleting runtimeclass ${runtimeClass} ..."
+  kubectl delete runtimeclass "${runtimeClass}"
+
+  # Delete unused files
+  if [ -d "${OPTEDGELESS}/${runtimeClass}" ]; then
+    echo "Deleting files from ${OPTEDGELESS}/${runtimeClass} ..."
+    rm -rf "${OPTEDGELESS:?}/${runtimeClass:?}"
+  fi
+  # Check the per-class directory, not just the snapshotter root, so that
+  # nothing is reported as deleted when the class has no snapshotter data.
+  snapshotterDir="/var/lib/${SNAPSHOTTER}-snapshotter/${runtimeClass:?}"
+  if [ -d "${snapshotterDir}" ]; then
+    echo "Deleting files from ${snapshotterDir} ..."
+    rm -rf "${snapshotterDir}"
+  fi
+
+  # Remove references from containerd config
+  echo "Removing ${runtimeClass} from ${CONFIG} ..."
+  dasel delete --file "${CONFIG}" --indent 0 --read toml --write toml "plugins.io\.containerd\.grpc\.v1\.cri.containerd.runtimes.${runtimeClass}" 2>/dev/null
+  dasel delete --file "${CONFIG}" --indent 0 --read toml --write toml "proxy_plugins.${SNAPSHOTTER}-${runtimeClass}" 2>/dev/null
+done
diff --git a/packages/containers.nix b/packages/containers.nix
index fa3f6a9cc7..8fbb462c19 100644
--- a/packages/containers.nix
+++ b/packages/containers.nix
@@ -155,6 +155,14 @@ let
         Env = [ "PATH=/bin" ]; # This is only here for policy generation.
       };
     };
+
+    cleanup-bm = dockerTools.buildImage {
+      name = "cleanup-bm";
+      tag = "v0.0.1";
+      config = {
+        Cmd = [ "${lib.getExe pkgs.scripts.cleanup-bm}" ];
+      };
+    };
   };
 in
 containers
diff --git a/packages/scripts.nix b/packages/scripts.nix
index bc2f4bf55e..050c6e3e01 100644
--- a/packages/scripts.nix
+++ b/packages/scripts.nix
@@ -490,4 +490,14 @@
       kubectl apply -k "$tmpdir/overlays/azure"
     '';
   };
+
+  cleanup-bm = writeShellApplication {
+    name = "cleanup-bm";
+    runtimeInputs = with pkgs; [
+      busybox
+      kubectl
+      dasel
+    ];
+    text = builtins.readFile ./cleanup-bm.sh;
+  };
 }
diff --git a/tools/bm-maintenance/deployment_tdx_snp.yml b/tools/bm-maintenance/deployment_tdx_snp.yml
new file mode 100644
index 0000000000..9503aee83c
--- /dev/null
+++ b/tools/bm-maintenance/deployment_tdx_snp.yml
@@ -0,0 +1,78 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: cleanup-sa
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: cleanup-role
+rules:
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["list"]
+  - apiGroups: ["apps"]
+    resources: ["deployments", "statefulsets", "daemonsets", "replicasets"]
+    verbs: ["list"]
+  - apiGroups: ["batch"]
+    resources: ["jobs", "cronjobs"]
+    verbs: ["list"]
+  - apiGroups: ["node.k8s.io"]
+    resources: ["runtimeclasses"]
+    verbs: ["list", "delete"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: cleanup-binding
+subjects:
+  - kind: ServiceAccount
+    name: cleanup-sa
+    namespace: default
+roleRef:
+  kind: ClusterRole
+  name: cleanup-role
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: cleanup-maintenance
+spec:
+  schedule: "0 0 * * 0"
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          serviceAccountName: cleanup-sa
+          containers:
+            - name: cleanup
+              image: ghcr.io/edgelesssys/contrast/cleanup-bm:v0.0.1
+              env:
+                - name: OPTEDGELESS
+                  value: /opt/edgeless
+                - name: CONFIG
+                  value: /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl
+                - name: SNAPSHOTTER
+                  value: nydus
+              volumeMounts:
+                - name: opt-edgeless
+                  mountPath: /opt/edgeless
+                - name: snapshotter-data
+                  mountPath: /var/lib/nydus-snapshotter
+                - name: containerd-config
+                  mountPath: /var/lib/rancher/k3s/agent/etc/containerd
+          volumes:
+            - name: opt-edgeless
+              hostPath:
+                path: /opt/edgeless
+                type: Directory
+            - name: snapshotter-data
+              hostPath:
+                path: /var/lib/nydus-snapshotter
+                type: Directory
+            - name: containerd-config
+              hostPath:
+                path: /var/lib/rancher/k3s/agent/etc/containerd
+                type: Directory
+          restartPolicy: OnFailure