From 61bb2f90c93c53f359f4a077d98a0f8b9f9fb020 Mon Sep 17 00:00:00 2001 From: mrrishi Date: Wed, 5 Mar 2025 14:19:12 +0530 Subject: [PATCH 1/2] feat: add additional steps --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index 762672e..529670b 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,30 @@ kubectl -n kube-system patch secret civo-node-agent -n kube-system --type='merge -p='{"stringData": {"civo-api-key": "'"$CIVO_API_KEY"'", "node-pool-id": "'"$CIVO_NODE_POOL_ID"'", "desired-gpu-count": "'"$CIVO_DESIRED_GPU_COUNT"'", "time-window": "'"$CIVO_NODE_REBOOT_TIME_WINDOW_MINUTES"'" }}' ``` +## Nvidia Device Plugin Install + +```bash +kubectl create ns gpu-operator +kubectl label namespace gpu-operator pod-security.kubernetes.io/enforce=privileged +kubectl label namespace gpu-operator pod-security.kubernetes.io/warn=privileged +kubectl label namespace gpu-operator pod-security.kubernetes.io/audit=privileged +``` + +```bash +helm repo add nvdp https://nvidia.github.io/k8s-device-plugin \ +&& helm repo update +``` + +```bash +helm install --namespace gpu-operator nvidia-device-plugin nvdp/nvidia-device-plugin --create-namespace \ + --version=0.17.0 \ + --set gfd.enabled=true \ + --set devicePlugin.enabled=true \ + --set dcgm.enabled=true \ + --set nfd.enableNodeFeatureApi=true \ + --wait +``` + ## Install `node-agent` chart ```bash @@ -35,6 +59,7 @@ The following configurations are stored in the `node-agent` secret in the `kube- `civo-api-key`: The civo api key to use when automatically rebooting nodes. To collect this value, go to toue [civo settings security tab](https://dashboard.civo.com/security). 
+`time-window`: The time window, in minutes, that a node is given to recover after a reboot. ## Temp details until CI is complete From bd67f75492e8717beea2fb3232baf9d7836d7b22 Mon Sep 17 00:00:00 2001 From: mrrishi Date: Wed, 5 Mar 2025 14:23:58 +0530 Subject: [PATCH 2/2] fix: image name and tag --- charts/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/values.yaml b/charts/values.yaml index b14be2e..ba98283 100644 --- a/charts/values.yaml +++ b/charts/values.yaml @@ -2,9 +2,9 @@ replicaCount: 1 image: - repository: jokesta/na + repository: civo/node-agent pullPolicy: IfNotPresent - tag: "0.27" + tag: "3aba5ca" imagePullSecrets: [] nameOverride: ""