-
Notifications
You must be signed in to change notification settings - Fork 2
/
install-dcgm-exporter.sh
51 lines (42 loc) · 1.64 KB
/
install-dcgm-exporter.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/bin/bash
# For more information, see https://docs.nvidia.com/datacenter/cloud-native/kubernetes/dcgme2e.html
# Also look at https://github.com/NVIDIA/gpu-monitoring-tools/blob/master/etc/dcgm-exporter/default-counters.csv for metrics
# Helm repository that hosts the chart: index URL and the local alias it is
# registered under.
# NOTE(review): the gpu-monitoring-tools project appears to have been
# superseded by NVIDIA/dcgm-exporter - confirm this URL is still the intended
# chart source.
repoUrl='https://nvidia.github.io/gpu-monitoring-tools/helm-charts'
repoName='gpu-helm-charts'
# Chart to install from that repository, and the name given to the release.
chartName='dcgm-exporter'
releaseName='dcgm-exporter'
# Kubernetes namespace the release is installed into.
namespace='dcgm-exporter'
# Ensure the target namespace exists, creating it when it is missing.
# The query lists all namespaces and filters by name, so an absent namespace
# produces empty output with a zero exit status. A non-zero status therefore
# means the query itself failed (e.g. the cluster is unreachable) and must not
# be mistaken for "namespace does not exist".
if ! result=$(kubectl get ns -o jsonpath="{.items[?(@.metadata.name=='${namespace}')].metadata.name}"); then
  echo "Failed to query namespaces in the cluster" >&2
  exit 1
fi
if [[ -n "$result" ]]; then
  echo "$namespace namespace already exists in the cluster"
else
  echo "$namespace namespace does not exist in the cluster"
  echo "creating $namespace namespace in the cluster..."
  if ! kubectl create namespace "$namespace"; then
    echo "Failed to create $namespace namespace" >&2
    exit 1
  fi
fi
# Register the Helm repository under $repoName unless it is already configured.
# The repo name is compared exactly against the first column of
# `helm repo list` (skipping the header row); a plain grep would also match
# other repo names or URLs that merely contain the string.
# stderr is silenced on purpose: `helm repo list` reports an error when no
# repositories are configured yet, which here simply means "not added".
result=$(helm repo list 2>/dev/null \
  | awk -v name="$repoName" 'NR > 1 && $1 == name { print $1 }')
if [[ -n "$result" ]]; then
  echo "[$repoName] Helm repo already exists"
else
  # Add the chart repository at $repoUrl
  echo "Adding [$repoName] Helm repo..."
  if ! helm repo add "$repoName" "$repoUrl"; then
    echo "Failed to add [$repoName] Helm repo" >&2
    exit 1
  fi
fi
# Refresh the locally cached chart indexes of the configured Helm repos so the
# install below resolves against current chart metadata.
printf '%s\n' 'Updating Helm repos...'
helm repo update
# Install the chart as $releaseName unless a release with that name already
# exists in the namespace.
# --short prints release names only and --filter anchors the match to the
# exact name, so the NAMESPACE and CHART columns (which contain the same
# string) can no longer cause a false "already exists". --all also includes
# releases that are not in the deployed state, since those still occupy the
# release name. Note: --filter takes a regular expression, so $releaseName is
# assumed to contain no regex metacharacters.
if ! result=$(helm list --namespace "$namespace" --all --short --filter "^${releaseName}\$"); then
  echo "Failed to list Helm releases in the [$namespace] namespace" >&2
  exit 1
fi
if [[ -n "$result" ]]; then
  echo "[$releaseName] already exists in the [$namespace] namespace"
else
  # Install the Helm chart; values.yaml is resolved relative to the current
  # working directory, not the script's location.
  echo "Deploying [$releaseName] to the [$namespace] namespace..."
  if ! helm install "$releaseName" "$repoName/$chartName" \
    --namespace "$namespace" \
    --values values.yaml; then
    echo "Failed to deploy [$releaseName] to the [$namespace] namespace" >&2
    exit 1
  fi
fi
# Show the pods in the target namespace (wide output adds node and IP columns)
# so the result of the deployment can be checked at a glance.
kubectl get pods --namespace "$namespace" --output wide