This repository was archived by the owner on Nov 2, 2023. It is now read-only.
forked from NVIDIA/deepops
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathJenkinsfile
147 lines (125 loc) · 3.96 KB
/
Jenkinsfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// Jenkins declarative pipeline for this repository.
// Flow, as written below:
//   1. Two milestone steps keyed on the build number (stage 'Stop Any Old Builds').
//   2. Under a lock on one resource labelled 'gpu': reset the workspace, bring up
//      a Vagrant environment, run the Kubernetes-oriented test scripts, bring up
//      the Vagrant environment again, then run the Slurm-oriented test scripts.
//   3. Always run virtual/vagrant_shutdown.sh at the end of the build.
// All test logic lives in shell scripts under ./workloads/jenkins/scripts; this
// file only fixes their order and, for some of them, a wall-clock limit.
pipeline {
// No node label is requested: the build runs on whichever agent Jenkins picks.
agent any
// Variables exported to every `sh` step below. They are not referenced anywhere
// else in this file.
// NOTE(review): presumably read by the scripts under workloads/jenkins/scripts
// and/or the Vagrant tooling in ./virtual -- confirm there.
environment {
// Deliberately empty string.
// NOTE(review): presumably means "not a full install"; verify how the scripts
// test this variable (empty vs. unset).
DEEPOPS_FULL_INSTALL = ''
DEEPOPS_VAGRANT_OS = 'ubuntu'
DEEPOPS_OS_VERSION = '20.04'
}
stages {
stage('Stop Any Old Builds') {
steps {
// Every build passes two milestones: ordinal (BUILD_ID - 1), then BUILD_ID.
// Per the milestone step's documented behaviour, a newer build passing a
// milestone aborts older builds that passed the previous milestone but not
// this one, so build N passing ordinal N is what cancels a still-running
// build N-1 (whose highest milestone is N-1).
// BUILD_ID is a string in `env`, hence the Integer.parseInt.
milestone label: '', ordinal: Integer.parseInt(env.BUILD_ID) - 1
milestone label: '', ordinal: Integer.parseInt(env.BUILD_ID)
}
}
// Single stage holding the whole test run. Each `sh` step fails the stage on a
// non-zero exit status, so the first failing script stops the sequence; the
// `echo` before each step just labels it in the console log.
stage('Cluster Up') {
steps {
// TODO: ideally lock should work with declared stages
// Acquire one lockable resource carrying the label 'gpu' (no specific resource
// name is requested) for the duration of this closure. The `variable`
// argument asks the lock step to expose the acquired resource under the name
// GPUDATA.
// NOTE(review): GPUDATA is not referenced in this file -- presumably consumed
// by the scripts; confirm.
lock(resource: null, label: 'gpu', quantity: 1, variable: 'GPUDATA') {
// --- Workspace preparation -------------------------------------------------
// Discard local modifications to tracked files and delete the ./config
// directory before the files are "munged" again for this run.
echo "Reset repo and unmunge files"
sh '''
git reset --hard
rm -rf config
'''
echo "Munge files for testing"
sh '''
bash -x ./workloads/jenkins/scripts/munge-files.sh
'''
// Best-effort cleanup of a previous run: the trailing `|| true` makes this step
// succeed even if `cd virtual` or vagrant_shutdown.sh fails.
echo "Tear down any Vagrant that was not cleaned up"
sh '''
pwd
cd virtual && ./vagrant_shutdown.sh || true
'''
// --- First virtual environment: cluster bring-up and Kubernetes checks -----
echo "Vagrant Up"
sh '''
bash -x ./workloads/jenkins/scripts/vagrant-startup.sh
'''
echo "Cluster Up"
sh '''
bash -x ./workloads/jenkins/scripts/test-cluster-up.sh
'''
echo "Get K8S Cluster Status"
sh '''
bash -x ./workloads/jenkins/scripts/get-k8s-debug.sh
'''
// `timeout N cmd` (here and below) kills the script if it runs longer than
// N seconds, which makes the step exit non-zero.
echo "Verify we can run a GPU job"
sh '''
timeout 500 bash -x ./workloads/jenkins/scripts/run-gpu-job.sh
'''
echo "Verify ingress config"
sh '''
bash -x ./workloads/jenkins/scripts/verify-ingress-config.sh
'''
echo "Verify registry mirror"
sh '''
bash -x ./workloads/jenkins/scripts/test-deepops-registry-mirror.sh
'''
echo "Verify in-cluster k8s docker registry"
sh '''
bash -x ./workloads/jenkins/scripts/test-local-registry.sh
'''
echo "Verify rsyslog forwarding is working for the k8s cluster"
sh '''
bash -x ./workloads/jenkins/scripts/test-rsyslog-k8s.sh
'''
echo "Test Monitoring installation"
sh '''
timeout 800 bash -x ./workloads/jenkins/scripts/test-monitoring.sh
'''
echo "Test Dashboard installation"
sh '''
timeout 180 bash -x ./workloads/jenkins/scripts/test-dashboard.sh
'''
// --- Second virtual environment: Slurm checks ------------------------------
// Runs the same vagrant-startup.sh as the "Vagrant Up" step above.
// NOTE(review): no explicit shutdown step appears between the two runs in this
// file; presumably vagrant-startup.sh recreates the VMs itself -- confirm.
echo "Start new virtual environment pre-Slurm checks"
sh '''
bash -x ./workloads/jenkins/scripts/vagrant-startup.sh
'''
echo "Set up Slurm"
sh '''
bash -x ./workloads/jenkins/scripts/test-setup-slurm.sh
'''
echo "Get Slurm Cluster Status"
sh '''
bash -x ./workloads/jenkins/scripts/get-slurm-debug.sh
'''
echo "Test Slurm"
sh '''
timeout 60 bash -x ./workloads/jenkins/scripts/test-slurm-job.sh
'''
echo "Test NFS"
sh '''
timeout 60 bash -x ./workloads/jenkins/scripts/test-slurm-nfs-mount.sh
'''
echo "Test MPI"
sh '''
timeout 60 bash -x ./workloads/jenkins/scripts/test-mpi-job.sh
'''
echo "Test Enroot"
sh '''
timeout 120 bash -x ./workloads/jenkins/scripts/test-slurm-enroot-job.sh
'''
echo "Verify rsyslog forwarding is working for the slurm cluster"
sh '''
bash -x ./workloads/jenkins/scripts/test-rsyslog-slurm.sh
'''
echo "Test GPU job"
sh '''
timeout 60 bash -x ./workloads/jenkins/scripts/test-slurm-gpu.sh
'''
// The only script in this file that is passed an argument ('slurm-node').
echo "Test DCGM metrics"
sh '''
timeout 600 bash -x ./workloads/jenkins/scripts/test-dcgm-metrics.sh slurm-node
'''
}
}
}
}
// Runs after the stages regardless of the build result.
post {
always {
// Final teardown. Unlike the pre-test teardown inside the stage there is no
// `|| true` here, so a failing `cd` or vagrant_shutdown.sh fails this step.
// NOTE(review): this executes after the lock(...) body above has ended, i.e.
// the 'gpu' resource is no longer held while the VMs are being shut down --
// confirm that another build cannot acquire it before teardown completes.
sh '''
pwd
cd virtual && ./vagrant_shutdown.sh
'''
}
}
}