start_cluster.sh (forked from NVIDIA/spark-rapids-examples)
#!/bin/bash
# Copyright (c) 2025, NVIDIA CORPORATION.
set -eo pipefail

# Validate required environment variables (see README.md).
if [[ -z ${INIT_DEST} ]]; then
    echo "Please make sure INIT_DEST is exported per README.md"
    exit 1
fi

if [[ -z ${FRAMEWORK} ]]; then
    echo "Please make sure FRAMEWORK is exported to torch or tf per README.md"
    exit 1
fi

# Build the Databricks cluster spec. With 8 executor cores and a task GPU
# amount of 0.125, up to 8 concurrent tasks share each T4 GPU per executor.
json_config=$(cat <<EOF
{
    "cluster_name": "spark-dl-inference-${FRAMEWORK}",
    "spark_version": "15.4.x-gpu-ml-scala2.12",
    "spark_conf": {
        "spark.executor.resource.gpu.amount": "1",
        "spark.python.worker.reuse": "true",
        "spark.task.resource.gpu.amount": "0.125",
        "spark.sql.execution.arrow.pyspark.enabled": "true",
        "spark.executor.cores": "8"
    },
    "node_type_id": "Standard_NC8as_T4_v3",
    "driver_node_type_id": "Standard_NC8as_T4_v3",
    "spark_env_vars": {
        "TF_GPU_ALLOCATOR": "cuda_malloc_async",
        "FRAMEWORK": "${FRAMEWORK}"
    },
    "autotermination_minutes": 60,
    "enable_elastic_disk": true,
    "init_scripts": [
        {
            "workspace": {
                "destination": "${INIT_DEST}"
            }
        }
    ],
    "runtime_engine": "STANDARD",
    "num_workers": 4
}
EOF
)

databricks clusters create --json "$json_config"
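
# Usage sketch (illustrative; the values below are assumptions, not taken from
# this repo): per the README, export INIT_DEST (the workspace path of the
# uploaded init script) and FRAMEWORK (torch or tf) before running this script,
# e.g.:
#
#   export INIT_DEST=/Users/someone@example.com/init_spark_dl.sh  # assumed path
#   export FRAMEWORK=torch                                        # or: tf
#   ./start_cluster.sh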