-
Notifications
You must be signed in to change notification settings - Fork 3.8k
115 lines (111 loc) · 3.99 KB
/
cuda.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
name: CUDA Version
on:
push:
branches:
- master
pull_request:
branches:
- master
- release/*
# automatically cancel in-progress builds if another commit is pushed
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
github_actions: 'true'
os_name: linux
conda_env: test-env
jobs:
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
runs-on: [self-hosted, linux]
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.11"
cuda_version: "11.8.0"
task: cuda
- method: source
compiler: gcc
python_version: "3.9"
cuda_version: "12.2.0"
task: cuda
- method: pip
compiler: clang
python_version: "3.10"
cuda_version: "11.8.0"
task: cuda
steps:
- name: Setup or update software on host machine
run: |
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
apt-transport-https \
ca-certificates \
curl \
git \
gnupg-agent \
lsb-release \
software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y
curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -sL https://nvidia.github.io/nvidia-docker/$(. /etc/os-release;echo $ID$VERSION_ID)/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
containerd.io \
docker-ce \
docker-ce-cli \
nvidia-docker2
sudo chmod a+rw /var/run/docker.sock
sudo systemctl restart docker
- name: Remove old folder with repository
run: sudo rm -rf $GITHUB_WORKSPACE
- name: Checkout repository
uses: actions/checkout@v1
with:
fetch-depth: 5
submodules: true
- name: Setup and run tests
run: |
export ROOT_DOCKER_FOLDER=/LightGBM
cat > docker.env <<EOF
GITHUB_ACTIONS=${{ env.github_actions }}
OS_NAME=${{ env.os_name }}
COMPILER=${{ matrix.compiler }}
TASK=${{ matrix.task }}
METHOD=${{ matrix.method }}
CONDA_ENV=${{ env.conda_env }}
PYTHON_VERSION=${{ matrix.python_version }}
BUILD_DIRECTORY=$ROOT_DOCKER_FOLDER
LGB_VER=$(head -n 1 VERSION.txt)
EOF
cat > docker-script.sh <<EOF
export CONDA=\$HOME/miniforge
export PATH=\$CONDA/bin:\$PATH
nvidia-smi
$ROOT_DOCKER_FOLDER/.ci/setup.sh || exit -1
$ROOT_DOCKER_FOLDER/.ci/test.sh || exit -1
EOF
cuda_version="${{ matrix.cuda_version }}"
cuda_major=${cuda_version%%.*}
docker_img="nvcr.io/nvidia/cuda:${cuda_version}-devel"
if [[ ${cuda_major} -eq 11 ]]; then
docker_img="${docker_img}-ubuntu18.04"
elif [[ ${cuda_major} -ge 12 ]]; then
docker_img="${docker_img}-ubuntu20.04"
fi
docker run --env-file docker.env -v "$GITHUB_WORKSPACE":"$ROOT_DOCKER_FOLDER" --rm --gpus all "$docker_img" /bin/bash $ROOT_DOCKER_FOLDER/docker-script.sh
all-cuda-jobs-successful:
if: always()
runs-on: ubuntu-latest
needs: [test]
steps:
- name: Note that all tests succeeded
uses: re-actors/[email protected]
with:
jobs: ${{ toJSON(needs) }}