-
Notifications
You must be signed in to change notification settings - Fork 3.8k
136 lines (131 loc) · 4.4 KB
/
cuda.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
name: CUDA Version
on:
push:
branches:
- master
pull_request:
branches:
- master
# Run manually by clicking a button in the UI
workflow_dispatch:
inputs:
restart_docker:
description: 'Restart nvidia-docker on the runner before building?'
required: true
type: boolean
default: false
# automatically cancel in-progress builds if another commit is pushed
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
# Optionally reinstall + restart docker on the runner before building.
# This is safe as long as only 1 of these jobs runs at a time.
restart-docker:
name: set up docker
runs-on: [self-hosted, linux]
timeout-minutes: 30
steps:
- name: Setup or update software on host machine
if: ${{ inputs.restart_docker }}
run: |
# install core packages
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
apt-transport-https \
ca-certificates \
curl \
gnupg-agent \
lsb-release \
software-properties-common
# set up nvidia-docker
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y
curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -sL https://nvidia.github.io/nvidia-docker/$(. /etc/os-release;echo $ID$VERSION_ID)/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update
sudo apt-get install --no-install-recommends -y \
containerd.io \
docker-ce \
docker-ce-cli \
nvidia-docker2
sudo chmod a+rw /var/run/docker.sock
sudo systemctl restart docker
- name: mark job successful
run: |
exit 0
test:
name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (${{ matrix.linux_version }}, ${{ matrix.compiler }}, Python ${{ matrix.python_version }})
runs-on: [self-hosted, linux]
needs: [restart-docker]
container:
image: nvcr.io/nvidia/cuda:${{ matrix.cuda_version }}-devel-${{ matrix.linux_version }}
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
COMPILER: ${{ matrix.compiler }}
CONDA: /tmp/miniforge
DEBIAN_FRONTEND: noninteractive
METHOD: ${{ matrix.method }}
OS_NAME: linux
PYTHON_VERSION: ${{ matrix.python_version }}
TASK: ${{ matrix.task }}
SKBUILD_STRICT_CONFIG: true
options: --gpus all
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
include:
- method: wheel
compiler: gcc
python_version: "3.10"
cuda_version: "12.6.1"
linux_version: "ubuntu22.04"
task: cuda
- method: source
compiler: gcc
python_version: "3.12"
cuda_version: "12.2.2"
linux_version: "ubuntu22.04"
task: cuda
- method: pip
compiler: clang
python_version: "3.11"
cuda_version: "11.8.0"
linux_version: "ubuntu20.04"
task: cuda
steps:
- name: Install latest git and sudo
run: |
apt-get update
apt-get install --no-install-recommends -y \
ca-certificates \
software-properties-common
add-apt-repository ppa:git-core/ppa -y
apt-get update
apt-get install --no-install-recommends -y \
git \
sudo
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: Setup and run tests
run: |
export BUILD_DIRECTORY="$GITHUB_WORKSPACE"
export PATH=$CONDA/bin:$PATH
# check GPU usage
nvidia-smi
# build and test
$GITHUB_WORKSPACE/.ci/setup.sh
$GITHUB_WORKSPACE/.ci/test.sh
all-cuda-jobs-successful:
if: always()
runs-on: ubuntu-latest
needs: [test]
steps:
- name: Note that all tests succeeded
uses: re-actors/[email protected]
with:
jobs: ${{ toJSON(needs) }}