Skip to content

Commit

Permalink
Merge branch 'master' into jpb/fix_gtl
Browse files (browse the repository at this point in the history)
  • Loading branch information
vchuravy authored Jun 23, 2024
2 parents a0861c3 + 71acbb7 commit 0b8888b
Show file tree
Hide file tree
Showing 29 changed files with 369 additions and 172 deletions.
100 changes: 37 additions & 63 deletions .buildkite/pipeline.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,7 @@
key: "cuda-build-openmpi"
agents:
queue: "juliagpu"
cuda: "11.0"
cuda: "*"
env:
OPENMPI_VER: "4.1"
OPENMPI_VER_FULL: "4.1.4"
Expand Down Expand Up @@ -44,61 +44,26 @@

- wait

- label: "Tests -- Julia 1.6"
- label: "Tests -- Julia {{matrix.version}}"
matrix:
setup:
version:
- "1.6"
- "1.7"
- "1.8"
- "1.9"
- "1.10"
concurrency: 1
concurrency_group: mpi_cuda
plugins:
- JuliaCI/julia#v1:
version: "1.6"
version: "{{matrix.version}}"
persist_depot_dirs: packages,artifacts,compiled
agents:
queue: "juliagpu"
cuda: "11.0"
cuda: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
env:
JULIA_MPI_TEST_NPROCS: 2
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
OMPI_ALLOW_RUN_AS_ROOT: 1
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
OMPI_MCA_btl_vader_single_copy_mechanism: 'none' # https://github.com/open-mpi/ompi/issues/4948
OPAL_PREFIX: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" # Should we set this for the user?
JULIA_CUDA_MEMORY_POOL: "none"
commands: |
echo "--- Configure MPI"
buildkite-agent artifact download --step "cuda-build-openmpi" mpi-prefix.tar.gz .
mkdir -p $${JULIA_MPI_PATH}
tar -zxf mpi-prefix.tar.gz --strip-components 1 -C $${JULIA_MPI_PATH}
export PATH=$${JULIA_MPI_PATH}/bin:$${PATH}
export LD_LIBRARY_PATH=$${JULIA_MPI_PATH}/lib:$${LD_LIBRARY_PATH}
echo "--- Setup Julia packages"
julia --color=yes --project=. -e '
import Pkg
Pkg.develop(; path = joinpath(pwd(), "lib", "MPIPreferences"))
'
julia --color=yes --project=test -e '
using Pkg
Pkg.develop(path="lib/MPIPreferences")
using MPIPreferences
MPIPreferences.use_system_binary(export_prefs=true)
rm("test/Manifest.toml")
'
echo "+++ Run tests"
julia --color=yes --project=. -e '
import Pkg
Pkg.test("MPI"; test_args=["--backend=CUDA"])
'
- label: "Tests -- Julia latest"
plugins:
- JuliaCI/julia#v1:
version: "1"
persist_depot_dirs: packages,artifacts,compiled
agents:
queue: "juliagpu"
cuda: "11.0"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
timeout_in_minutes: 90
env:
JULIA_MPI_TEST_NPROCS: 2
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
Expand Down Expand Up @@ -141,53 +106,61 @@
key: "rocm-build-openmpi"
agents:
queue: "juliagpu"
rocm: "*" # todo fix ROCM version
rocm: "*"
env:
OPENMPI_VER: "4.1"
OPENMPI_VER_FULL: "4.1.4"
UCX_VER: "1.13-rc1"
OPENMPI_VER: "5.0"
OPENMPI_VER_FULL: "5.0.3"
UCX_VER: "1.17.0"
CCACHE_DIR: "/root/ccache"
commands: |
echo "--- Install packages"
apt-get install --yes --no-install-recommends curl ccache
export PATH="/usr/lib/ccache/:$$PATH"
echo "--- Build UCX"
curl -L https://github.com/openucx/ucx/releases/download/v1.13.0-rc1/ucx-1.13.0.tar.gz --output ucx.tar.gz
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
tar -zxf ucx.tar.gz
pushd ucx-*
./configure --with-rocm --enable-mt --prefix=$$(realpath ../mpi-prefix)
make -j
make install
popd
echo "--- Build OpenMPI"
curl -L https://download.open-mpi.org/release/open-mpi/v$${OPENMPI_VER}/openmpi-$${OPENMPI_VER_FULL}.tar.gz --output openmpi.tar.gz
tar -zxf openmpi.tar.gz
pushd openmpi-*
./configure --with-ucx=$$(realpath ../mpi-prefix) --prefix=$$(realpath ../mpi-prefix)
pushd openmpi-$${OPENMPI_VER_FULL}
./configure --with-ucx=$$(realpath ../mpi-prefix) --with-rocm --prefix=$$(realpath ../mpi-prefix)
make -j
make install
popd
echo "--- Package prefix"
tar -zcf mpi-prefix.tar.gz mpi-prefix/
echo "--- ccache stats"
ccache -s
artifact_paths:
- "mpi-prefix.tar.gz"

- wait

- label: "Tests -- Julia latest"
- label: "Tests -- Julia {{matrix.version}}"
matrix:
setup:
version:
- "1.10"
concurrency: 1
concurrency_group: mpi_rocm
plugins:
- JuliaCI/julia#v1:
version: "1" # failing on 1.8
version: "{{matrix.version}}"
persist_depot_dirs: packages,artifacts,compiled
agents:
queue: "juliagpu"
rocm: "*" # todo fix ROCM version
rocm: "*"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
soft_fail:
- exit_status: 1
timeout_in_minutes: 90
env:
JULIA_MPI_TEST_NPROCS: 2
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
Expand Down Expand Up @@ -217,6 +190,7 @@
'
echo "+++ Run tests"
export JULIA_MPI_TEST_EXCLUDE="test_allreduce.jl,test_reduce.jl,test_scan.jl"
julia --color=yes --project=. -e '
import Pkg
Pkg.test("MPI"; test_args=["--backend=AMDGPU"])
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/Documenter.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,6 +26,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v1
with:
version: '1'
- uses: julia-actions/cache@v1
- name: Install dependencies
shell: julia --color=yes --project=docs/ {0}
run: |
Expand Down
20 changes: 19 additions & 1 deletion .github/workflows/TagBot.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,22 @@ on:
types:
- created
workflow_dispatch:
inputs:
lookback:
default: 3
permissions:
actions: read
checks: read
contents: write
deployments: read
issues: read
discussions: read
packages: read
pages: read
pull-requests: read
repository-projects: read
security-events: read
statuses: read
jobs:
TagBot:
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
Expand All @@ -12,4 +28,6 @@ jobs:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
ssh: ${{ secrets.TAGBOT_KEY }}
# Edit the following line to reflect the actual name of the GitHub Secret containing your private key
ssh: ${{ secrets.DOCUMENTER_KEY }}
# ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }}
Loading

0 comments on commit 0b8888b

Please sign in to comment.