Skip to content

Commit

Permalink
Merge pull request lammps#3828 from vladgl/kk-ompi-rocm
Browse files Browse the repository at this point in the history
Dynamic check for CUDA and ROCm-aware OpenMPI support in KOKKOS package
  • Loading branch information
akohlmey authored Nov 1, 2023
2 parents 45582fe + c5c816e commit e655cda
Showing 1 changed file with 74 additions and 70 deletions.
144 changes: 74 additions & 70 deletions src/KOKKOS/kokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,46 +34,10 @@
#endif

#ifdef LMP_KOKKOS_GPU

// for detecting GPU-aware MPI support:
// the variable int have_gpu_aware
// - is 1 if GPU-aware MPI support is available
// - is 0 if GPU-aware MPI support is unavailable
// - is -1 if GPU-aware MPI support is unknown

#define GPU_AWARE_UNKNOWN static int have_gpu_aware = -1;

// TODO HIP: implement HIP-aware MPI support (UCX) detection
#if defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET)
GPU_AWARE_UNKNOWN
#elif defined(KOKKOS_ENABLE_CUDA)

// OpenMPI supports detecting GPU-aware MPI as of version 2.0.0

#if (OPEN_MPI)
#if (OMPI_MAJOR_VERSION >= 2)

#if (OPEN_MPI) && (OMPI_MAJOR_VERSION >= 2)
#include <mpi-ext.h>

#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
static int have_gpu_aware = 1;
#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT
static int have_gpu_aware = 0;
#else
GPU_AWARE_UNKNOWN
#endif // defined(MPIX_CUDA_AWARE_SUPPORT)

#else // old OpenMPI
GPU_AWARE_UNKNOWN
#endif // (OMPI_MAJOR_VERSION >=2)

#else // unknown MPI library
GPU_AWARE_UNKNOWN
#endif // OPEN_MPI

#endif // KOKKOS_ENABLE_CUDA

#endif // LMP_ENABLE_DEVICE
#endif
#endif

using namespace LAMMPS_NS;

Expand Down Expand Up @@ -243,8 +207,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
// default settings for package kokkos command

binsize = 0.0;
#ifdef KOKKOS_ENABLE_CUDA
// TODO HIP: implement HIP-aware MPI testing
#if defined(LMP_KOKKOS_GPU)
gpu_aware_flag = 1;
#else
gpu_aware_flag = 0;
Expand Down Expand Up @@ -282,70 +245,111 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
#ifdef LMP_KOKKOS_GPU

// check and warn about GPU-aware MPI availability when using multiple MPI tasks
// change default only if we can safely detect that GPU-aware MPI is not available
// change default only if we can detect that GPU-aware MPI is not available

int nmpi = 0;
MPI_Comm_size(world,&nmpi);
if (nmpi > 1) {

// for detecting GPU-aware MPI support:
// the variable int have_gpu_aware
// - is 1 if GPU-aware MPI support is available
// - is 0 if GPU-aware MPI support is unavailable
// - is -1 if GPU-aware MPI support is unknown

int have_gpu_aware = -1;

// OpenMPI

#if (OPEN_MPI)
#if (OMPI_MAJOR_VERSION >= 2)

#if defined(KOKKOS_ENABLE_CUDA)
#if defined(OMPI_HAVE_MPI_EXT_CUDA) && OMPI_HAVE_MPI_EXT_CUDA
have_gpu_aware = MPIX_Query_cuda_support();
#endif
#endif

#if defined(KOKKOS_ENABLE_HIP)
#if defined(OMPI_HAVE_MPI_EXT_ROCM) && OMPI_HAVE_MPI_EXT_ROCM
have_gpu_aware = MPIX_Query_rocm_support();
#elif (OMPI_MAJOR_VERSION < 5)
have_gpu_aware = 0;
#endif
#endif

#else
have_gpu_aware = 0;
#endif // OMPI_MAJOR_VERSION >= 2

if (gpu_aware_flag == 1 && have_gpu_aware == 0) {
if (me == 0)
error->warning(FLERR,"Turning off GPU-aware MPI since it is not detected, "
"use '-pk kokkos gpu/aware on' to override");
gpu_aware_flag = 0;
}

#endif // OPEN_MPI

// IBM Spectrum MPI

#if defined(MPI_VERSION) && (MPI_VERSION > 2)
// Check for IBM Spectrum MPI

int len;
char mpi_version[MPI_MAX_LIBRARY_VERSION_STRING];
MPI_Get_library_version(mpi_version, &len);
if (strstr(&mpi_version[0], "Spectrum") != nullptr) {
gpu_aware_flag = 0;
char* str;
have_gpu_aware = 0;
if ((str = getenv("OMPI_MCA_pml_pami_enable_cuda")))
if ((strcmp(str,"1") == 0)) {
if ((strcmp(str,"1") == 0))
have_gpu_aware = 1;
gpu_aware_flag = 1;
}

if (!gpu_aware_flag)
if (!have_gpu_aware) {
if (me == 0)
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling GPU-aware MPI");
gpu_aware_flag = 0;
}
}
#endif

if (gpu_aware_flag == 1 && have_gpu_aware == 0) {
if (me == 0)
error->warning(FLERR,"Turning off GPU-aware MPI since it is not detected, "
"use '-pk kokkos gpu/aware on' to override");
gpu_aware_flag = 0;
} else if (have_gpu_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative?
// MVAPICH2
if (have_gpu_aware == -1) {
// MVAPICH2
#if defined(MPICH) && defined(MVAPICH2_VERSION)
char* str;
gpu_aware_flag = 0;
have_gpu_aware = 0;
if ((str = getenv("MV2_USE_CUDA")))
if ((strcmp(str,"1") == 0))
gpu_aware_flag = 1;
have_gpu_aware = 1;

if (!gpu_aware_flag)
if (!have_gpu_aware) {
if (me == 0)
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling GPU-aware MPI");
// pure MPICH or some unsupported MPICH derivative
gpu_aware_flag = 0;
}
// pure MPICH or some MPICH derivative
// check for Cray MPICH which has GPU-aware support
#elif defined(MPICH) && !defined(MVAPICH2_VERSION)
char* str;
gpu_aware_flag = 0;
have_gpu_aware = 0;
if ((str = getenv("MPICH_GPU_SUPPORT_ENABLED")))
if ((strcmp(str,"1") == 0))
gpu_aware_flag = 1;
have_gpu_aware = 1;

if (!gpu_aware_flag && me == 0)
error->warning(FLERR,"Detected MPICH. Disabling GPU-aware MPI");
if (!have_gpu_aware) {
if (me == 0)
error->warning(FLERR,"Detected MPICH. Disabling GPU-aware MPI");
gpu_aware_flag = 0;
}
#else
if (me == 0)
error->warning(FLERR,"Kokkos with GPU-enabled backend assumes GPU-aware MPI is available,"
" but cannot determine if this is the case\n try"
" '-pk kokkos gpu/aware off' if getting segmentation faults");

if (me == 0)
error->warning(FLERR,"Kokkos with GPU-enabled backend assumes GPU-aware MPI is available,"
" but cannot determine if this is the case\n try"
" '-pk kokkos gpu/aware off' if getting segmentation faults");
#endif
} // if (-1 == have_gpu_aware)
}
} // nmpi > 1
#endif // LMP_ENABLE_DEVICE
#endif // LMP_KOKKOS_GPU

#ifdef KILL_KOKKOS_ON_SIGSEGV
signal(SIGSEGV, my_signal_handler);
Expand Down

0 comments on commit e655cda

Please sign in to comment.