Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pulling in HIP support, and extended OpenMP offload support #248

Merged
merged 5 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions makefiles/c_app.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ APP_ENTRY_MPI_BASENAME := $(basename $(APP_ENTRY_MPI))
APP_ENTRY_OP := $(APP_ENTRY_BASENAME)_op.cpp
APP_ENTRY_MPI_OP := $(APP_ENTRY_MPI_BASENAME)_op.cpp

ALL_VARIANTS := seq genseq vec openmp openmp4 cuda cuda_hyb
ALL_VARIANTS := seq genseq vec openmp openmp4 cuda cuda_hyb hip
ALL_VARIANTS += $(foreach variant,$(ALL_VARIANTS),mpi_$(variant))
ALL_VARIANTS := $(foreach variant,$(ALL_VARIANTS),$(APP_NAME)_$(variant))

Expand All @@ -28,6 +28,10 @@ ifeq ($(HAVE_C),true)
ifeq ($(HAVE_CUDA),true)
BASE_BUILDABLE_VARIANTS += cuda cuda_hyb
endif

ifeq ($(HAVE_HIP),true)
BASE_BUILDABLE_VARIANTS += hip
endif
endif

BUILDABLE_VARIANTS :=
Expand Down Expand Up @@ -69,7 +73,7 @@ all: $(BUILDABLE_VARIANTS)

clean:
-$(RM) $(ALL_VARIANTS)
-$(RM) -r seq vec openmp openmp4 cuda openacc
-$(RM) -r seq vec openmp openmp4 cuda hip openacc
-$(RM) *_op.cpp
-$(RM) .generated .generated
-$(RM) *.d
Expand Down Expand Up @@ -109,6 +113,8 @@ CUDA_HYB_SRC := $(APP_ENTRY_OP) \
MPI_CUDA_HYB_SRC := $(APP_ENTRY_MPI_OP) \
cuda/$(APP_NAME)_mpi_hybkernels_cpu.o cuda/$(APP_NAME)_mpi_hybkernels_gpu.o

HIP_SRC := $(APP_ENTRY_OP) hip/$(APP_NAME)_kernels.o
MPI_HIP_SRC := $(APP_ENTRY_MPI_OP) hip/$(APP_NAME)_mpi_kernels.o

# $(1) = variant name
# $(2) = additional flags
Expand All @@ -134,10 +140,14 @@ $(eval $(call RULE_template, openmp, $(OMP_CPPFLAGS),
$(eval $(call RULE_template, openmp4, $(OMP_OFFLOAD_CPPFLAGS) -DOP2_WITH_OMP4, OPENMP4, ))
$(eval $(call RULE_template, cuda,, CUDA, MPI_CUDA))
$(eval $(call RULE_template, cuda_hyb, $(OMP_CPPFLAGS), CUDA, MPI_CUDA))
$(eval $(call RULE_template, hip,, HIP, MPI_HIP))

$(APP_NAME)_cuda: cuda/$(APP_NAME)_kernels.o
$(APP_NAME)_mpi_cuda: cuda/$(APP_NAME)_mpi_kernels.o

$(APP_NAME)_hip: hip/$(APP_NAME)_kernels.o
$(APP_NAME)_mpi_hip: hip/$(APP_NAME)_mpi_kernels.o

$(APP_NAME)_cuda_hyb: cuda/$(APP_NAME)_hybkernels_gpu.o cuda/$(APP_NAME)_hybkernels_cpu.o
$(APP_NAME)_mpi_cuda_hyb: cuda/$(APP_NAME)_mpi_hybkernels_gpu.o cuda/$(APP_NAME)_mpi_hybkernels_cpu.o

Expand All @@ -147,6 +157,12 @@ cuda/$(APP_NAME)_kernels.o: .generated
cuda/$(APP_NAME)_mpi_kernels.o: .generated
$(NVCC) $(NVCCFLAGS) $(OP2_INC) -c cuda/$(APP_ENTRY_MPI_BASENAME)_kernels.cu -o $@

hip/$(APP_NAME)_kernels.o: .generated
$(HIPCXX) $(HIPFLAGS) $(OP2_INC) -c hip/$(APP_ENTRY_BASENAME)_kernels.cpp -o $@

hip/$(APP_NAME)_mpi_kernels.o: .generated
$(HIPCXX) $(HIPFLAGS) $(OP2_INC) -c hip/$(APP_ENTRY_MPI_BASENAME)_kernels.cpp -o $@

cuda/$(APP_NAME)_hybkernels_gpu.o: .generated
$(NVCC) $(NVCCFLAGS) -DOP_HYBRID_GPU -DGPUPASS $(OP2_INC) \
-c cuda/$(APP_ENTRY_BASENAME)_hybkernels.cu -o $@
Expand Down
16 changes: 14 additions & 2 deletions makefiles/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ ifeq ($(MAKECMDGOALS),config)
$(call info_bold,> C/C++ CUDA compiler $(TEXT_NOTFOUND); skipping search for CUDA libraries)
endif

ifeq ($(CONFIG_HAVE_C_HIP),true)
$(call info_bold,> C/C++ HIP compiler $(TEXT_FOUND) ($(CONFIG_HIP)); looking for the HIP libraries)
include $(DEPS_DIR)/hip.mk
else
$(call info_bold,> C/C++ HIP compiler $(TEXT_NOTFOUND); skipping search for HIP libraries)
endif

$(info )

ifeq ($(CONFIG_HAVE_MPI_C),true)
Expand Down Expand Up @@ -139,6 +146,7 @@ ifneq ($(MAKECMDGOALS),clean)
$(info . C: $(if $(HAVE_C),$(CC),not found))
$(info . C++: $(if $(HAVE_C),$(CXX),not found))
$(info . CUDA: $(if $(HAVE_C_CUDA),$(NVCC),not found))
$(info . HIP: $(if $(HAVE_C_HIP),$(HIPCXX),not found))
$(info . Fortran: $(if $(HAVE_F),$(FC),not found))
$(info )
$(info MPI compilers:)
Expand All @@ -161,14 +169,15 @@ ifneq ($(MAKECMDGOALS),clean)
$(info . C: $(CFLAGS))
$(info . C++: $(CXXFLAGS))
$(info . CUDA: $(NVCCFLAGS))
$(info . HIP: $(HIPFLAGS))
$(info . Fortran: $(FFLAGS))
$(info )
endif

OP2_LIBS_SINGLE_NODE := seq cuda openmp openmp4
OP2_LIBS_SINGLE_NODE := seq cuda hip openmp openmp4
OP2_FOR_LIBS_SINGLE_NODE := $(foreach lib,$(OP2_LIBS_SINGLE_NODE),f_$(lib))

OP2_LIBS_MPI := mpi mpi_cuda
OP2_LIBS_MPI := mpi mpi_cuda mpi_hip
OP2_FOR_LIBS_MPI := $(foreach lib,$(OP2_LIBS_MPI),f_$(lib))

OP2_LIBS := hdf5 $(OP2_LIBS_SINGLE_NODE) $(OP2_LIBS_MPI)
Expand Down Expand Up @@ -199,3 +208,6 @@ $(foreach lib,$(OP2_LIBS_MPI),$(eval $(call OP2_LIB_template,$(lib),\

OP2_LIB_CUDA += $(CUDA_LIB)
OP2_LIB_MPI_CUDA += $(CUDA_LIB)

OP2_LIB_HIP += $(HIP_LIB)
OP2_LIB_MPI_HIP += $(HIP_LIB)
12 changes: 12 additions & 0 deletions makefiles/compilers.mk
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ ifdef OP2_COMPILER
OP2_C_COMPILER ?= $(OP2_COMPILER)
OP2_F_COMPILER ?= $(OP2_COMPILER)
OP2_C_CUDA_COMPILER ?= nvhpc
OP2_C_HIP_COMPILER ?= hip
endif

# Process CUDA_GEN and NV_ARCH until CUDA_GEN is a whitespace separated list of
Expand All @@ -28,6 +29,10 @@ ifdef OP2_C_CUDA_COMPILER
include $(MAKEFILES_DIR)/compilers/c_cuda/$(OP2_C_CUDA_COMPILER).mk
endif

ifdef OP2_C_HIP_COMPILER
include $(MAKEFILES_DIR)/compilers/c_hip/$(OP2_C_HIP_COMPILER).mk
endif

ifdef OP2_F_COMPILER
include $(MAKEFILES_DIR)/compilers/fortran/$(OP2_F_COMPILER).mk
endif
Expand All @@ -48,6 +53,13 @@ ifneq ($(shell which $(CONFIG_NVCC) 2> /dev/null),)
CONFIG_HAVE_C_CUDA := true
endif

# Detect the HIP compiler. When the command named by CONFIG_HIP resolves on
# PATH, replace CONFIG_HIP with its full path, expose the same value as
# CONFIG_HIPCXX and mark HIP compilation as available.
ifneq ($(shell which $(CONFIG_HIP) 2> /dev/null),)
  CONFIG_HIP != which $(CONFIG_HIP)
  # Recursively expanded, so it always follows the resolved CONFIG_HIP.
  CONFIG_HIPCXX = $(CONFIG_HIP)
  CONFIG_HAVE_C_HIP := true
endif

ifneq ($(shell which $(CONFIG_FC) 2> /dev/null),)
CONFIG_FC != which $(CONFIG_FC)
CONFIG_HAVE_F := true
Expand Down
4 changes: 4 additions & 0 deletions makefiles/compilers/c/clang.mk
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@ CONFIG_CC := clang
CONFIG_CXX := clang++

CONFIG_CXXLINK ?= -lc++

# OpenMP target offload with clang. The default flags target AMD GPUs
# (amdgcn-amd-amdhsa) and are fixed to the gfx90a architecture; preset
# CONFIG_OMP_OFFLOAD_CXXFLAGS to build for a different device.
CONFIG_CPP_HAS_OMP_OFFLOAD ?= true
CONFIG_OMP_OFFLOAD_CXXFLAGS ?= -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a
# Single source of truth: mirror the (possibly overridden) config value
# instead of repeating the flag string.
OMP_OFFLOAD_CXXFLAGS = $(CONFIG_OMP_OFFLOAD_CXXFLAGS)
4 changes: 3 additions & 1 deletion makefiles/compilers/c/cray.mk
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@ CONFIG_OMP_CPPFLAGS ?= -fopenmp
CONFIG_CPP_HAS_OMP ?= true

# CONFIG_OMP_OFFLOAD_CPPFLAGS ?=
CONFIG_CPP_HAS_OMP_OFFLOAD ?= false
CONFIG_CPP_HAS_OMP_OFFLOAD ?= true
OMP_OFFLOAD_CXXFLAGS = -fopenmp
CONFIG_OMP_OFFLOAD_CXXFLAGS = -fopenmp
2 changes: 1 addition & 1 deletion makefiles/compilers/c/nvhpc.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ CONFIG_CPP_HAS_OMP ?= true
GPU_FFLAG := -gpu=fastmath,ptxinfo,lineinfo
$(foreach arch,$(CUDA_GEN),$(eval GPU_FFLAG := $(GPU_FFLAG),cc$(arch)))

CONFIG_OMP_OFFLOAD_CPPFLAGS ?= -mp=gpu $(GPU_FFLAG)
CONFIG_OMP_OFFLOAD_CXXFLAGS ?= -mp=gpu $(GPU_FFLAG)
CONFIG_CPP_HAS_OMP_OFFLOAD ?= true
9 changes: 9 additions & 0 deletions makefiles/compilers/c_hip/hip.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# HIP compiler driver; preset CONFIG_HIP to use something other than hipcc.
CONFIG_HIP ?= hipcc

# Optimisation flags: debug info without optimisation when DEBUG is set,
# aggressive optimisation otherwise.
ifdef DEBUG
  HIP_OPT := -g -O0
else
  HIP_OPT := -Ofast
endif

# Default HIP compile flags. HIP_ARCH is not set in this file and is expanded
# lazily when the flags are used.
CONFIG_HIPFLAGS ?= -x hip --offload-arch=$(HIP_ARCH) $(HIP_OPT)
33 changes: 33 additions & 0 deletions makefiles/dependencies/hip.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Probe for a usable HIP installation by compiling and linking a small test
# program with the configured HIP compiler.
#
# Reads:  CONFIG_HIP, DEPS_DIR, DEP_BUILD_LOG, and optionally
#         HIP_INSTALL_PATH, HIP_LINK and HIP_DEF (none of the latter three is
#         given a default here).
# Sets:   CONFIG_HAVE_HIP, CONFIG_HIP_INC and CONFIG_HIP_LIB, only when the
#         probe succeeds.
ifdef HIP_INSTALL_PATH
  HIP_INC_PATH := -I$(HIP_INSTALL_PATH)/include
  HIP_LIB_PATH := -L$(HIP_INSTALL_PATH)/lib64 -L$(HIP_INSTALL_PATH)/lib
endif

# Recursively expanded so the command reflects HIP_LINK at the time of use.
HIP_TEST = $(CONFIG_HIP) $(HIP_INC_PATH) \
    $(DEPS_DIR)/tests/hip.cpp $(HIP_LIB_PATH) $(HIP_LINK) \
    -o $(DEPS_DIR)/tests/hip

# Start a fresh log with the command line, then append the compiler output.
$(file > $(DEP_BUILD_LOG),$(HIP_TEST))
$(shell $(HIP_TEST) >> $(DEP_BUILD_LOG) 2>&1)

# The probe runs exactly once. There are no fallback link flags for HIP, so a
# retry would only repeat the identical command and duplicate the log; pass
# HIP_LINK explicitly if extra libraries are needed.
ifeq ($(.SHELLSTATUS),0)
  # Remove the probe binary; only the exit status matters.
  $(shell rm -f $(DEPS_DIR)/tests/hip)

  $(call info_bold, > HIP libraries $(TEXT_FOUND) (link flags: $(or $(HIP_LINK), none)))

  CONFIG_HAVE_HIP := true

  CONFIG_HIP_INC := $(strip $(HIP_INC_PATH) $(HIP_DEF))
  CONFIG_HIP_LIB := $(strip $(HIP_LIB_PATH) $(HIP_LINK))
else
  # Show the failed command and the compiler diagnostics.
  $(call info_bold, > HIP libraries $(TEXT_NOTFOUND):)
  $(info $(file < $(DEP_BUILD_LOG)))
  $(info )
endif
7 changes: 7 additions & 0 deletions makefiles/dependencies/tests/hip.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include <hip/hip_runtime.h>

// Minimal program that calls one HIP runtime function and returns its status
// code as the process exit status.
int main() {
  int numDevices = 0;
  return hipGetDeviceCount(&numDevices);
}
39 changes: 39 additions & 0 deletions op2/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ ifeq ($(HAVE_C),true)
endif
endif

# Add the HIP backends to the list of OP2 libraries to build when HAVE_HIP is
# true: the single-node "hip" library always, "mpi_hip" only under the nested
# condition below.
ifeq ($(HAVE_HIP),true)
OP2_BUILDABLE_LIBS += hip

# NOTE(review): HIP_IS_BUILDABLE is not defined in any makefile visible here,
# while the plain "mpi" library below is gated on MPI_IS_BUILDABLE. If this was
# meant to test for MPI, mpi_hip is never built as written; confirm.
ifeq ($(HIP_IS_BUILDABLE),true)
OP2_BUILDABLE_LIBS += mpi_hip
endif
endif

ifeq ($(MPI_IS_BUILDABLE),true)
OP2_BUILDABLE_LIBS += mpi
endif
Expand Down Expand Up @@ -110,6 +118,10 @@ OP2_CUDA := $(OP2_BASE) $(addprefix $(OBJ)/,\
cuda/op_cuda_decl.o \
cuda/op_cuda_rt_support.o)

OP2_HIP := $(OP2_BASE) $(addprefix $(OBJ)/,\
hip/op_hip_decl.o \
hip/op_hip_rt_support.o)

OP2_FOR_CUDA := $(OP2_CUDA) $(OP2_FOR_BASE_CUDA) $(addprefix $(OBJ)/fortran/,\
op2_for_rt_wrappers_cuda.o \
cudaConfigurationParams.o)
Expand Down Expand Up @@ -153,6 +165,18 @@ OP2_MPI_CUDA := $(OP2_BASE) $(addprefix $(OBJ)/,\
externlib/op_util.o \
externlib/op_renumber.o)

OP2_MPI_HIP := $(OP2_BASE) $(addprefix $(OBJ)/,\
hip/op_hip_rt_support+mpi.o \
mpi/op_mpi_core.o \
mpi/op_mpi_part_core.o \
mpi/op_mpi_hip_decl.o \
mpi/op_mpi_hip_rt_support.o \
mpi/op_mpi_hip_kernels.o \
mpi/op_mpi_hdf5.o \
mpi/op_mpi_util.o \
externlib/op_util.o \
externlib/op_renumber.o)

OP2_FOR_MPI_CUDA := $(OP2_MPI_CUDA) $(OP2_FOR_BASE_MPI_CUDA) $(addprefix $(OBJ)/fortran/,\
op2_for_rt_wrappers_cuda.o \
cudaConfigurationParams.o)
Expand Down Expand Up @@ -186,6 +210,12 @@ $(OBJ)/cuda/%+mpi.o: src/cuda/%.cpp | $(OBJ)
$(OBJ)/cuda/%.o: src/cuda/%.cpp | $(OBJ)
$(CXX) $(CXXFLAGS) $(INC) -DSET_CUDA_CACHE_CONFIG -c $< -o $@

$(OBJ)/hip/%+mpi.o: src/hip/%.cpp | $(OBJ)
$(MPICXX) $(CXXFLAGS) $(HIPFLAGS) $(INC) -DOPMPI -c $< -o $@

$(OBJ)/hip/%.o: src/hip/%.cpp | $(OBJ)
$(CXX) $(CXXFLAGS) $(HIPFLAGS) $(INC) -c $< -o $@

$(OBJ)/openmp4/%.o: src/openmp4/%.cpp | $(OBJ)
$(CXX) $(CXXFLAGS) $(OMP_OFFLOAD_CXXFLAGS) $(INC) -c $< -o $@

Expand All @@ -195,6 +225,15 @@ $(OBJ)/mpi/%.o: src/mpi/%.cpp | $(OBJ)
$(OBJ)/mpi/%.o: src/mpi/%.cu | $(OBJ)
$(NVCC) $(NVCCFLAGS) $(INC) -c $< -o $@

$(OBJ)/mpi/op_mpi_hip_decl.o: src/mpi/op_mpi_hip_decl.cpp | $(OBJ)
$(MPICXX) $(CXXFLAGS) $(HIPFLAGS) $(INC) -c $< -o $@

$(OBJ)/mpi/op_mpi_hip_rt_support.o: src/mpi/op_mpi_hip_rt_support.cpp | $(OBJ)
$(MPICXX) $(CXXFLAGS) $(HIPFLAGS) $(INC) -c $< -o $@

$(OBJ)/mpi/op_mpi_hip_kernels.o: src/mpi/op_mpi_hip_kernels.cpp | $(OBJ)
$(MPICXX) $(CXXFLAGS) $(HIPFLAGS) $(INC) -c $< -o $@

$(OBJ)/fortran/%+mpi.o: src/fortran/%.c | $(OBJ)
$(MPICC) $(CFLAGS) $(INC) -DOPMPI -c $< -o $@

Expand Down
4 changes: 2 additions & 2 deletions op2/include/op_cuda_rt_support.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ void cutilDeviceInit_mpi(int argc, char **argv, int mpi_rank);
* routines to move arrays to/from GPU device
*/

void op_mvHostToDevice(void **map, int size);
void op_mvHostToDevice(void **map, size_t size);

void op_cpHostToDevice(void **data_d, void **data_h, int size);
void op_cpHostToDevice(void **data_d, void **data_h, size_t size);

void op_cuda_get_data(op_dat dat);

Expand Down
Loading
Loading