diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index cfc45db7e..c088ad2ba 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -2,6 +2,7 @@ stages:
   - API_Build
   - APP_Build
+  - HIP
   - test
   - docs
   - clean
 
@@ -22,6 +23,118 @@ build:GNU:API:
     paths:
       - $CI_PROJECT_DIR/OPS-INSTALL
 
+Test:HIP:
+  stage: HIP
+  tags:
+    - CCP, test
+  script:
+    - |-
+      export PATH=/opt/rocm-4.5.0/bin:$PATH
+      export OPS_COMPILER=hip
+      export MPI_INSTALL_PATH=/usr
+      export HDF5_INSTALL_PATH=/usr/lib/x86_64-linux-gnu/hdf5/openmpi
+      export HIP_INSTALL_PATH=/opt/rocm-4.5.0/
+      export OPS_INSTALL_PATH=$CI_PROJECT_DIR/ops
+      export MPICC=mpicc
+      export MPICXX=mpic++
+      export MPICPP=mpicxx
+      export MPIFC=mpif90
+      export MPIF90=mpif90
+      export MPI_INC=/usr/lib/x86_64-linux-gnu/openmpi/include
+      export XCOMPILER=-Xcompiler
+      export HIP_LINK="-L/usr/local/cuda/lib64 -lcudart"
+      export HIP_HDF5_MPI_LINK="-L/usr/lib/x86_64-linux-gnu/hdf5/openmpi/lib -lmpi_cxx -lmpi"
+      export HIPMPICPP=mpicxx
+      export HIPIEEE="--fmad false"
+      export HIP_PLATFORM=nvidia
+    - cd ops/c
+    - make seq IEEE=1
+    - make hip IEEE=1
+    - make hdf5_seq IEEE=1
+    - cd ../../apps/c
+    - cd CloverLeaf
+    - make cloverleaf_hip IEEE=1
+    - |-
+      ./cloverleaf_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+      grep "PASSED" clover.out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../CloverLeaf_3D
+    - make cloverleaf_hip IEEE=1
+    - |-
+      ./cloverleaf_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+      grep "PASSED" clover.out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../CloverLeaf_3D_HDF5
+    - make cloverleaf_hip IEEE=1
+    - make generate_file IEEE=1
+    - |-
+      ./generate_file
+      ./cloverleaf_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+      grep "PASSED" clover.out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../lowdim_test
+    - make lowdim_hip
+    - |-
+      ./lowdim_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+      grep "PASSED" perf_out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../mblock
+    - make mblock_hip
+    - |-
+      ./mblock_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > mblock.out
+      grep "PASSED" mblock.out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../mb_shsgc/Max_datatransfer
+    # - make shsgc_hip
+    # - |-
+    #   ./shsgc_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+    #   grep "Pre shock error is:" perf_out
+    #   grep "Post shock error is:" perf_out
+    #   grep "Post shock Error is" perf_out
+    #   grep "Total Wall time" perf_out
+    #   grep -e "acceptable" -e "correct" perf_out
+    #   rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../../multiDim
+    - make multidim_hip
+    - |-
+      ./multidim_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+      grep "PASSED" perf_out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../multiDim3D
+    - make multidim_hip
+    - |-
+      ./multidim_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+      grep "PASSED" perf_out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../poisson
+    - make poisson_hip
+    - |-
+      ./poisson_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+      grep "PASSED" perf_out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../shsgc
+    - make shsgc_hip
+    - |-
+      ./shsgc_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+      grep "PASSED" perf_out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../TeaLeaf
+    - make tealeaf_hip
+    - |-
+      ./tealeaf_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4 > perf_out
+      grep "PASSED" perf_out
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; fi
+    - cd ../multiDim_HDF5
+    - make -f Makefile.write write_hip
+    - rm .generated
+    - make read_hip
+    - |-
+      rm -rf write_data.h5 read_data.h5
+      ./write_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4
+      ./read_hip OPS_BLOCK_SIZE_X=64 OPS_BLOCK_SIZE_Y=4
+      h5diff write_data.h5 read_data.h5
+      rc=$?; if [[ $rc != 0 ]]; then echo "TEST FAILED";exit $rc; else echo "TEST PASSED"; fi
+
 build:Intel:API:
   stage: API_Build
   tags:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9b7a2d89d..e8e149f1e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,6 +5,7 @@ project(OPS C CXX)
 # if show the compiling process in detail
 option(OPS_VERBOSE_WARNING "Turn on verbose warning messages" OFF)
 option(OPS_TEST "Turn on tests for Apps" OFF)
+option(OPS_HIP "Turn on the HIP backend" OFF)
 if (NOT OPS_VERBOSE_WARNING)
     message("We show concise compiling information by defautl! Use -DOPS_VERBOSE_WARNING=ON to switch on.")
 endif()
@@ -26,7 +27,7 @@ set(HDF5_PREFER_PARALLEL true)
 # Configure Compilers
 # C
 set(CMAKE_C_STANDARD 99)
-#TODO:Shall we keep the "-g" in the release mode? It increases file size.
+
 if (${CMAKE_C_COMPILER_ID} STREQUAL GNU)
     set(CMAKE_C_FLAGS "-fPIC -Wall")
     set(CMAKE_C_FLAGS_RELEASE "-O3")
@@ -91,6 +92,7 @@ set(ConfigPackageLocation ${CMAKE_INSTALL_PREFIX}/lib/cmake)
 # Add find scripts
 list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
 
+
 # Try to find the required dependency
 find_package(MPI QUIET)
 find_package(HDF5 QUIET COMPONENTS C HL)
@@ -98,9 +100,27 @@ find_package(CUDAToolkit QUIET)
 find_package(OpenACC QUIET)
 find_package(OpenCL QUIET)
 find_package(OpenMP QUIET)
-find_package(Python2 QUIET)
-if (NOT Python2_FOUND)
-    message (FATAL_ERROR "We cannot find Python2 and the Python translator needs Python2! Please use -DPython2_EXECUTABLE to specify the path.")
+find_package(Python3 QUIET)
+
+if (OPS_HIP)
+  # Search for rocm in common locations
+  list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
+  # Find hip
+  find_package(hip)
+  # Force the HIP platform to NVIDIA. set(ENV{...}) only affects this CMake
+  # run and the processes it launches, not the later build step.
+  set(ENV{HIP_PLATFORM} "nvidia")
+  # Assume success; downgraded below if the hip imported targets are missing.
+  set(HIP_FOUND TRUE)
+
+  if (NOT TARGET hip::host OR NOT TARGET hip::device)
+    message (WARNING "We cannot find the HIP environment. The HIP codes won't work! You might need to use CMAKE_PREFIX_PATH to specify the path for HIP!")
+    set(HIP_FOUND FALSE)
+  endif ()
+endif ()
+
+if (NOT Python3_FOUND)
+    message (FATAL_ERROR "We cannot find Python3 and the Python translator needs Python3! Please use -DPython3_EXECUTABLE to specify the path.")
 endif ()
 # Configure the "include" dir for compiling
 if (NOT HDF5_FOUND)
@@ -113,7 +133,7 @@ endif ()
 if (CUDAToolkit_FOUND)
     set(CMAKE_CUDA_COMPILER ${CUDAToolkit_NVCC_EXECUTABLE})
     if (GPU_ARCH)
-        set(CMAKE_CUDA_ARCHITECTURES 70 CACHE STRING "CUDA architectures")
+        set(CMAKE_CUDA_ARCHITECTURES ${GPU_ARCH} CACHE STRING "CUDA architectures")
     else()
         message(WARNING "Please the GPU architecture using -DGPU_ARCH=XXX!")
     endif()
diff --git a/apps/c/CMakeLists.txt b/apps/c/CMakeLists.txt
index 18eb8fe5c..1d8384af6 100644
--- a/apps/c/CMakeLists.txt
+++ b/apps/c/CMakeLists.txt
@@ -141,9 +141,9 @@ if (${CMAKE_PROJECT_NAME} STREQUAL APP)
     find_package(OpenACC QUIET)
     find_package(OpenCL QUIET)
     find_package(OpenMP QUIET)
-    find_package(Python2 QUIET)
-    if (NOT Python2_FOUND)
-        message (FATAL_ERROR "We cannot find Python2 and the Python translator needs Python2! Please use -DPython2_EXECUTABLE to specify the path.")
+    find_package(Python3 QUIET) # QUIET (not REQUIRED) so the hint below is actually reachable
+    if (NOT Python3_FOUND)
+        message (FATAL_ERROR "We cannot find Python3 and the Python translator needs Python3! Please use -DPython3_EXECUTABLE to specify the path.")
     else()
         FIND_PATH (opsc NAMES "ops.py" PATHS ${SEARCH_PATHS} PATH_SUFFIXES bin/ops_translator/c)
         FIND_PATH (opsfortran NAMES "ops_fortran.py" PATHS ${SEARCH_PATHS} PATH_SUFFIXES bin/ops_translator/fortran)
@@ -227,14 +227,17 @@ if (${CMAKE_PROJECT_NAME} STREQUAL OPS)
     set(HDF5_SEQ ${HDF5_FOUND})
     set(CUDA ${CUDAToolkit_FOUND})
     set(OPENCL ${OpenCL_FOUND})
+    if (OPS_HIP)
+        set(HIP ${HIP_FOUND})
+    endif ()
     if (${MPI_FOUND})
         set(MPI TRUE)
         set(HDF5_MPI ${HDF5_FOUND})
         set(CUDA_MPI ${CUDAToolkit_FOUND})
         set(OPENCL_MPI ${OpenCL_FOUND})
     endif()
-    if (NOT Python2_FOUND)
-        message (FATAL_ERROR "We cannot find Python2 and the Python translator needs Python2!")
+    if (NOT Python3_FOUND)
+        message (FATAL_ERROR "We cannot find Python3 and the Python translator needs Python3!")
     else()
         set(OPS_C_TRANSLATOR "${CMAKE_SOURCE_DIR}/ops_translator/c/ops.py")
         set(OPS_F_TRANSLATOR "${opsfortran}/ops_translator/fortran/ops_fortran.py")
@@ -386,6 +389,27 @@ macro(BUILD_OPS_C_SAMPLE Name Odd Others Extra Trid GenerateTest)
             )
         endif()
     endif()
+    if (OPS_HIP)
+        if (HIP AND NOT TRID)
+            add_executable(${Name}_hip ${OPS} ${OTHERS} "${TMP_SOURCE_DIR}/HIP/${KernerName}_kernels.cpp")
+            message("${TMP_SOURCE_DIR}/HIP/${KernerName}_hip_kernel.cpp")
+            target_include_directories(${Name}_hip PRIVATE ${TMP_SOURCE_DIR})
+            target_link_libraries(${Name}_hip ops_hip hip::device)
+            # HDF5 linking (ops_hdf5_seq hdf5::hdf5 hdf5::hdf5_hl MPI::MPI_CXX,
+            # guarded by HDF5_SEQ) is currently disabled for the HIP sample.
+            # NOTE(review): confirm whether HIP samples need HDF5 support.
+            install(TARGETS ${Name}_hip DESTINATION ${APP_INSTALL_DIR}/${Name})
+            if ((OPS_TEST) AND (GPU_NUMBER GREATER_EQUAL 1) AND (${GenerateTest} STREQUAL "YES"))
+                set(args "OPS_CL_DEVICE=1 OPS_BLOCK_SIZE_X=512 OPS_BLOCK_SIZE_Y=1")
+                add_test(NAME ${Name}_hip
+                    COMMAND ${CMAKE_COMMAND} -DCMD=$<TARGET_FILE:${Name}_hip> -DARG=${args} -DOPS_INSTALL_PATH=${OPS_INSTALL_PATH}
+                    -P ${OPS_APP_SRC}/runtests.cmake
+                    WORKING_DIRECTORY "${TMP_SOURCE_DIR}"
+                )
+            endif()
+        endif()
+    endif()
+
     if (MPI)
         add_executable(${Name}_mpi_dev ${DEV} ${OTHERS})
         target_include_directories(${Name}_mpi_dev PRIVATE ${TMP_SOURCE_DIR})
diff --git a/ops/c/CMakeLists.txt b/ops/c/CMakeLists.txt
index e99f9f0ff..b2604477e 100644
--- a/ops/c/CMakeLists.txt
+++ b/ops/c/CMakeLists.txt
@@ -49,6 +49,19 @@ if (OpenCL_FOUND)
     target_include_directories(ops_opencl PRIVATE ${OpenCL_INCLUDE_DIRS})
     InstallTarget(opencl ${ConfigPackageLocation})
 endif ()
+if (OPS_HIP)
+    if (HIP_FOUND)
+        file(GLOB_RECURSE HIP "${CMAKE_CURRENT_SOURCE_DIR}/src/hip/*.cpp")
+        add_library(ops_hip ${CORE} ${EXTERN} ${HIP})
+        # Build the HIP sources for the NVIDIA platform and link the CUDA runtime.
+        target_compile_definitions(ops_hip PRIVATE __HIP_PLATFORM_NVIDIA__)
+        # NOTE(review): the HIP include directory below is pinned to ROCm 4.5.0 - confirm.
+        target_link_libraries(ops_hip PRIVATE CUDA::cudart_static)
+
+        target_include_directories(ops_hip PRIVATE /opt/rocm-4.5.0/hip/include)
+        InstallTarget(hip ${ConfigPackageLocation})
+    endif ()
+endif ()
 
 if (MPI_FOUND)
     file(GLOB_RECURSE MPICORE "${CMAKE_CURRENT_SOURCE_DIR}/src/core/*.cpp")
@@ -92,6 +105,20 @@ if (MPI_FOUND)
         target_link_libraries(ops_mpi_opencl PRIVATE ${OpenCL_LIBRARIES} MPI::MPI_CXX )
         InstallTarget(mpi_opencl ${ConfigPackageLocation})
     endif()
+    if (OPS_HIP) # NOTE(review): this branch builds ops_mpi_opencl, not a HIP library - presumably a placeholder; confirm
+        if (HIP_FOUND AND NOT TARGET ops_mpi_opencl) # skip when the OpenCL branch above already created the target
+            file(GLOB_RECURSE MPIOPENCL "${CMAKE_CURRENT_SOURCE_DIR}/src/mpi/*")
+            list(FILTER MPIOPENCL EXCLUDE REGEX "cuda")
+            list(FILTER MPIOPENCL EXCLUDE REGEX "host")
+            list(FILTER MPIOPENCL EXCLUDE REGEX "hdf5")
+            list(FILTER MPIOPENCL EXCLUDE REGEX "decl.cpp")
+            list(FILTER MPIOPENCL EXCLUDE REGEX "hip")
+            add_library(ops_mpi_opencl ${MPICORE} ${EXTERN} ${MPIOPENCL} "${CMAKE_CURRENT_SOURCE_DIR}/src/opencl/ops_opencl_rt_support.cpp")
+            target_include_directories(ops_mpi_opencl PRIVATE ${OpenCL_INCLUDE_DIRS})
+            target_link_libraries(ops_mpi_opencl PRIVATE ${OpenCL_LIBRARIES} MPI::MPI_CXX )
+            InstallTarget(mpi_opencl ${ConfigPackageLocation})
+        endif()
+    endif()
 endif ()
 
 # Tridiagonal library