Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redesigned "caar loop pre-boundary exchange", tuned for Frontier, also faster on Perlmutter GPU #6972

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions components/homme/cmake/machineFiles/frontier-bfb-serial.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# CMake initial cache file: HOMME bit-for-bit (BFB) testing on Frontier with
# the Kokkos Serial backend only (HIP and OpenMP are disabled below).
#
# Every entry is a non-FORCE cache default, so a value already present in the
# cache (e.g. given with -D) is not overwritten by this file.
set(HOMME_MACHINE "frontier-bfb-serial" CACHE STRING "")

#set(HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "")

# Third-party install prefixes. These are directories, hence PATH (not
# FILEPATH). The NetCDF/HDF5 prefixes are read from the environment at
# configure time, so the matching Cray modules must be loaded beforehand.
set(NETCDF_DIR "$ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX}" CACHE PATH "")
set(HDF5_DIR "$ENV{CRAY_HDF5_PARALLEL_PREFIX}" CACHE PATH "")
set(CPRNC_DIR "/ccs/proj/cli115/software/frontier/cprnc" CACHE PATH "")

set(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")

set(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

# WITH_PNETCDF is an on/off switch, so it is cached as BOOL. Typing it as
# FILEPATH lets CMake apply path handling to what is really a boolean value.
set(WITH_PNETCDF FALSE CACHE BOOL "")

set(USE_QUEUING FALSE CACHE BOOL "")

# Build both Kokkos dycore targets (preqx and theta).
set(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

set(HOMMEXX_BFB_TESTING TRUE CACHE BOOL "")

set(USE_TRILINOS OFF CACHE BOOL "")

#set(HIP_BUILD TRUE CACHE BOOL "")

# Kokkos backend selection: Serial only. The commented-out entries are the
# GPU (HIP / gfx90a) settings, kept for reference.
set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "")
#set(Kokkos_ENABLE_DEBUG OFF CACHE BOOL "")
#set(Kokkos_ARCH_VEGA90A ON CACHE BOOL "")
set(Kokkos_ENABLE_OPENMP OFF CACHE BOOL "")
#set(Kokkos_ENABLE_HIP ON CACHE BOOL "")
set(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "")

# Cray compiler wrappers for C, Fortran and C++.
set(CMAKE_C_COMPILER "cc" CACHE STRING "")
set(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "")
#set(CMAKE_Fortran_FLAGS "--gcc-toolchain=$ENV{MEMBERWORK}/cli115/workaround" CACHE STRING "")
set(CMAKE_CXX_COMPILER "CC" CACHE STRING "")

# Despite its name, this variable carries the Cray MPICH include/link flags
# that are appended to every ADD_*_FLAGS entry below.
set(Extrae_LIBRARY "-I$ENV{CRAY_MPICH_DIR}/include -L$ENV{CRAY_MPICH_DIR}/lib -lmpi -lmpifort" CACHE STRING "")

# Unoptimized (-O0) debug flags for all languages and for the link step.
#set(ADD_Fortran_FLAGS "--gcc-toolchain=$ENV{MEMBERWORK}/cli115/workaround -h flex_mp=intolerant -h thread0 -G2 ${Extrae_LIBRARY}" CACHE STRING "")
set(ADD_Fortran_FLAGS "-g -O0 ${Extrae_LIBRARY}" CACHE STRING "")
set(ADD_C_FLAGS "-g -O0 ${Extrae_LIBRARY}" CACHE STRING "")
set(ADD_CXX_FLAGS "-g -std=c++17 -O0 ${Extrae_LIBRARY}" CACHE STRING "")
set(ADD_LINKER_FLAGS "-g -O0 ${Extrae_LIBRARY}" CACHE STRING "")


# No OpenMP threading of any kind in this configuration.
set(ENABLE_OPENMP OFF CACHE BOOL "")
set(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set(HOMME_TESTING_PROFILE "short" CACHE STRING "")

# Tests are configured for a single process.
set(USE_NUM_PROCS 1 CACHE STRING "")

# Launch options used by the GPU configuration, kept for reference:
#set(USE_MPI_OPTIONS "-c7 --gpu-bind=closest --gpus-per-task=1" CACHE STRING "")
9 changes: 5 additions & 4 deletions components/homme/cmake/machineFiles/frontier-bfb.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,15 @@ SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "")

SET(CMAKE_C_COMPILER "cc" CACHE STRING "")
SET(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "")
SET(CMAKE_Fortran_FLAGS "--gcc-toolchain=$ENV{MEMBERWORK}/cli115/workaround" CACHE STRING "")
#SET(CMAKE_Fortran_FLAGS "--gcc-toolchain=$ENV{MEMBERWORK}/cli115/workaround" CACHE STRING "")
SET(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "")

SET(Extrae_LIBRARY "-I$ENV{CRAY_MPICH_DIR}/include -L$ENV{CRAY_MPICH_DIR}/lib -lmpi $ENV{PE_MPICH_GTL_DIR_amd_gfx90a} $ENV{PE_MPICH_GTL_LIBS_amd_gfx90a}" CACHE STRING "")
SET(Extrae_LIBRARY "-I$ENV{CRAY_MPICH_DIR}/include -L$ENV{CRAY_MPICH_DIR}/lib -lmpi $ENV{PE_MPICH_GTL_DIR_amd_gfx90a} $ENV{PE_MPICH_GTL_LIBS_amd_gfx90a} -lmpifort" CACHE STRING "")

SET(ADD_Fortran_FLAGS "--gcc-toolchain=$ENV{MEMBERWORK}/cli115/workaround -h flex_mp=intolerant -h thread0 -G2 ${Extrae_LIBRARY}" CACHE STRING "")
#SET(ADD_Fortran_FLAGS "--gcc-toolchain=$ENV{MEMBERWORK}/cli115/workaround -h flex_mp=intolerant -h thread0 -G2 ${Extrae_LIBRARY}" CACHE STRING "")
SET(ADD_Fortran_FLAGS "-h flex_mp=intolerant -h thread0 -G2 ${Extrae_LIBRARY}" CACHE STRING "")
SET(ADD_C_FLAGS "-g -O -ffp-model=strict ${Extrae_LIBRARY}" CACHE STRING "")
SET(ADD_CXX_FLAGS "-g -std=c++14 -O -ffp-model=strict -munsafe-fp-atomics --offload-arch=gfx90a -fno-gpu-rdc -Wno-unused-command-line-argument -Wno-unsupported-floating-point-opt -Wno-#pragma-messages ${Extrae_LIBRARY}" CACHE STRING "")
SET(ADD_CXX_FLAGS "-g -std=c++17 -O -ffp-model=strict -munsafe-fp-atomics --offload-arch=gfx90a -fno-gpu-rdc -Wno-unused-command-line-argument -Wno-unsupported-floating-point-opt -Wno-#pragma-messages ${Extrae_LIBRARY}" CACHE STRING "")
SET(ADD_LINKER_FLAGS "-g -O ${Extrae_LIBRARY}" CACHE STRING "")


Expand Down
48 changes: 48 additions & 0 deletions components/homme/cmake/machineFiles/pm-cpu-bfb.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# CMake initial cache file
#
# Bit-for-bit (BFB) HOMME testing on the Perlmutter CPU partition.
# This machine file works with PrgEnv-intel (the ADD_*_FLAGS below use
# Intel-style options).
#
# Every entry is a non-FORCE cache default, so a value already present in the
# cache (e.g. given with -D) is not overwritten by this file.
#
# Perlmutter generic MPI enabled compiler wrappers:
set(CMAKE_Fortran_COMPILER ftn CACHE FILEPATH "")
set(CMAKE_C_COMPILER cc CACHE FILEPATH "")
set(CMAKE_CXX_COMPILER CC CACHE FILEPATH "")


# Set kokkos arch, to get correct avx flags
set(Kokkos_ARCH_ZEN2 ON CACHE BOOL "")

# WITH_PNETCDF is an on/off switch, so it is cached as BOOL. Typing it as
# FILEPATH lets CMake apply path handling to what is really a boolean value.
set(WITH_PNETCDF FALSE CACHE BOOL "")

# Ask nf-config for the NetCDF-Fortran install prefix. A failure is reported
# as a warning instead of silently caching an empty path; it is not fatal so
# that a prefix supplied by other means can still be used.
execute_process(COMMAND nf-config --prefix
  RESULT_VARIABLE NFCONFIG_RESULT
  OUTPUT_VARIABLE NFCONFIG_OUTPUT
  ERROR_VARIABLE  NFCONFIG_ERROR
  OUTPUT_STRIP_TRAILING_WHITESPACE
  )
if(NOT NFCONFIG_RESULT EQUAL 0)
  message(WARNING "nf-config --prefix failed (${NFCONFIG_RESULT}): ${NFCONFIG_ERROR}")
endif()
set(NetCDF_Fortran_PATH "${NFCONFIG_OUTPUT}" CACHE STRING "")

# Same query for the NetCDF-C install prefix, via nc-config.
execute_process(COMMAND nc-config --prefix
  RESULT_VARIABLE NCCONFIG_RESULT
  OUTPUT_VARIABLE NCCONFIG_OUTPUT
  ERROR_VARIABLE  NCCONFIG_ERROR
  OUTPUT_STRIP_TRAILING_WHITESPACE
  )
if(NOT NCCONFIG_RESULT EQUAL 0)
  message(WARNING "nc-config --prefix failed (${NCCONFIG_RESULT}): ${NCCONFIG_ERROR}")
endif()
set(NetCDF_C_PATH "${NCCONFIG_OUTPUT}" CACHE STRING "")

set(USE_QUEUING FALSE CACHE BOOL "")
# for standalone HOMME builds:
set(CPRNC_DIR /global/cfs/cdirs/e3sm/tools/cprnc CACHE FILEPATH "")

set(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")
# Turn on the preqx Kokkos target and use a strict floating-point model so
# that Fortran and C++ results can be compared.
set(ADD_Fortran_FLAGS "-traceback -fp-model strict -qopenmp -O1" CACHE STRING "")
set(ADD_C_FLAGS "-traceback -fp-model strict -qopenmp -O1" CACHE STRING "")
set(ADD_CXX_FLAGS "-traceback -fp-model strict -qopenmp -O1" CACHE STRING "")
set(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
set(HOMMEXX_BFB_TESTING TRUE CACHE BOOL "")
set(HOMME_TESTING_PROFILE "short" CACHE STRING "")
set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

# Tests are launched through srun with these options.
set(USE_MPIEXEC "srun" CACHE STRING "")
set(USE_MPI_OPTIONS "-K --cpu_bind=cores" CACHE STRING "")
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ class HyperviscosityFunctorImpl
SphereOperators m_sphere_ops;

// Policies
#ifndef NDEBUG
#if defined(KOKKOS_ENABLE_CUDA) && !defined(NDEBUG)
template<typename Tag>
using TeamPolicyType = Kokkos::TeamPolicy<ExecSpace,Kokkos::LaunchBounds<512,1>,Tag>;
#else
Expand Down
Loading
Loading