diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh new file mode 100644 index 00000000..663721c8 --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh @@ -0,0 +1,50 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load LUMI/23.09 +module_load partition/G +module_load PrgEnv-cray/8.4.0 +module_load cce/16.0.1 +module_load cray-mpich/8.1.27 +module_load craype-network-ofi +module_load rocm/5.2.3 +module_load buildtools/23.09 +module_load Boost/1.82.0-cpeCray-23.09 +module_load cray-python/3.10.10 +module_load cray-hdf5/1.12.2.7 +module_load craype-x86-trento +module_load craype-accel-amd-gfx90a + +module list + +set -x + +export CC=cc CXX=CC FC=ftn + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake new file mode 100644 index 00000000..272ab042 --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake @@ -0,0 +1,49 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI OFF ) +set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) + +#################################################################### +# OpenMP FLAGS +#################################################################### + +set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_Fortran_FLAGS "-homp -hlist=aimd" CACHE STRING "" ) +set( OpenMP_C_LIB_NAMES "craymp" CACHE STRING "" ) +set( OpenMP_CXX_LIB_NAMES "craymp" CACHE STRING "" ) +set( OpenMP_Fortran_LIB_NAMES "craymp" CACHE STRING "" ) +set( OpenMP_craymp_LIBRARY "/opt/cray/pe/cce/16.0.1/cce/x86_64/lib/libcraymp.so" CACHE STRING "" ) + +#################################################################### +# OpenACC FLAGS +#################################################################### + +set( OpenACC_C_FLAGS "-hacc" CACHE STRING "" ) +set( OpenACC_CXX_FLAGS "-hacc" CACHE STRING "" ) +set( OpenACC_Fortran_FLAGS "-hacc" CACHE STRING "" ) + +#################################################################### +# Compiler FLAGS +#################################################################### + +# General Flags (add to default) +set(ECBUILD_Fortran_FLAGS "-hcontiguous") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed") + +set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") + +set( GPU_TARGETS "gfx90a" CACHE STRING "" ) +# select OpenMP pragma to be used +set( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OFF CACHE BOOL "" ) diff --git a/cmake/features/OMP.cmake b/cmake/features/OMP.cmake index 7cd49da8..d938e882 100644 --- a/cmake/features/OMP.cmake +++ b/cmake/features/OMP.cmake @@ -1,5 +1,7 @@ if( HAVE_OMP ) + if( NOT DEFINED HAVE_OMP_TARGET_TEAMS_DISTRIBUTE ) + try_compile( HAVE_OMP_TARGET_TEAMS_DISTRIBUTE ${CMAKE_CURRENT_BINARY_DIR} @@ -11,6 +13,10 @@ if( HAVE_OMP ) ecbuild_debug_var( HAVE_OMP_TARGET_TEAMS_DISTRIBUTE ) ecbuild_debug_var( _HAVE_OMP_TARGET_TEAMS_DISTRIBUTE_OUTPUT ) + endif() + + if( NOT DEFINED HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL ) + try_compile( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL ${CMAKE_CURRENT_BINARY_DIR} @@ -22,6 +28,10 @@ if( HAVE_OMP ) ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL ) ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL_OUTPUT ) + endif() + + if( NOT DEFINED HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD ) + try_compile( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD ${CMAKE_CURRENT_BINARY_DIR} @@ -32,6 +42,8 @@ if( HAVE_OMP ) ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD ) ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD_OUTPUT ) + + endif() if( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OR HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD ) set( HAVE_OMP_TARGET_LOOP_CONSTRUCT ON CACHE BOOL "OpenMP target teams loop is supported" ) diff --git a/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp b/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp index c03a877d..dc7efc79 100644 --- a/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp +++ b/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp @@ -457,9 +457,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { double t2 = omp_get_wtime(); printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); - printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n", - "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s"); - double zfrac, zmflops; + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s"); + double zfrac, zmflops, zthrput; for (int t = 0; t < numthreads; t++) { const double tloc = zinfo[0][t]; const int coreid = (int) zinfo[1][t]; @@ -468,21 +468,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { zfrac = (double)igpc / (double)numcols; if (tloc > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + zthrput = (double)numcols/tloc; } else { zmflops = 0.; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n", - numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput); } double tdiff = t2 - t1; zfrac = 1.0; if (tdiff > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + zthrput = (double)numcols/tdiff; } else { zmflops = 0.0; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n", - numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); cloudsc_validate(klon, nlev, nclv, numcols, nproma, plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, diff --git a/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp b/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp index 657bed97..d8090966 100644 --- a/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp +++ b/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp @@ -498,9 +498,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { double t2 = omp_get_wtime(); printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); - printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n", - "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s"); - double zfrac, zmflops; + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s"); + double zfrac, zmflops, zthrput; for (int t = 0; t < numthreads; t++) { const double tloc = zinfo[0][t]; const int coreid = (int) zinfo[1][t]; @@ -509,21 +509,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { zfrac = (double)igpc / (double)numcols; if (tloc > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + zthrput = (double)numcols/tloc; } else { zmflops = 0.; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n", - numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput); } double tdiff = t2 - t1; zfrac = 1.0; if (tdiff > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + zthrput = (double)numcols/tdiff; } else { zmflops = 0.0; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n", - numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); cloudsc_validate(klon, nlev, nclv, numcols, nproma, plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, diff --git a/src/cloudsc_hip/cloudsc/load_state.cpp b/src/cloudsc_hip/cloudsc/load_state.cpp index 5ba60abf..034270ca 100644 --- a/src/cloudsc_hip/cloudsc/load_state.cpp +++ b/src/cloudsc_hip/cloudsc/load_state.cpp @@ -1,5 +1,14 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + #include "load_state.h" -//#include "yomcst_c.hpp" #include #include