Skip to content

Commit

Permalink
Merge pull request #63 from ecmwf-ifs/nams_paper
Browse files Browse the repository at this point in the history
HIP updates
  • Loading branch information
reuterbal authored Jan 18, 2024
2 parents 3d1a5f4 + 3c5776f commit 54f7eea
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 15 deletions.
50 changes: 50 additions & 0 deletions arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# Source me to get the correct configure/build/run environment

# Store the current xtrace state and disable tracing (module is *way* too verbose).
# `$-` holds the shell's active option letters; the substitution deletes every
# character except `x`, so tracing_ ends up as "x" if `set -x` was active and
# empty otherwise. stderr is discarded so this bookkeeping is not traced itself.
{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null

# Load environment module(s), echoing the command first so the action stays
# visible in the output while shell tracing is switched off.
# Arguments: the module name(s), forwarded unchanged to `module load`.
module_load() {
  echo "+ module load $*"
  # Quoted "$@" passes every argument through intact: no word splitting and
  # no pathname expansion on the module name.
  module load "$@"
}
# Unload environment module(s), echoing the command first so the action stays
# visible in the output while shell tracing is switched off.
# Arguments: the module name(s), forwarded unchanged to `module unload`.
module_unload() {
  echo "+ module unload $*"
  # Quoted "$@" passes every argument through intact: no word splitting and
  # no pathname expansion on the module name.
  module unload "$@"
}

# Reset the module environment before loading anything, to be certain that
# modules inherited from the calling shell do not interfere
module reset

# Load modules. Each load is echoed by module_load because tracing is off here.
# NOTE(review): the load order is presumably significant — confirm before reordering.
module_load LUMI/23.09
module_load partition/G
module_load PrgEnv-cray/8.4.0
# Keep the cce version in sync with the libcraymp path (cce/16.0.1) that the
# accompanying toolchain.cmake hard-codes
module_load cce/16.0.1
module_load cray-mpich/8.1.27
module_load craype-network-ofi
module_load rocm/5.2.3
module_load buildtools/23.09
module_load Boost/1.82.0-cpeCray-23.09
module_load cray-python/3.10.10
module_load cray-hdf5/1.12.2.7
module_load craype-x86-trento
# Same gfx90a target that toolchain.cmake sets as GPU_TARGETS
module_load craype-accel-amd-gfx90a

# Print the resulting module set
module list

# Switch tracing on so the compiler selection below is echoed
set -x

# Select the C, C++ and Fortran compilers
# NOTE(review): cc/CC/ftn are presumably the Cray PE compiler wrappers — confirm
export CC=cc CXX=CC FC=ftn

# Restore tracing to stored setting
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

# Toolchain file handed to ecbuild. The path is relative.
# NOTE(review): confirm which directory the consumer resolves it against.
export ECBUILD_TOOLCHAIN="./toolchain.cmake"
49 changes: 49 additions & 0 deletions arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

####################################################################
# COMPILER
####################################################################

# Switch ECBUILD_FIND_MPI off.
# NOTE(review): presumably MPI is supplied by the compiler wrappers selected in
# env.sh, so ecbuild need not search for it — confirm.
set( ECBUILD_FIND_MPI OFF )

# Pre-seed the ENABLE_USE_STMT_FUNC toggle. It is an ON/OFF switch, so it is
# cached as BOOL, like the other boolean cache entry in this toolchain
# (HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL).
set( ENABLE_USE_STMT_FUNC ON CACHE BOOL "" )

####################################################################
# OpenMP FLAGS
####################################################################

# Pre-seed the OpenMP_* cache entries that CMake's FindOpenMP module reads,
# so OpenMP is configured from the values given here.

# C and C++ use the same OpenMP flag.
foreach( _omp_lang C CXX )
  set( OpenMP_${_omp_lang}_FLAGS "-fopenmp" CACHE STRING "" )
endforeach()

# Fortran takes its own flag spelling.
# NOTE(review): -hlist=aimd looks like a listing option rather than an OpenMP
# switch — confirm it is meant to ride along with the OpenMP flags.
set( OpenMP_Fortran_FLAGS "-homp -hlist=aimd" CACHE STRING "" )

# All three languages name the same OpenMP runtime library.
foreach( _omp_lang C CXX Fortran )
  set( OpenMP_${_omp_lang}_LIB_NAMES "craymp" CACHE STRING "" )
endforeach()

# Absolute location of that library; the path embeds the CCE version (16.0.1).
set( OpenMP_craymp_LIBRARY "/opt/cray/pe/cce/16.0.1/cce/x86_64/lib/libcraymp.so" CACHE STRING "" )

####################################################################
# OpenACC FLAGS
####################################################################

# The same OpenACC flag is cached for every language, so the three entries
# are generated in one loop.
foreach( _acc_lang C CXX Fortran )
  set( OpenACC_${_acc_lang}_FLAGS "-hacc" CACHE STRING "" )
endforeach()

####################################################################
# Compiler FLAGS
####################################################################

# General Flags (add to default)
set(ECBUILD_Fortran_FLAGS "-hcontiguous")
set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
# Linker pass-through must be a single token with no space after the comma:
# "-Wl, --as-needed" hands the compiler driver an empty "-Wl," plus a separate
# "--as-needed" argument instead of forwarding --as-needed to the linker.
set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed")

# Fortran flags for the configuration named by the _BIT suffix
# NOTE(review): presumably ecbuild's "Bit" build type — confirm.
set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")

# GPU architecture to target; the same gfx90a that env.sh selects through the
# craype-accel-amd-gfx90a module
set( GPU_TARGETS "gfx90a" CACHE STRING "" )

# Select the OpenMP pragma to be used: defining this result up front as OFF
# makes the `if( NOT DEFINED ... )` guard in cmake/features/OMP.cmake skip its
# try_compile probe for the bind(parallel) variant.
set( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OFF CACHE BOOL "" )
12 changes: 12 additions & 0 deletions cmake/features/OMP.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
if( HAVE_OMP )

if( NOT DEFINED HAVE_OMP_TARGET_TEAMS_DISTRIBUTE )

try_compile(
HAVE_OMP_TARGET_TEAMS_DISTRIBUTE
${CMAKE_CURRENT_BINARY_DIR}
Expand All @@ -11,6 +13,10 @@ if( HAVE_OMP )
ecbuild_debug_var( HAVE_OMP_TARGET_TEAMS_DISTRIBUTE )
ecbuild_debug_var( _HAVE_OMP_TARGET_TEAMS_DISTRIBUTE_OUTPUT )

endif()

if( NOT DEFINED HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL )

try_compile(
HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL
${CMAKE_CURRENT_BINARY_DIR}
Expand All @@ -22,6 +28,10 @@ if( HAVE_OMP )
ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL )
ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL_OUTPUT )

endif()

if( NOT DEFINED HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )

try_compile(
HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD
${CMAKE_CURRENT_BINARY_DIR}
Expand All @@ -32,6 +42,8 @@ if( HAVE_OMP )

ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )
ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD_OUTPUT )

endif()

if( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OR HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )
set( HAVE_OMP_TARGET_LOOP_CONSTRUCT ON CACHE BOOL "OpenMP target teams loop is supported" )
Expand Down
18 changes: 11 additions & 7 deletions src/cloudsc_hip/cloudsc/cloudsc_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,9 +457,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) {
double t2 = omp_get_wtime();

printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks);
printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n",
"NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s");
double zfrac, zmflops;
printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n",
"NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s");
double zfrac, zmflops, zthrput;
for (int t = 0; t < numthreads; t++) {
const double tloc = zinfo[0][t];
const int coreid = (int) zinfo[1][t];
Expand All @@ -468,21 +468,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) {
zfrac = (double)igpc / (double)numcols;
if (tloc > 0.0) {
zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc;
zthrput = (double)numcols/tloc;
} else {
zmflops = 0.;
zthrput = 0.0;
}
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n",
numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops);
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n",
numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput);
}
double tdiff = t2 - t1;
zfrac = 1.0;
if (tdiff > 0.0) {
zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff;
zthrput = (double)numcols/tdiff;
} else {
zmflops = 0.0;
zthrput = 0.0;
}
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n",
numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops);
printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n",
numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput);

cloudsc_validate(klon, nlev, nclv, numcols, nproma,
plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif,
Expand Down
18 changes: 11 additions & 7 deletions src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,9 +498,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) {
double t2 = omp_get_wtime();

printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks);
printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n",
"NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s");
double zfrac, zmflops;
printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n",
"NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s");
double zfrac, zmflops, zthrput;
for (int t = 0; t < numthreads; t++) {
const double tloc = zinfo[0][t];
const int coreid = (int) zinfo[1][t];
Expand All @@ -509,21 +509,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) {
zfrac = (double)igpc / (double)numcols;
if (tloc > 0.0) {
zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc;
zthrput = (double)numcols/tloc;
} else {
zmflops = 0.;
zthrput = 0.0;
}
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n",
numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops);
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n",
numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput);
}
double tdiff = t2 - t1;
zfrac = 1.0;
if (tdiff > 0.0) {
zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff;
zthrput = (double)numcols/tdiff;
} else {
zmflops = 0.0;
zthrput = 0.0;
}
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n",
numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops);
printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n",
numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput);

cloudsc_validate(klon, nlev, nclv, numcols, nproma,
plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif,
Expand Down
11 changes: 10 additions & 1 deletion src/cloudsc_hip/cloudsc/load_state.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
/*
* (C) Copyright 1988- ECMWF.
*
* This software is licensed under the terms of the Apache Licence Version 2.0
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
* In applying this licence, ECMWF does not waive the privileges and immunities
* granted to it by virtue of its status as an intergovernmental organisation
* nor does it submit to any jurisdiction.
*/

#include "load_state.h"
//#include "yomcst_c.hpp"
#include <iostream>

#include <math.h>
Expand Down

0 comments on commit 54f7eea

Please sign in to comment.