Skip to content

Commit

Permalink
Merge pull request #116 from yoctoyotta1024/profiling
Browse files Browse the repository at this point in the history
feat: add kokkos profiling hooks to measure computational performance
  • Loading branch information
yoctoyotta1024 authored Dec 5, 2024
2 parents 4e49914 + f30212e commit 3bde009
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 5 deletions.
6 changes: 6 additions & 0 deletions libs/runcleo/creategbxs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <Kokkos_Pair.hpp>
#include <Kokkos_Profiling_ScopedRegion.hpp>
#include <iostream>
#include <memory>
#include <stdexcept>
Expand Down Expand Up @@ -240,6 +241,9 @@ void print_gbxs(const viewh_constgbx gbxs);
* This function creates Gridboxes based on the provided gridbox maps and initial conditions,
* and given super-droplets.
*
* The Kokkos::Profiling callbacks are null pointers (so profiling calls do nothing) unless
* the path to a Kokkos profiling tool library has been exported as the environment variable
* "KOKKOS_TOOLS_LIBS" before runtime, so that the library gets dynamically loaded.
*
* @tparam GbxMaps Type representing Gridbox Maps.
* @tparam GbxInitConds Type representing Gridbox initial conditions.
*
Expand All @@ -252,6 +256,8 @@ void print_gbxs(const viewh_constgbx gbxs);
template <GridboxMaps GbxMaps, typename GbxInitConds>
dualview_gbx create_gbxs(const GbxMaps &gbxmaps, const GbxInitConds &gbxic,
const viewd_supers totsupers) {
Kokkos::Profiling::ScopedRegion region("init_gbxs");

std::cout << "\n--- create gridboxes ---\ninitialising\n";
const dualview_gbx gbxs(initialise_gbxs(gbxmaps, gbxic, totsupers));

Expand Down
6 changes: 6 additions & 0 deletions libs/runcleo/createsupers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#define LIBS_RUNCLEO_CREATESUPERS_HPP_

#include <Kokkos_Core.hpp>
#include <Kokkos_Profiling_ScopedRegion.hpp>
#include <iostream>
#include <stdexcept>
#include <string>
Expand Down Expand Up @@ -102,12 +103,17 @@ void print_supers(const viewd_constsupers supers);
* ordered by the gridbox indexes and generated using a generator which uses
* the initial conditions provided by the `SuperdropInitConds` type.
*
* The Kokkos::Profiling callbacks are null pointers (so profiling calls do nothing) unless
* the path to a Kokkos profiling tool library has been exported as the environment variable
* "KOKKOS_TOOLS_LIBS" before runtime, so that the library gets dynamically loaded.
*
* @tparam SuperdropInitConds The type of the super-droplets' initial conditions data.
* @param sdic The instance of the super-droplets' initial conditions data.
* @return A view of super-droplets in device memory.
*/
template <typename SuperdropInitConds>
viewd_supers create_supers(const SuperdropInitConds &sdic) {
Kokkos::Profiling::ScopedRegion region("init_supers");

// Log message and create superdrops using the initial conditions
std::cout << "\n--- create superdrops ---\ninitialising\n";
viewd_supers supers(initialise_supers(sdic));
Expand Down
27 changes: 23 additions & 4 deletions libs/runcleo/runcleo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <Kokkos_Profiling_ScopedRegion.hpp>
#include <Kokkos_Random.hpp>
#include <concepts>
#include <iostream>
Expand Down Expand Up @@ -212,13 +213,18 @@ class RunCLEO {
*
* This function runs SDM on both host and device from `t_mdl` to `t_next`.
*
* The Kokkos::Profiling callbacks are null pointers (so profiling calls do nothing) unless
* the path to a Kokkos profiling tool library has been exported as the environment variable
* "KOKKOS_TOOLS_LIBS" before runtime, so that the library gets dynamically loaded.
*
* @param t_mdl Current timestep of the coupled model.
* @param t_next Next timestep of the coupled model.
* @param gbxs DualView of gridboxes.
* @param totsupers View of all superdrops (both in and out of bounds of domain).
*/
void sdm_step(const unsigned int t_mdl, unsigned int t_next, dualview_gbx gbxs,
const viewd_supers totsupers) const {
Kokkos::Profiling::ScopedRegion region("timestep_sdm");

gbxs.sync_device(); // get device up to date with host
sdm.run_step(t_mdl, t_next, gbxs.view_device(), totsupers);
gbxs.modify_device(); // mark device view of gbxs as modified
Expand All @@ -229,10 +235,15 @@ class RunCLEO {
*
* This function runs the Coupled Dynamics on host from t_mdl to t_next.
*
* The Kokkos::Profiling callbacks are null pointers (so profiling calls do nothing) unless
* the path to a Kokkos profiling tool library has been exported as the environment variable
* "KOKKOS_TOOLS_LIBS" before runtime, so that the library gets dynamically loaded.
*
* @param t_mdl Current timestep of the coupled model.
* @param t_next Next timestep of the coupled model.
*/
void coupldyn_step(const unsigned int t_mdl, const unsigned int t_next) const {
  // Label the coupled-dynamics step for any attached Kokkos profiling tool; the
  // region stays open for the lifetime of this local object.
  Kokkos::Profiling::ScopedRegion coupldyn_region("timestep_coupldyn");

  // Advance the coupled dynamics from t_mdl to t_next.
  coupldyn.run_step(t_mdl, t_next);
}

Expand Down Expand Up @@ -287,21 +298,29 @@ class RunCLEO {
* Creates runtime objects, gridboxes, superdrops and random number generators
* using initial conditions, then prepares and performs CLEO timestepping.
*
* The Kokkos::Profiling callbacks are null pointers (so profiling calls do nothing) unless
* the path to a Kokkos profiling tool library has been exported as the environment variable
* "KOKKOS_TOOLS_LIBS" before runtime, so that the library gets dynamically loaded.
*
* @param initconds InitialConditions object containing initial conditions.
* @param t_end End time for timestepping.
* @return 0 on success.
*/
int operator()(const InitialConditions auto &initconds, const unsigned int t_end) const {
  // Outermost profiling region: spans the whole CLEO run (initialisation + timestepping).
  Kokkos::Profiling::pushRegion("runcleo");

  // Create runtime objects and prepare CLEO for timestepping.
  // Explicit push/pop (rather than a scoped region object) is used here because
  // `totsupers` and `gbxs` must stay alive after the "init" region is closed.
  Kokkos::Profiling::pushRegion("init");
  auto totsupers = create_supers(initconds.initsupers);
  auto gbxs = create_gbxs(sdm.gbxmaps, initconds.initgbxs, totsupers);
  prepare_to_timestep(gbxs);
  Kokkos::Profiling::popRegion();  // closes "init"

  // Do timestepping from t=0 to t=t_end.
  Kokkos::Profiling::pushRegion("timestep");
  timestep_cleo(t_end, gbxs, totsupers);
  Kokkos::Profiling::popRegion();  // closes "timestep"

  Kokkos::Profiling::popRegion();  // closes "runcleo"
  return 0;
}
};
Expand Down
13 changes: 12 additions & 1 deletion libs/runcleo/sdmmethods.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <Kokkos_Profiling_ScopedRegion.hpp>
#include <Kokkos_Random.hpp>
#include <Kokkos_StdAlgorithms.hpp>

Expand Down Expand Up @@ -89,13 +90,18 @@ class SDMMethods {
* `movesupers` is an instance of the MoveSupersInDomain templated type with a certain
* instance of a type of GridboxMaps, super-droplets' Motion and boundary conditions.
*
* The Kokkos::Profiling callbacks are null pointers (so profiling calls do nothing) unless
* the path to a Kokkos profiling tool library has been exported as the environment variable
* "KOKKOS_TOOLS_LIBS" before runtime, so that the library gets dynamically loaded.
*
* @param t_sdm Current timestep for SDM.
* @param d_gbxs View of gridboxes on device.
* @param totsupers View of all superdrops (both in and out of bounds of domain).
* @param mo Monitor of SDM processes.
*/
void superdrops_movement(const unsigned int t_sdm, viewd_gbx d_gbxs, const viewd_supers totsupers,
                         const SDMMonitor auto mo) const {
  // Label the super-droplet movement step for any attached Kokkos profiling tool;
  // the region stays open for the lifetime of this local object.
  Kokkos::Profiling::ScopedRegion movement_region("timestep_sdm_movement");

  // Delegate the movement itself to `movesupers`, passing on this object's gridbox maps.
  movesupers.run_step(t_sdm, gbxmaps, d_gbxs, totsupers, mo);
}

Expand All @@ -122,9 +128,12 @@ class SDMMethods {
* @brief run SDM microphysics for each gridbox (using sub-timestepping routine).
*
* This function runs SDM microphysics for each gridbox using a sub-timestepping routine.
* Kokkos::parallel_for is nested parallelism within the parallelised loop over gridboxes;
* the serial equivalent is simply: `for (size_t ii(0); ii < ngbxs; ++ii) { [...] }`
*
* The Kokkos::Profiling callbacks are null pointers (so profiling calls do nothing) unless
* the path to a Kokkos profiling tool library has been exported as the environment variable
* "KOKKOS_TOOLS_LIBS" before runtime, so that the library gets dynamically loaded.
*
* @param t_sdm Current timestep for SDM.
* @param t_next Next timestep for SDM.
* @param d_gbxs View of gridboxes on device.
Expand All @@ -133,6 +142,8 @@ class SDMMethods {
template <SDMMonitor SDMMo>
void operator()(const unsigned int t_sdm, const unsigned int t_next, const viewd_gbx d_gbxs,
const SDMMo mo) const {
Kokkos::Profiling::ScopedRegion region("timestep_sdm_microphysics");

// TODO(all) use scratch space for parallel region
const size_t ngbxs(d_gbxs.extent(0));
Kokkos::parallel_for(
Expand Down

0 comments on commit 3bde009

Please sign in to comment.