diff --git a/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/diffusion.py b/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/diffusion.py index e9c5f962d0..f50f048109 100644 --- a/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/diffusion.py +++ b/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/diffusion.py @@ -17,10 +17,10 @@ from dataclasses import InitVar, dataclass, field from enum import Enum from typing import Final, Optional - from gt4py.next import as_field from gt4py.next.common import Dimension from gt4py.next.ffront.fbuiltins import Field, int32 +from icon4py.model.common.decomposition.definitions import DecompositionInfo from icon4py.model.atmosphere.diffusion.diffusion_states import ( DiffusionDiagnosticState, @@ -51,7 +51,6 @@ from icon4py.model.common.constants import ( CPD, - DEFAULT_PHYSICS_DYNAMICS_TIMESTEP_RATIO, GAS_CONSTANT_DRY_AIR, dbl_eps, ) @@ -293,11 +292,11 @@ def __post_init__(self, config): ) = self._determine_smagorinski_factor(config) object.__setattr__(self, "smagorinski_factor", smagorinski_factor) object.__setattr__(self, "smagorinski_height", smagorinski_height) - # see mo_interpol_nml.f90: + # nudge_max_coeff is already multiplied by factor of 5 in mo_interpol_nml.f90 object.__setattr__( self, "scaled_nudge_max_coeff", - config.nudge_max_coeff * DEFAULT_PHYSICS_DYNAMICS_TIMESTEP_RATIO, + config.nudge_max_coeff, ) def _determine_smagorinski_factor(self, config: DiffusionConfig): @@ -547,9 +546,9 @@ def _sync_cell_fields(self, prognostic_state): log.debug("communication of prognostic cell fields: theta, w, exner - start") self._exchange.exchange_and_wait( CellDim, - prognostic_state.w, - prognostic_state.theta_v, - prognostic_state.exner, + prognostic_state.w.ndarray[0 : self.grid.num_cells, :], + prognostic_state.theta_v.ndarray[0 : self.grid.num_cells, :], + prognostic_state.exner.ndarray[0 : self.grid.num_cells, :], ) log.debug("communication of prognostic cell 
fields: theta, w, exner - done") @@ -605,6 +604,7 @@ def _do_diffusion_step( vertex_end_local = self.grid.get_end_index( VertexDim, HorizontalMarkerIndex.local(VertexDim) ) + vertex_end_halo = self.grid.get_end_index(VertexDim, HorizontalMarkerIndex.halo(VertexDim)) # dtime dependent: enh_smag_factor, scale_k(self.enh_smag_fac, dtime, self.diff_multfac_smag, offset_provider={}) @@ -624,9 +624,12 @@ def _do_diffusion_step( ) log.debug("rbf interpolation 1: end") - # 2. HALO EXCHANGE -- CALL sync_patch_array_mult u_vert and v_vert log.debug("communication rbf extrapolation of vn - start") - self._exchange.exchange_and_wait(VertexDim, self.u_vert, self.v_vert) + self._exchange.exchange_and_wait( + VertexDim, + self.u_vert.ndarray[0 : self.grid.num_vertices, :], + self.v_vert.ndarray[0 : self.grid.num_vertices, :], + ) log.debug("communication rbf extrapolation of vn - end") log.debug("running stencil 01(calculate_nabla2_and_smag_coefficients_for_vn): start") @@ -684,7 +687,9 @@ def _do_diffusion_step( # TODO (magdalena) move this up and do asynchronous exchange if self.config.type_vn_diffu > 1: log.debug("communication rbf extrapolation of z_nable2_e - start") - self._exchange.exchange_and_wait(EdgeDim, self.z_nabla2_e) + self._exchange.exchange_and_wait( + EdgeDim, self.z_nabla2_e.ndarray[0 : self.grid.num_edges, :] + ) log.debug("communication rbf extrapolation of z_nable2_e - end") log.debug("2nd rbf interpolation: start") @@ -704,7 +709,11 @@ def _do_diffusion_step( # 6. 
HALO EXCHANGE -- CALL sync_patch_array_mult (Vertex Fields) log.debug("communication rbf extrapolation of z_nable2_e - start") - self._exchange.exchange_and_wait(VertexDim, self.u_vert, self.v_vert) + self._exchange.exchange_and_wait( + VertexDim, + self.u_vert.ndarray[0 : self.grid.num_vertices, :], + self.v_vert.ndarray[0 : self.grid.num_vertices, :], + ) log.debug("communication rbf extrapolation of z_nable2_e - end") log.debug("running stencils 04 05 06 (apply_diffusion_to_vn): start") @@ -734,7 +743,9 @@ def _do_diffusion_step( ) log.debug("running stencils 04 05 06 (apply_diffusion_to_vn): end") log.debug("communication of prognistic.vn : start") - handle_edge_comm = self._exchange.exchange(EdgeDim, prognostic_state.vn) + handle_edge_comm = self._exchange.exchange( + EdgeDim, prognostic_state.vn.ndarray[0 : self.grid.num_edges, :] + ) log.debug( "running stencils 07 08 09 10 (apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence): start" diff --git a/model/common/src/icon4py/model/common/config.py b/model/common/src/icon4py/model/common/config.py index f7f4389dcc..84395c459e 100644 --- a/model/common/src/icon4py/model/common/config.py +++ b/model/common/src/icon4py/model/common/config.py @@ -73,3 +73,7 @@ def device(self): @cached_property def limited_area(self): return os.environ.get("ICON4PY_LAM", False) + + @cached_property + def parallel_run(self): + return os.environ.get("ICON4PY_PARALLEL", False) diff --git a/model/common/src/icon4py/model/common/decomposition/definitions.py b/model/common/src/icon4py/model/common/decomposition/definitions.py index 54eeaf79c3..f66ed6f3c2 100644 --- a/model/common/src/icon4py/model/common/decomposition/definitions.py +++ b/model/common/src/icon4py/model/common/decomposition/definitions.py @@ -19,10 +19,9 @@ from enum import IntEnum from typing import Any, Protocol -import numpy as np -import numpy.ma as ma from gt4py.next import Dimension +from icon4py.model.common.settings import xp from 
icon4py.model.common.utils import builder @@ -72,12 +71,13 @@ class EntryType(IntEnum): HALO = 2 @builder - def with_dimension(self, dim: Dimension, global_index: np.ndarray, owner_mask: np.ndarray): - masked_global_index = ma.array(global_index, mask=owner_mask) - self._global_index[dim] = masked_global_index + def with_dimension(self, dim: Dimension, global_index: xp.ndarray, owner_mask: xp.ndarray): + self._global_index[dim] = global_index + self._owner_mask[dim] = owner_mask def __init__(self, klevels: int): self._global_index = {} + self._owner_mask = {} self._klevels = klevels @property @@ -90,31 +90,29 @@ def local_index(self, dim: Dimension, entry_type: EntryType = EntryType.ALL): return self._to_local_index(dim) case DecompositionInfo.EntryType.HALO: index = self._to_local_index(dim) - mask = self._global_index[dim].mask + mask = self._owner_mask[dim] return index[~mask] case DecompositionInfo.EntryType.OWNED: index = self._to_local_index(dim) - mask = self._global_index[dim].mask + mask = self._owner_mask[dim] return index[mask] def _to_local_index(self, dim): - data = ma.getdata(self._global_index[dim], subok=False) + data = self._global_index[dim] assert data.ndim == 1 - return np.arange(data.shape[0]) + return xp.arange(data.shape[0]) - def owner_mask(self, dim: Dimension) -> np.ndarray: - return self._global_index[dim].mask + def owner_mask(self, dim: Dimension) -> xp.ndarray: + return self._owner_mask[dim] def global_index(self, dim: Dimension, entry_type: EntryType = EntryType.ALL): match entry_type: case DecompositionInfo.EntryType.ALL: - return ma.getdata(self._global_index[dim], subok=False) + return self._global_index[dim] case DecompositionInfo.EntryType.OWNED: - global_index = self._global_index[dim] - return ma.getdata(global_index[global_index.mask]) + return self._global_index[dim][self._owner_mask[dim]] case DecompositionInfo.EntryType.HALO: - global_index = self._global_index[dim] - return ma.getdata(global_index[~global_index.mask]) + 
return self._global_index[dim][~self._owner_mask[dim]] case _: raise NotImplementedError() diff --git a/model/common/src/icon4py/model/common/decomposition/mpi_decomposition.py b/model/common/src/icon4py/model/common/decomposition/mpi_decomposition.py index d6fb764ee0..484b00e29a 100644 --- a/model/common/src/icon4py/model/common/decomposition/mpi_decomposition.py +++ b/model/common/src/icon4py/model/common/decomposition/mpi_decomposition.py @@ -21,6 +21,7 @@ from gt4py.next import Dimension, Field from icon4py.model.common.decomposition.definitions import SingleNodeExchange +from icon4py.model.common.settings import device try: @@ -34,6 +35,7 @@ make_field_descriptor, make_pattern, ) + from ghex.util import Architecture mpi4py.rc.initialize = False mpi4py.rc.finalize = True @@ -50,6 +52,7 @@ if TYPE_CHECKING: import mpi4py.MPI +ghex_arch = Architecture.GPU if device.name == "GPU" else Architecture.CPU CommId = Union[int, "mpi4py.MPI.Comm", None] log = logging.getLogger(__name__) @@ -100,8 +103,10 @@ def filter(self, record: logging.LogRecord) -> bool: @definitions.get_processor_properties.register(definitions.MultiNodeRun) -def get_multinode_properties(s: definitions.MultiNodeRun) -> definitions.ProcessProperties: - return _get_processor_properties(with_mpi=True) +def get_multinode_properties( + s: definitions.MultiNodeRun, comm_id: CommId = None +) -> definitions.ProcessProperties: + return _get_processor_properties(with_mpi=True, comm_id=comm_id) @dataclass(frozen=True) @@ -202,15 +207,16 @@ def exchange(self, dim: definitions.Dimension, *fields: Sequence[Field]): domain_descriptor = self._domain_descriptors[dim] assert domain_descriptor is not None, f"domain descriptor for {dim.value} not found" applied_patterns = [ - pattern(make_field_descriptor(domain_descriptor, f.asnumpy())) for f in fields + pattern(make_field_descriptor(domain_descriptor, f, arch=ghex_arch)) for f in fields ] handle = self._comm.exchange(applied_patterns) - log.info(f"exchange for 
{len(fields)} fields of dimension ='{dim.value}' initiated.") + log.debug(f"exchange for {len(fields)} fields of dimension ='{dim.value}' initiated.") return MultiNodeResult(handle, applied_patterns) def exchange_and_wait(self, dim: Dimension, *fields: tuple): res = self.exchange(dim, *fields) res.wait() + log.debug(f"exchange for {len(fields)} fields of dimension ='{dim.value}' done.") @dataclass diff --git a/model/common/src/icon4py/model/common/grid/horizontal.py b/model/common/src/icon4py/model/common/grid/horizontal.py index 7ac2050f3b..461c71d3e1 100644 --- a/model/common/src/icon4py/model/common/grid/horizontal.py +++ b/model/common/src/icon4py/model/common/grid/horizontal.py @@ -163,13 +163,6 @@ def end(cls, dim: Dimension) -> int: return cls._end[dim] -@dataclass(frozen=True) -class HorizontalGridSize: - num_vertices: int - num_edges: int - num_cells: int - - class EdgeParams: def __init__( self, diff --git a/model/common/src/icon4py/model/common/grid/utils.py b/model/common/src/icon4py/model/common/grid/utils.py index e8a02984e8..299064fc56 100644 --- a/model/common/src/icon4py/model/common/grid/utils.py +++ b/model/common/src/icon4py/model/common/grid/utils.py @@ -11,7 +11,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -import numpy as np from gt4py.next import Dimension, NeighborTableOffsetProvider from icon4py.model.common.settings import xp @@ -23,7 +22,7 @@ def neighbortable_offset_provider_for_1d_sparse_fields( neighbor_axis: Dimension, has_skip_values: bool, ): - table = xp.asarray(np.arange(old_shape[0] * old_shape[1]).reshape(old_shape)) + table = xp.asarray(xp.arange(old_shape[0] * old_shape[1]).reshape(old_shape)) return NeighborTableOffsetProvider( table, origin_axis, diff --git a/model/common/src/icon4py/model/common/settings.py b/model/common/src/icon4py/model/common/settings.py index b6d482eee5..f0ccc68649 100644 --- a/model/common/src/icon4py/model/common/settings.py +++ b/model/common/src/icon4py/model/common/settings.py @@ -18,3 
+18,4 @@ xp = config.array_ns device = config.device limited_area = config.limited_area +parallel_run = config.parallel_run diff --git a/tools/src/icon4pytools/py2fgen/template.py b/tools/src/icon4pytools/py2fgen/template.py index f258ac5a55..e2e3728f8f 100644 --- a/tools/src/icon4pytools/py2fgen/template.py +++ b/tools/src/icon4pytools/py2fgen/template.py @@ -106,7 +106,12 @@ def build_array_size_args() -> dict[str, str]: array_size_args = {} from icon4py.model.common import dimension - for var_name, var in vars(dimension).items(): + from icon4pytools.py2fgen.wrapper_utils import dimension as wrapper_dimensions + + combined_dims = dict(vars(dimension)) + combined_dims.update(vars(wrapper_dimensions)) + + for var_name, var in combined_dims.items(): if isinstance(var, Dimension): dim_name = var_name.replace( "Dim", "" diff --git a/tools/src/icon4pytools/py2fgen/wrapper_utils/__init__.py b/tools/src/icon4pytools/py2fgen/wrapper_utils/__init__.py new file mode 100644 index 0000000000..a6d2d236c9 --- /dev/null +++ b/tools/src/icon4pytools/py2fgen/wrapper_utils/__init__.py @@ -0,0 +1,13 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
def print_grid_decomp_info(
    icon_grid: IconGrid,
    processor_props: definitions.ProcessProperties,
    decomposition_info: definitions.DecompositionInfo,
    num_cells: int,
    num_edges: int,
    num_verts: int,
):
    """Write grid and decomposition details to the module logger at DEBUG level.

    Logged, in order: the start/end index tables and the C2E, C2E2C, V2E,
    E2C2V and E2C connectivities of ``icon_grid`` (once plain, once tagged
    with the rank), the leading ``num_cells`` / ``num_edges`` / ``num_verts``
    entries of the global index and owner mask of each horizontal dimension,
    and a short per-rank summary including halo counts.

    Args:
        icon_grid: grid whose ``start_indices``, ``end_indices`` and
            ``connectivities`` mappings are read.
        processor_props: supplies ``rank``, ``comm_size`` and ``comm_name``.
        decomposition_info: supplies ``global_index``, ``owner_mask`` and
            ``klevels``.
        num_cells: number of leading cell entries to print.
        num_edges: number of leading edge entries to print.
        num_verts: number of leading vertex entries to print.

    Side effects:
        Calls ``xp.set_printoptions(edgeitems=20)`` and does not restore the
        previous print options afterwards.
    """
    rank = processor_props.rank

    grid_entries = (
        ("cell_start", icon_grid.start_indices[CellDim]),
        ("cell_end", icon_grid.end_indices[CellDim]),
        ("vert_start", icon_grid.start_indices[VertexDim]),
        ("vert_end", icon_grid.end_indices[VertexDim]),
        ("edge_start", icon_grid.start_indices[EdgeDim]),
        ("edge_end", icon_grid.end_indices[EdgeDim]),
        ("c2e", icon_grid.connectivities[C2EDim]),
        ("c2e2c", icon_grid.connectivities[C2E2CDim]),
        ("v2e", icon_grid.connectivities[V2EDim]),
        ("e2c2v", icon_grid.connectivities[E2C2VDim]),
        ("e2c", icon_grid.connectivities[E2CDim]),
    )

    for label, value in grid_entries:
        # The first message has always been emitted without a ':' before the
        # value; keep the format strings exactly as they were.
        separator = "" if label == "cell_start" else ":"
        log.debug(f"icon_grid:{label}{separator}%s", value)

    for label, value in grid_entries:
        log.debug(f"icon_grid:{label} for rank %s is.... %s", rank, value)

    # Show more leading/trailing items when the arrays below are formatted.
    xp.set_printoptions(edgeitems=20)

    sized_dims = (
        ("c", CellDim, num_cells),
        ("e", EdgeDim, num_edges),
        ("v", VertexDim, num_verts),
    )
    for prefix, dim, size in sized_dims:
        log.debug(
            f"{prefix}_glb_index for rank %s is.... %s",
            rank,
            decomposition_info.global_index(dim)[0:size],
        )
    for prefix, dim, size in sized_dims:
        log.debug(
            f"{prefix}_owner_mask for rank %s is.... %s",
            rank,
            decomposition_info.owner_mask(dim)[0:size],
        )

    all_entries = definitions.DecompositionInfo.EntryType.ALL
    log.debug(
        f"rank={processor_props.rank}/{processor_props.comm_size}: inializing dycore for experiment 'mch_ch_r04_b09_dsl"
    )
    log.debug(
        f"rank={processor_props.rank}/{processor_props.comm_size}: decomposition info : klevels = {decomposition_info.klevels} "
        f"local cells = {decomposition_info.global_index(CellDim, all_entries).shape} "
        f"local edges = {decomposition_info.global_index(EdgeDim, all_entries).shape} "
        f"local vertices = {decomposition_info.global_index(VertexDim, all_entries).shape}"
    )
    log.debug(
        f"rank={processor_props.rank}/{processor_props.comm_size}: GHEX context setup: from {processor_props.comm_name} with {processor_props.comm_size} nodes"
    )

    # Halo entries are the ones whose owner-mask value is False.
    halo_cells = np.count_nonzero(np.invert(decomposition_info.owner_mask(CellDim)))
    halo_edges = np.count_nonzero(np.invert(decomposition_info.owner_mask(EdgeDim)))
    halo_vertices = np.count_nonzero(np.invert(decomposition_info.owner_mask(VertexDim)))
    log.debug(
        f"rank={processor_props.rank}/{processor_props.comm_size}: number of halo cells {halo_cells}"
    )
    log.debug(
        f"rank={processor_props.rank}/{processor_props.comm_size}: number of halo edges {halo_edges}"
    )
    # Previously this third message repeated the halo-cell count; report the
    # vertex halo instead so all three horizontal dimensions are covered.
    log.debug(
        f"rank={processor_props.rank}/{processor_props.comm_size}: number of halo vertices {halo_vertices}"
    )
/dev/null +++ b/tools/src/icon4pytools/py2fgen/wrapper_utils/dimension.py @@ -0,0 +1,24 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + + +from gt4py.next.ffront.fbuiltins import Dimension + + +SingletonDim = Dimension("Singleton") +SpecialADim = Dimension("SpecialA") +SpecialBDim = Dimension("SpecialB") +SpecialCDim = Dimension("SpecialC") +CellIndexDim = Dimension("CellIndex") +EdgeIndexDim = Dimension("EdgeIndex") +VertexIndexDim = Dimension("VertexIndex") diff --git a/tools/src/icon4pytools/py2fgen/wrapper_utils/grid_utils.py b/tools/src/icon4pytools/py2fgen/wrapper_utils/grid_utils.py new file mode 100644 index 0000000000..300ea6f061 --- /dev/null +++ b/tools/src/icon4pytools/py2fgen/wrapper_utils/grid_utils.py @@ -0,0 +1,169 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . 
def construct_icon_grid(
    cells_start_index,
    cells_end_index,
    vertex_start_index,
    vertex_end_index,
    edge_start_index,
    edge_end_index,
    num_cells,
    num_edges,
    num_vertices,
    num_levels,
    c2e,
    c2e2c,
    v2e,
    e2c2v,
    e2c,
    limited_area: bool,
    on_gpu: bool,
) -> IconGrid:
    """Assemble an ``IconGrid`` from index tables and connectivities.

    The ``*_start_index`` fields and all connectivity fields have 1 subtracted
    from every entry; the ``*_end_index`` fields are used unchanged.
    Connectivities are additionally squeezed, which drops length-1 axes.

    Args:
        cells_start_index, vertex_start_index, edge_start_index: fields
            exposing ``asnumpy()``; shifted by -1 before use.
        cells_end_index, vertex_end_index, edge_end_index: fields exposing
            ``asnumpy()``; used as they are.
        num_cells, num_edges, num_vertices: horizontal grid sizes.
        num_levels: number of vertical levels.
        c2e, c2e2c, v2e, e2c2v, e2c: connectivity fields exposing ``ndarray``.
        limited_area: forwarded to ``GridConfig``.
        on_gpu: forwarded to ``GridConfig``.

    Returns:
        The configured grid, including the derived C2E2CO connectivity and
        the sizes of the flattened ECV, CE and CEC dimensions.
    """
    log.debug("Constructing icon grid in py")
    log.debug("num_cells:%s", num_cells)
    log.debug("num_edges:%s", num_edges)
    log.debug("num_vertices:%s", num_vertices)
    log.debug("num_levels:%s", num_levels)

    cells_start_index_np = offset_fortran_indices_return_numpy(cells_start_index)
    vertex_start_index_np = offset_fortran_indices_return_numpy(vertex_start_index)
    edge_start_index_np = offset_fortran_indices_return_numpy(edge_start_index)

    # NOTE(review): end indices are deliberately not shifted here; presumably
    # they are consumed as exclusive upper bounds - confirm against IconGrid.
    cells_end_index_np = cells_end_index.asnumpy()
    vertex_end_index_np = vertex_end_index.asnumpy()
    edge_end_index_np = edge_end_index.asnumpy()

    c2e_loc = offset_squeeze_fortran_indices_return_xp(c2e)
    c2e2c_loc = offset_squeeze_fortran_indices_return_xp(c2e2c)
    v2e_loc = offset_squeeze_fortran_indices_return_xp(v2e)
    e2c2v_loc = offset_squeeze_fortran_indices_return_xp(e2c2v)
    e2c_loc = offset_squeeze_fortran_indices_return_xp(e2c)

    config = GridConfig(
        horizontal_config=HorizontalGridSize(
            num_vertices=num_vertices,
            num_cells=num_cells,
            num_edges=num_edges,
        ),
        vertical_config=VerticalGridSize(num_lev=num_levels),
        limited_area=limited_area,
        on_gpu=on_gpu,
    )

    # C2E2CO is C2E2C with each row's own index prepended as first column.
    # Build that index column once, directly in the array namespace, instead
    # of converting a Python ``range`` twice (once only for logging).
    row_count = c2e2c_loc.shape[0]
    own_index = xp.arange(row_count)
    log.debug(" c2e2c.shape[0] %s", row_count)
    log.debug(" xp.asarray(range(c2e2c.shape[0]))) %s", own_index.shape)
    c2e2c0 = xp.column_stack((own_index, c2e2c_loc))

    grid = (
        IconGrid()
        .with_config(config)
        .with_start_end_indices(VertexDim, vertex_start_index_np, vertex_end_index_np)
        .with_start_end_indices(EdgeDim, edge_start_index_np, edge_end_index_np)
        .with_start_end_indices(CellDim, cells_start_index_np, cells_end_index_np)
        .with_connectivities(
            {
                C2EDim: c2e_loc,
                E2CDim: e2c_loc,
                C2E2CDim: c2e2c_loc,
                C2E2CODim: c2e2c0,
            }
        )
        .with_connectivities(
            {
                V2EDim: v2e_loc,
                E2C2VDim: e2c2v_loc,
            }
        )
    )

    # Sizes of the flattened (1D sparse) dimensions are products of the
    # horizontal size and the neighbour count.
    grid.update_size_connectivities(
        {
            ECVDim: grid.size[EdgeDim] * grid.size[E2C2VDim],
            CEDim: grid.size[CellDim] * grid.size[C2EDim],
            CECDim: grid.size[CellDim] * grid.size[C2E2CDim],
        }
    )

    return grid


def construct_decomposition(
    c_glb_index,
    e_glb_index,
    v_glb_index,
    c_owner_mask,
    e_owner_mask,
    v_owner_mask,
    num_cells: int,
    num_edges: int,
    num_verts: int,
    num_levels: int,
    comm_id: int,
):
    """Create processor properties, decomposition info and an exchange object.

    Args:
        c_glb_index, e_glb_index, v_glb_index: global index fields exposing
            ``asnumpy()``; 1 is subtracted from every entry.
        c_owner_mask, e_owner_mask, v_owner_mask: owner-mask fields exposing
            ``asnumpy()``; truncated to the first ``num_cells`` /
            ``num_edges`` / ``num_verts`` entries.
        num_cells, num_edges, num_verts: lengths used to truncate the masks.
        num_levels: passed to ``DecompositionInfo`` as ``klevels``.
        comm_id: communicator identifier forwarded to
            ``get_multinode_properties``.

    Returns:
        Tuple ``(processor_props, decomposition_info, exchange)``.
    """
    c_glb_index_np = offset_fortran_indices_return_numpy(c_glb_index)
    e_glb_index_np = offset_fortran_indices_return_numpy(e_glb_index)
    v_glb_index_np = offset_fortran_indices_return_numpy(v_glb_index)

    # NOTE(review): only the masks are sliced; the global index arrays keep
    # their full length. Assumes both already have matching sizes - confirm.
    c_owner_mask_np = c_owner_mask.asnumpy()[0:num_cells]
    e_owner_mask_np = e_owner_mask.asnumpy()[0:num_edges]
    v_owner_mask_np = v_owner_mask.asnumpy()[0:num_verts]

    decomposition_info = (
        DecompositionInfo(klevels=num_levels)
        .with_dimension(CellDim, c_glb_index_np, c_owner_mask_np)
        .with_dimension(EdgeDim, e_glb_index_np, e_owner_mask_np)
        .with_dimension(VertexDim, v_glb_index_np, v_owner_mask_np)
    )
    processor_props = get_multinode_properties(MultiNodeRun(), comm_id)
    exchange = definitions.create_exchange(processor_props, decomposition_info)

    return processor_props, decomposition_info, exchange


def offset_fortran_indices_return_numpy(inp) -> np.ndarray:
    """Return ``inp.asnumpy()`` with 1 subtracted from every entry."""
    return np.subtract(inp.asnumpy(), 1)


def offset_squeeze_fortran_indices_return_xp(inp) -> xp.ndarray:
    """Return ``inp.ndarray`` minus 1, with all length-1 axes removed."""
    return xp.squeeze(xp.subtract(inp.ndarray, 1))
None # global profiler object profiler = cProfile.Profile() @@ -134,23 +149,83 @@ def diffusion_init( primal_normal_cell_y: Field[[EdgeDim, E2CDim], float64], dual_normal_cell_x: Field[[EdgeDim, E2CDim], float64], dual_normal_cell_y: Field[[EdgeDim, E2CDim], float64], + limited_area: bool, + num_cells: int32, + num_edges: int32, + num_verts: int32, + cells_start_index: Field[[CellIndexDim], int32], + cells_end_index: Field[[CellIndexDim], int32], + edge_start_index: Field[[EdgeIndexDim], int32], + edge_end_index: Field[[EdgeIndexDim], int32], + vert_start_index: Field[[VertexIndexDim], int32], + vert_end_index: Field[[VertexIndexDim], int32], + c2e: Field[[CellDim, SingletonDim, C2EDim], int32], + c2e2c: Field[[CellDim, SingletonDim, C2E2CDim], int32], + v2e: Field[[VertexDim, SingletonDim, V2EDim], int32], + e2c2v: Field[[EdgeDim, SingletonDim, E2C2VDim], int32], + e2c: Field[[EdgeDim, SingletonDim, E2CDim], int32], + c_owner_mask: Field[[CellDim], bool], + e_owner_mask: Field[[EdgeDim], bool], + v_owner_mask: Field[[VertexDim], bool], + c_glb_index: Field[[SpecialADim], int32], + e_glb_index: Field[[SpecialBDim], int32], + v_glb_index: Field[[SpecialCDim], int32], + comm_id: int32, ): - logger.info(f"Using Device = {device}") + log.info(f"Using Device = {device}") # ICON grid - if device.name == "GPU": - on_gpu = True - else: - on_gpu = False + on_gpu = True if device.name == "GPU" else False + + if parallel_run: + icon_grid = construct_icon_grid( + cells_start_index, + cells_end_index, + vert_start_index, + vert_end_index, + edge_start_index, + edge_end_index, + num_cells, + num_edges, + num_verts, + num_levels, + c2e, + c2e2c, + v2e, + e2c2v, + e2c, + True, + on_gpu, + ) - grid_file_path = os.path.join(get_icon_grid_loc(), get_grid_filename()) + processor_props, decomposition_info, exchange = construct_decomposition( + c_glb_index, + e_glb_index, + v_glb_index, + c_owner_mask, + e_owner_mask, + v_owner_mask, + num_cells, + num_edges, + num_verts, + num_levels, 
+ comm_id, + ) - icon_grid = load_grid_from_file( - grid_file=grid_file_path, - num_levels=num_levels, - on_gpu=on_gpu, - limited_area=True if limited_area else False, - ) + check_comm_size(processor_props) + + print_grid_decomp_info( + icon_grid, processor_props, decomposition_info, num_cells, num_edges, num_verts + ) + else: + grid_file_path = os.path.join(get_icon_grid_loc(), get_grid_filename()) + + icon_grid = load_grid_from_file( + grid_file=grid_file_path, + num_levels=num_levels, + on_gpu=on_gpu, + limited_area=True if limited_area else False, + ) # Edge geometry edge_params = EdgeParams( @@ -223,6 +298,14 @@ def diffusion_init( geofac_grg_y=geofac_grg_y, nudgecoeff_e=nudgecoeff_e, ) + + # We need the global keyword here + global diffusion_granule + if parallel_run: + diffusion_granule = Diffusion(exchange=exchange) + else: + diffusion_granule = Diffusion() + diffusion_granule.init( grid=icon_grid, config=config,