Merge remote-tracking branch 'upstream/main' into fix_gpu_tox

GridTools · Nov 15, 2023 · 2444e1f · 2444e1f
2 parents 2dd4c70 + 0df592d
commit 2444e1f
Show file tree

Hide file tree

Showing 66 changed files with 2,277 additions and 628 deletions.
diff --git a/docs/development/ADRs/0008-Mapping_Domain_to_Cpp-Backend.md b/docs/development/ADRs/0008-Mapping_Domain_to_Cpp-Backend.md
@@ -7,10 +7,13 @@ tags: []
 - **Status**: valid
 - **Authors**: Hannes Vogt (@havogt)
 - **Created**: 2022-06-29
-- **Updated**: 2022-06-29
+- **Updated**: 2023-11-08
 
 This document proposes a (temporary) solution for mapping domain dimensions to field dimensions.
 
+> [!NOTE]
+> This ADR was written before the integration of `gt4py.storage` into `gt4py.next`, so the example is using `np_as_located_field` (now deprecated) instead of `gtx.as_field.partial`. The idea conveyed by the example remains unchanged.
+
 ## Context
 
 The Python embedded execution for Iterator IR keeps track of the current location type of an iterator (allows safety checks) while the C++ backend does not.

diff --git a/docs/development/ADRs/Index.md b/docs/development/ADRs/Index.md
@@ -45,6 +45,7 @@ _None_
 - [0006 - C++ Backend](0006-Cpp-Backend.md)
 - [0007 - Fencil Processors](0007-Fencil-Processors.md)
 - [0008 - Mapping Domain to Cpp Backend](0008-Mapping_Domain_to_Cpp-Backend.md)
+- [0016 - Multiple Backends and Build Systems](0016-Multiple-Backends-and-Build-Systems.md)
 
 ### Python Integration
 

diff --git a/docs/user/next/QuickstartGuide.md b/docs/user/next/QuickstartGuide.md
@@ -51,7 +51,7 @@ from gt4py.next import float64, neighbor_sum, where
 
 #### Fields
 
-Fields store data as a multi-dimensional array, and are defined over a set of named dimensions. The code snippet below defines two named dimensions, _cell_ and _K_, and creates the fields `a` and `b` over their cartesian product using the `np_as_located_field` helper function. The fields contain the values 2 for `a` and 3 for `b` for all entries.
+Fields store data as a multi-dimensional array, and are defined over a set of named dimensions. The code snippet below defines two named dimensions, _Cell_ and _K_, and creates the fields `a` and `b` over their cartesian product using the `gtx.as_field` helper function. The fields contain the values 2 for `a` and 3 for `b` for all entries.
 
 ```{code-cell} ipython3
 CellDim = gtx.Dimension("Cell")
@@ -63,8 +63,20 @@ grid_shape = (num_cells, num_layers)
 
 a_value = 2.0
 b_value = 3.0
-a = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=a_value, dtype=np.float64))
-b = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=b_value, dtype=np.float64))
+a = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=a_value, dtype=np.float64))
+b = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=b_value, dtype=np.float64))
+```
+
+Additional numpy-equivalent constructors are available, namely `ones`, `zeros`, `empty`, `full`. These require domain, dtype, and allocator (e.g. a backend) specifications.
+
+```{code-cell} ipython3
+from gt4py._core import definitions as core_defs
+array_of_ones_numpy = np.ones((grid_shape[0], grid_shape[1]))
+field_of_ones = gtx.constructors.ones(
+    domain={CellDim: range(grid_shape[0]), KDim: range(grid_shape[1])},
+    dtype=core_defs.dtype(np.float64),
+    allocator=gtx.program_processors.runners.roundtrip.backend
+)
 ```
 
 _Note: The interface to construct fields is provisional only and will change soon._
@@ -87,7 +99,7 @@ def add(a: gtx.Field[[CellDim, KDim], float64],
 You can call field operators from [programs](#Programs), other field operators, or directly. The code snippet below shows a direct call, in which case you have to supply two additional arguments: `out`, which is a field to write the return value to, and `offset_provider`, which is left empty for now. The result of the field operator is a field with all entries equal to 5, but for brevity, only the average and the standard deviation of the entries are printed:
 
 ```{code-cell} ipython3
-result = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape))
+result = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape))
 add(a, b, out=result, offset_provider={})
 
 print("{} + {} = {} ± {}".format(a_value, b_value, np.average(np.asarray(result)), np.std(np.asarray(result))))
@@ -113,7 +125,7 @@ def run_add(a : gtx.Field[[CellDim, KDim], float64],
 You can execute the program by simply calling it:
 
 ```{code-cell} ipython3
-result = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape))
+result = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape))
 run_add(a, b, result, offset_provider={})
 
 print("{} + {} = {} ± {}".format(b_value, (a_value + b_value), np.average(np.asarray(result)), np.std(np.asarray(result))))
@@ -200,8 +212,8 @@ cell_to_edge_table = np.array([
 Let's start by defining two fields: one over the cells and another one over the edges. The field over cells serves as input for subsequent calculations and is therefore filled up with values, whereas the field over the edges stores the output of the calculations and is therefore left blank.
 
 ```{code-cell} ipython3
-cell_values = gtx.np_as_located_field(CellDim)(np.array([1.0, 1.0, 2.0, 3.0, 5.0, 8.0]))
-edge_values = gtx.np_as_located_field(EdgeDim)(np.zeros((12,)))
+cell_values = gtx.as_field([CellDim], np.array([1.0, 1.0, 2.0, 3.0, 5.0, 8.0]))
+edge_values = gtx.as_field([EdgeDim], np.zeros((12,)))
 ```
 
 | ![cell_values](connectivity_cell_field.svg) |
@@ -295,8 +307,8 @@ This function takes 3 input arguments:
   In the case where the true and false branches are either fields or scalars, the resulting output will be a field including all dimensions from all inputs. For example:
 
 ```{code-cell} ipython3
-mask = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape, dtype=bool))
-result_where = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape))
+mask = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape, dtype=bool))
+result_where = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape))
 b = 6.0
 
 @gtx.field_operator
@@ -313,8 +325,8 @@ print("where return: {}".format(np.asarray(result_where)))
 The `where` builtin supports returning tuples of fields. To perform promotion of dimensions and dtype of the output, all arguments are analyzed and promoted as in the above section.
 
 ```{code-cell} ipython3
-result_1 = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape))
-result_2 = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape))
+result_1 = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape))
+result_2 = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape))
 
 @gtx.field_operator
 def _conditional_tuple(mask: gtx.Field[[CellDim, KDim], bool], a: gtx.Field[[CellDim, KDim], float64], b: float
@@ -338,13 +350,13 @@ The `where` builtin also allows for nesting of tuples. In this scenario, it will
 and then combine results to match the return type:
 
 ```{code-cell} ipython3
-a = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=2.0, dtype=np.float64))
-b = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=3.0, dtype=np.float64))
-c = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=4.0, dtype=np.float64))
-d = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=5.0, dtype=np.float64))
+a = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=2.0, dtype=np.float64))
+b = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=3.0, dtype=np.float64))
+c = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=4.0, dtype=np.float64))
+d = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=5.0, dtype=np.float64))
 
-result_1 = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape))
-result_2 = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape))
+result_1 = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape))
+result_2 = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape))
 
 @gtx.field_operator
 def _conditional_tuple_nested(
@@ -402,7 +414,7 @@ edge_weights = np.array([
     [0, -1, -1], # cell 5
 ], dtype=np.float64)
 
-edge_weight_field = gtx.np_as_located_field(CellDim, C2EDim)(edge_weights)
+edge_weight_field = gtx.as_field([CellDim, C2EDim], edge_weights)
 ```
 
 Now you have everything to implement the pseudo-laplacian. Its field operator requires the cell field and the edge weights as inputs, and outputs a cell field of the same shape as the input.
@@ -428,7 +440,7 @@ def run_pseudo_laplacian(cells : gtx.Field[[CellDim], float64],
                          out : gtx.Field[[CellDim], float64]):
     pseudo_lap(cells, edge_weights, out=out)
 
-result_pseudo_lap = gtx.np_as_located_field(CellDim)(np.zeros(shape=(6,)))
+result_pseudo_lap = gtx.as_field([CellDim], np.zeros(shape=(6,)))
 
 run_pseudo_laplacian(cell_values,
                      edge_weight_field,

diff --git a/src/gt4py/_core/definitions.py b/src/gt4py/_core/definitions.py
@@ -25,6 +25,7 @@
 import numpy as np
 import numpy.typing as npt
 
+import gt4py.eve as eve
 from gt4py.eve.extended_typing import (
     TYPE_CHECKING,
     Any,
@@ -71,33 +72,33 @@
 float64 = np.float64
 
 BoolScalar: TypeAlias = Union[bool_, bool]
-BoolT = TypeVar("BoolT", bound=Union[bool_, bool])
+BoolT = TypeVar("BoolT", bound=BoolScalar)
 BOOL_TYPES: Final[Tuple[type, ...]] = cast(Tuple[type, ...], BoolScalar.__args__)  # type: ignore[attr-defined]
 
 
 IntScalar: TypeAlias = Union[int8, int16, int32, int64, int]
-IntT = TypeVar("IntT", bound=Union[int8, int16, int32, int64, int])
+IntT = TypeVar("IntT", bound=IntScalar)
 INT_TYPES: Final[Tuple[type, ...]] = cast(Tuple[type, ...], IntScalar.__args__)  # type: ignore[attr-defined]
 
 
 UnsignedIntScalar: TypeAlias = Union[uint8, uint16, uint32, uint64]
-UnsignedIntT = TypeVar("UnsignedIntT", bound=Union[uint8, uint16, uint32, uint64])
+UnsignedIntT = TypeVar("UnsignedIntT", bound=UnsignedIntScalar)
 UINT_TYPES: Final[Tuple[type, ...]] = cast(Tuple[type, ...], UnsignedIntScalar.__args__)  # type: ignore[attr-defined]
 
 
 IntegralScalar: TypeAlias = Union[IntScalar, UnsignedIntScalar]
-IntegralT = TypeVar("IntegralT", bound=Union[IntScalar, UnsignedIntScalar])
+IntegralT = TypeVar("IntegralT", bound=IntegralScalar)
 INTEGRAL_TYPES: Final[Tuple[type, ...]] = (*INT_TYPES, *UINT_TYPES)
 
 
 FloatingScalar: TypeAlias = Union[float32, float64, float]
-FloatingT = TypeVar("FloatingT", bound=Union[float32, float64, float])
+FloatingT = TypeVar("FloatingT", bound=FloatingScalar)
 FLOAT_TYPES: Final[Tuple[type, ...]] = cast(Tuple[type, ...], FloatingScalar.__args__)  # type: ignore[attr-defined]
 
 
 #: Type alias for all scalar types supported by GT4Py
 Scalar: TypeAlias = Union[BoolScalar, IntegralScalar, FloatingScalar]
-ScalarT = TypeVar("ScalarT", bound=Union[BoolScalar, IntegralScalar, FloatingScalar])
+ScalarT = TypeVar("ScalarT", bound=Scalar)
 SCALAR_TYPES: Final[tuple[type, ...]] = (*BOOL_TYPES, *INTEGRAL_TYPES, *FLOAT_TYPES)
 
 
@@ -139,7 +140,7 @@ def is_valid_tensor_shape(
 
 
 # -- Data type descriptors --
-class DTypeKind(enum.Enum):
+class DTypeKind(eve.StrEnum):
     """
     Kind of a specific data type.
 
@@ -368,7 +369,7 @@ def __gt_origin__(self) -> Tuple[int, ...]:
 
 
 # -- Device representation --
-class DeviceType(enum.Enum):
+class DeviceType(enum.IntEnum):
     """The type of the device where a memory buffer is allocated.
 
     Enum values taken from DLPack reference implementation at:
@@ -385,8 +386,31 @@ class DeviceType(enum.Enum):
     ROCM = 10
 
 
+CPUDeviceTyping: TypeAlias = Literal[DeviceType.CPU]
+CUDADeviceTyping: TypeAlias = Literal[DeviceType.CUDA]
+CPUPinnedDeviceTyping: TypeAlias = Literal[DeviceType.CPU_PINNED]
+OpenCLDeviceTyping: TypeAlias = Literal[DeviceType.OPENCL]
+VulkanDeviceTyping: TypeAlias = Literal[DeviceType.VULKAN]
+MetalDeviceTyping: TypeAlias = Literal[DeviceType.METAL]
+VPIDeviceTyping: TypeAlias = Literal[DeviceType.VPI]
+ROCMDeviceTyping: TypeAlias = Literal[DeviceType.ROCM]
+
+
+DeviceTypeT = TypeVar(
+    "DeviceTypeT",
+    CPUDeviceTyping,
+    CUDADeviceTyping,
+    CPUPinnedDeviceTyping,
+    OpenCLDeviceTyping,
+    VulkanDeviceTyping,
+    MetalDeviceTyping,
+    VPIDeviceTyping,
+    ROCMDeviceTyping,
+)
+
+
 @dataclasses.dataclass(frozen=True)
-class Device:
+class Device(Generic[DeviceTypeT]):
     """
     Representation of a computing device.
 
@@ -397,10 +421,10 @@ class Device:
     core number, for `DeviceType.CUDA` it could be the CUDA device number, etc.
     """
 
-    device_type: DeviceType
+    device_type: DeviceTypeT
     device_id: int
 
-    def __iter__(self) -> Iterator[DeviceType | int]:
+    def __iter__(self) -> Iterator[DeviceTypeT | int]:
         yield self.device_type
         yield self.device_id
 
@@ -409,7 +433,7 @@ def __iter__(self) -> Iterator[DeviceType | int]:
 SliceLike = Union[int, Tuple[int, ...], None, slice, "NDArrayObject"]
 
 
-class NDArrayObjectProto(Protocol):
+class NDArrayObject(Protocol):
     @property
     def ndim(self) -> int:
         ...
@@ -422,7 +446,7 @@ def shape(self) -> tuple[int, ...]:
     def dtype(self) -> Any:
         ...
 
-    def __getitem__(self, item: SliceLike) -> NDArrayObject:
+    def __getitem__(self, item: Any) -> NDArrayObject:
         ...
 
     def __abs__(self) -> NDArrayObject:
@@ -434,38 +458,32 @@ def __neg__(self) -> NDArrayObject:
     def __add__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
         ...
 
-    def __radd__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
+    def __radd__(self, other: Any) -> NDArrayObject:
         ...
 
     def __sub__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
         ...
 
-    def __rsub__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
+    def __rsub__(self, other: Any) -> NDArrayObject:
         ...
 
     def __mul__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
         ...
 
-    def __rmul__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
+    def __rmul__(self, other: Any) -> NDArrayObject:
         ...
 
     def __floordiv__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
         ...
 
-    def __rfloordiv__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
+    def __rfloordiv__(self, other: Any) -> NDArrayObject:
         ...
 
     def __truediv__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
         ...
 
-    def __rtruediv__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
+    def __rtruediv__(self, other: Any) -> NDArrayObject:
         ...
 
     def __pow__(self, other: NDArrayObject | Scalar) -> NDArrayObject:
         ...
-
-
-NDArrayObject = Union[npt.NDArray, "CuPyNDArray", "JaxNDArray", NDArrayObjectProto]
-NDArrayObjectT = TypeVar(
-    "NDArrayObjectT", npt.NDArray, "CuPyNDArray", "JaxNDArray", NDArrayObjectProto, covariant=True
-)
diff --git a/src/gt4py/eve/codegen.py b/src/gt4py/eve/codegen.py
@@ -155,7 +155,7 @@ def format_cpp_source(
     ) -> str:
         """Format C++ source code using clang-format."""
         assert isinstance(_CLANG_FORMAT_EXECUTABLE, str)
-        args = [_CLANG_FORMAT_EXECUTABLE]
+        args = [_CLANG_FORMAT_EXECUTABLE, "--assume-filename=_gt4py_generated_file.cpp"]
         if style:
             args.append(f"--style={style}")
         if fallback_style: