From d62c188372224ad0e89b5fb2030736ab9e25988c Mon Sep 17 00:00:00 2001
From: Enrique Gonzalez Paredes
Date: Fri, 20 Sep 2024 13:03:55 +0200
Subject: [PATCH] Second step

---
 src/gt4py/_core/definitions.py       |  1 +
 src/gt4py/storage/allocators.py      | 12 ------------
 src/gt4py/storage/cartesian/utils.py | 33 ++++++++-------------------------
 3 files changed, 9 insertions(+), 37 deletions(-)

diff --git a/src/gt4py/_core/definitions.py b/src/gt4py/_core/definitions.py
index 880e20309a..9d07b2eb79 100644
--- a/src/gt4py/_core/definitions.py
+++ b/src/gt4py/_core/definitions.py
@@ -382,6 +382,7 @@ class DeviceType(enum.IntEnum):
     CUDA_MANAGED = 13
     ONE_API = 14
 
+
 CPUDeviceTyping: TypeAlias = Literal[DeviceType.CPU]
 CUDADeviceTyping: TypeAlias = Literal[DeviceType.CUDA]
 CPUPinnedDeviceTyping: TypeAlias = Literal[DeviceType.CPU_PINNED]
diff --git a/src/gt4py/storage/allocators.py b/src/gt4py/storage/allocators.py
index fa9005e86b..298b9c2e5a 100644
--- a/src/gt4py/storage/allocators.py
+++ b/src/gt4py/storage/allocators.py
@@ -259,18 +259,6 @@ def allocate(
             buffer, dtype, shape, padded_shape, item_size, strides, byte_offset
         )
 
-        if self.device_type == core_defs.DeviceType.ROCM:
-            # until we can rely on dlpack
-            ndarray.__hip_array_interface__ = {  # type: ignore[attr-defined]
-                "shape": ndarray.shape,  # type: ignore[union-attr]
-                "typestr": ndarray.dtype.descr[0][1],  # type: ignore[union-attr]
-                "descr": ndarray.dtype.descr,  # type: ignore[union-attr]
-                "stream": 1,
-                "version": 3,
-                "strides": ndarray.strides,  # type: ignore[union-attr, attr-defined]
-                "data": (ndarray.data.ptr, False),  # type: ignore[union-attr, attr-defined]
-            }
-
         return TensorBuffer(
             buffer=buffer,
             memory_address=memory_address,
diff --git a/src/gt4py/storage/cartesian/utils.py b/src/gt4py/storage/cartesian/utils.py
index b698fc534c..5a1e7c4706 100644
--- a/src/gt4py/storage/cartesian/utils.py
+++ b/src/gt4py/storage/cartesian/utils.py
@@ -33,16 +33,10 @@
     cp = None
 
 
-CUPY_DEVICE: Final[
-    Literal[None, core_defs.DeviceType.CUDA, core_defs.DeviceType.ROCM]
-] = (
+CUPY_DEVICE: Final[Literal[None, core_defs.DeviceType.CUDA, core_defs.DeviceType.ROCM]] = (
     None
     if not cp
-    else (
-        core_defs.DeviceType.ROCM
-        if cp.cuda.get_hipcc_path()
-        else core_defs.DeviceType.CUDA
-    )
+    else (core_defs.DeviceType.ROCM if cp.cuda.get_hipcc_path() else core_defs.DeviceType.CUDA)
 )
 
 
@@ -76,8 +70,7 @@ def _compute_padded_shape(shape, items_per_alignment, order_idx):
     padded_shape = list(shape)
     if len(order_idx) > 0:
         padded_shape[order_idx[-1]] = int(
-            math.ceil(padded_shape[order_idx[-1]] / items_per_alignment)
-            * items_per_alignment
+            math.ceil(padded_shape[order_idx[-1]] / items_per_alignment) * items_per_alignment
         )
     return padded_shape
 
@@ -165,9 +158,7 @@ def normalize_storage_spec(
         aligned_index = tuple(aligned_index)
 
         if any(i < 0 for i in aligned_index):
-            raise ValueError(
-                "aligned_index ({}) contains negative value.".format(aligned_index)
-            )
+            raise ValueError("aligned_index ({}) contains negative value.".format(aligned_index))
     else:
         raise TypeError("aligned_index must be an iterable of ints.")
 
@@ -224,24 +215,20 @@ def asarray(
         # CPU device should always be 0
         raise ValueError(f"Invalid device: {device!s}")
 
-    if xp:
+    if xp:
        return xp.asarray(array)
 
     raise TypeError(f"Cannot convert {type(array)} to ndarray")
 
 
-def get_dims(
-    obj: Union[core_defs.GTDimsInterface, npt.NDArray]
-) -> Optional[Tuple[str, ...]]:
+def get_dims(obj: Union[core_defs.GTDimsInterface, npt.NDArray]) -> Optional[Tuple[str, ...]]:
     dims = getattr(obj, "__gt_dims__", None)
     if dims is None:
         return dims
     return tuple(str(d) for d in dims)
 
 
-def get_origin(
-    obj: Union[core_defs.GTDimsInterface, npt.NDArray]
-) -> Optional[Tuple[int, ...]]:
+def get_origin(obj: Union[core_defs.GTDimsInterface, npt.NDArray]) -> Optional[Tuple[int, ...]]:
     origin = getattr(obj, "__gt_origin__", None)
     if origin is None:
         return origin
@@ -276,11 +263,7 @@ def allocate_gpu(
 ) -> Tuple["cp.ndarray", "cp.ndarray"]:
     assert _GPUBufferAllocator is not None, "GPU allocation library or device not found"
     device = core_defs.Device(  # type: ignore[type-var]
-        (
-            core_defs.DeviceType.ROCM
-            if gt_config.GT4PY_USE_HIP
-            else core_defs.DeviceType.CUDA
-        ),
+        (core_defs.DeviceType.ROCM if gt_config.GT4PY_USE_HIP else core_defs.DeviceType.CUDA),
         0,
     )
     buffer = _GPUBufferAllocator.allocate(