From 2eeacb9f5f22a56458b644a93b8cbeacd4844472 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:55:40 -1000 Subject: [PATCH] Make ColumnBase.__cuda_array_interface__ opt out instead of opt in (#15622) Column types that do not support `__cuda_array_interface__` (CAI) already have custom `NotImplementedError`s, and since the implementation is the same for datetime and numeric columns, this moves their implementation to `ColumnBase`. Should help address timedelta support in https://github.com/rapidsai/cudf/pull/15615 cc @brandon-b-miller Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/15622 --- python/cudf/cudf/core/column/column.py | 26 ++++++++++--- python/cudf/cudf/core/column/datetime.py | 27 +------------- python/cudf/cudf/core/column/decimal.py | 12 +++--- python/cudf/cudf/core/column/numerical.py | 37 +------------------ python/cudf/cudf/core/column/string.py | 7 ++++ .../cudf/tests/test_cuda_array_interface.py | 19 ++++++++-- 6 files changed, 53 insertions(+), 75 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7e48552742c..ba2dab2c2e1 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1101,11 +1101,27 @@ def __arrow_array__(self, type=None): ) @property - def __cuda_array_interface__(self): - raise NotImplementedError( - f"dtype {self.dtype} is not yet supported via " - "`__cuda_array_interface__`" - ) + def __cuda_array_interface__(self) -> abc.Mapping[str, Any]: + output = { + "shape": (len(self),), + "strides": (self.dtype.itemsize,), + "typestr": self.dtype.str, + "data": (self.data_ptr, False), + "version": 1, + } + + if self.nullable and self.has_nulls(): + # Create a simple Python object that exposes the + # `__cuda_array_interface__` attribute here since we need to modify + # some of the
attributes from the numba device array + output["mask"] = cuda_array_interface_wrapper( + ptr=self.mask_ptr, + size=len(self), + owner=self.mask, + readonly=True, + typestr=" ScalarLike: return NotImplemented - @property - def __cuda_array_interface__(self) -> Mapping[str, Any]: - output = { - "shape": (len(self),), - "strides": (self.dtype.itemsize,), - "typestr": self.dtype.str, - "data": (self.data_ptr, False), - "version": 1, - } - - if self.nullable and self.has_nulls(): - # Create a simple Python object that exposes the - # `__cuda_array_interface__` attribute here since we need to modify - # some of the attributes from the numba device array - output["mask"] = cuda_array_interface_wrapper( - ptr=self.mask_ptr, - size=len(self), - owner=self.mask, - readonly=True, - typestr=" DatetimeColumn: diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index b83a6ded416..3a0f6649e21 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -38,6 +38,12 @@ class DecimalBaseColumn(NumericalBaseColumn): dtype: DecimalDtype _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS + @property + def __cuda_array_interface__(self): + raise NotImplementedError( + "Decimals are not yet supported via `__cuda_array_interface__`" + ) + def as_decimal_column( self, dtype: Dtype, @@ -342,12 +348,6 @@ def to_arrow(self): buffers=[mask_buf, data_buf], ) - @property - def __cuda_array_interface__(self): - raise NotImplementedError( - "Decimals are not yet supported via `__cuda_array_interface__`" - ) - def _with_type_metadata( self: "cudf.core.column.Decimal64Column", dtype: Dtype ) -> "cudf.core.column.Decimal64Column": diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index f42c87de3fd..4c211a173b1 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -3,16 +3,7 @@ from __future__ 
import annotations import functools -from typing import ( - Any, - Callable, - Mapping, - Optional, - Sequence, - Tuple, - Union, - cast, -) +from typing import Any, Callable, Optional, Sequence, Tuple, Union, cast import cupy as cp import numpy as np @@ -37,7 +28,7 @@ is_integer_dtype, is_scalar, ) -from cudf.core.buffer import Buffer, cuda_array_interface_wrapper +from cudf.core.buffer import Buffer from cudf.core.column import ( ColumnBase, as_column, @@ -194,30 +185,6 @@ def __setitem__(self, key: Any, value: Any): if out: self._mimic_inplace(out, inplace=True) - @property - def __cuda_array_interface__(self) -> Mapping[str, Any]: - output = { - "shape": (len(self),), - "strides": (self.dtype.itemsize,), - "typestr": self.dtype.str, - "data": (self.data_ptr, False), - "version": 1, - } - - if self.nullable and self.has_nulls(): - # Create a simple Python object that exposes the - # `__cuda_array_interface__` attribute here since we need to modify - # some of the attributes from the numba device array - output["mask"] = cuda_array_interface_wrapper( - ptr=self.mask_ptr, - size=len(self), - owner=self.mask, - readonly=True, - typestr=" ColumnBase: if callable(unaryop): return libcudf.transform.transform(self, unaryop) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 8143e7919a7..3e941d60079 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5600,6 +5600,13 @@ def data_array_view( ) -> cuda.devicearray.DeviceNDArray: raise ValueError("Cannot get an array view of a StringColumn") + @property + def __cuda_array_interface__(self): + raise NotImplementedError( + f"dtype {self.dtype} is not yet supported via " + "`__cuda_array_interface__`" + ) + def to_arrow(self) -> pa.Array: """Convert to PyArrow Array diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py index 213c6c2c1f9..f98c3ad0475 100644 --- 
a/python/cudf/cudf/tests/test_cuda_array_interface.py +++ b/python/cudf/cudf/tests/test_cuda_array_interface.py @@ -11,7 +11,12 @@ import cudf from cudf.core.buffer.spill_manager import get_global_manager -from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq +from cudf.testing._utils import ( + DATETIME_TYPES, + NUMERIC_TYPES, + TIMEDELTA_TYPES, + assert_eq, +) @pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES) @@ -42,7 +47,9 @@ def test_cuda_array_interface_interop_in(dtype, module): assert_eq(pd_data, gdf["test"]) -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES + ["str"]) +@pytest.mark.parametrize( + "dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES + ["str"] +) @pytest.mark.parametrize("module", ["cupy", "numba"]) def test_cuda_array_interface_interop_out(dtype, module): expectation = does_not_raise() @@ -73,7 +80,9 @@ def to_host_function(x): assert_eq(expect, got) -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES) +@pytest.mark.parametrize( + "dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES +) @pytest.mark.parametrize("module", ["cupy", "numba"]) def test_cuda_array_interface_interop_out_masked(dtype, module): expectation = does_not_raise() @@ -104,7 +113,9 @@ def to_host_function(x): module_data = module_constructor(cudf_data) # noqa: F841 -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES) +@pytest.mark.parametrize( + "dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES +) @pytest.mark.parametrize("nulls", ["all", "some", "bools", "none"]) @pytest.mark.parametrize("mask_type", ["bits", "bools"]) def test_cuda_array_interface_as_column(dtype, nulls, mask_type):