Skip to content

Commit

Permalink
Make ColumnBase.__cuda_array_interface__ opt out instead of opt in (#…
Browse files Browse the repository at this point in the history
…15622)

Column types that do not support CAI (`__cuda_array_interface__`) already raise their own custom `NotImplementedError`s, and the implementation is identical for datetime and numeric columns, so this change moves that shared implementation to `ColumnBase`.

This should help address timedelta support in #15615. cc @brandon-b-miller

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15622
  • Loading branch information
mroeschke authored May 1, 2024
1 parent 4da6fda commit 2eeacb9
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 75 deletions.
26 changes: 21 additions & 5 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -1101,11 +1101,27 @@ def __arrow_array__(self, type=None):
)

@property
def __cuda_array_interface__(self):
raise NotImplementedError(
f"dtype {self.dtype} is not yet supported via "
"`__cuda_array_interface__`"
)
def __cuda_array_interface__(self) -> abc.Mapping[str, Any]:
output = {
"shape": (len(self),),
"strides": (self.dtype.itemsize,),
"typestr": self.dtype.str,
"data": (self.data_ptr, False),
"version": 1,
}

if self.nullable and self.has_nulls():
# Create a simple Python object that exposes the
# `__cuda_array_interface__` attribute here since we need to modify
# some of the attributes from the numba device array
output["mask"] = cuda_array_interface_wrapper(
ptr=self.mask_ptr,
size=len(self),
owner=self.mask,
readonly=True,
typestr="<t1",
)
return output

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
return _array_ufunc(self, ufunc, method, inputs, kwargs)
Expand Down
27 changes: 2 additions & 25 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import locale
import re
from locale import nl_langinfo
from typing import Any, Mapping, Optional, Sequence, cast
from typing import Any, Optional, Sequence, cast

import numpy as np
import pandas as pd
Expand All @@ -25,7 +25,7 @@
)
from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype
from cudf.core._compat import PANDAS_GE_220
from cudf.core.buffer import Buffer, cuda_array_interface_wrapper
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase, as_column, column, string
from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
from cudf.utils.dtypes import _get_base_dtype
Expand Down Expand Up @@ -399,29 +399,6 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike:

return NotImplemented

@property
def __cuda_array_interface__(self) -> Mapping[str, Any]:
output = {
"shape": (len(self),),
"strides": (self.dtype.itemsize,),
"typestr": self.dtype.str,
"data": (self.data_ptr, False),
"version": 1,
}

if self.nullable and self.has_nulls():
# Create a simple Python object that exposes the
# `__cuda_array_interface__` attribute here since we need to modify
# some of the attributes from the numba device array
output["mask"] = cuda_array_interface_wrapper(
ptr=self.mask_ptr,
size=len(self),
owner=self.mask,
readonly=True,
typestr="<t1",
)
return output

def as_datetime_column(
self, dtype: Dtype, format: str | None = None
) -> DatetimeColumn:
Expand Down
12 changes: 6 additions & 6 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ class DecimalBaseColumn(NumericalBaseColumn):
dtype: DecimalDtype
_VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS

@property
def __cuda_array_interface__(self):
raise NotImplementedError(
"Decimals are not yet supported via `__cuda_array_interface__`"
)

def as_decimal_column(
self,
dtype: Dtype,
Expand Down Expand Up @@ -342,12 +348,6 @@ def to_arrow(self):
buffers=[mask_buf, data_buf],
)

@property
def __cuda_array_interface__(self):
raise NotImplementedError(
"Decimals are not yet supported via `__cuda_array_interface__`"
)

def _with_type_metadata(
self: "cudf.core.column.Decimal64Column", dtype: Dtype
) -> "cudf.core.column.Decimal64Column":
Expand Down
37 changes: 2 additions & 35 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,7 @@
from __future__ import annotations

import functools
from typing import (
Any,
Callable,
Mapping,
Optional,
Sequence,
Tuple,
Union,
cast,
)
from typing import Any, Callable, Optional, Sequence, Tuple, Union, cast

import cupy as cp
import numpy as np
Expand All @@ -37,7 +28,7 @@
is_integer_dtype,
is_scalar,
)
from cudf.core.buffer import Buffer, cuda_array_interface_wrapper
from cudf.core.buffer import Buffer
from cudf.core.column import (
ColumnBase,
as_column,
Expand Down Expand Up @@ -194,30 +185,6 @@ def __setitem__(self, key: Any, value: Any):
if out:
self._mimic_inplace(out, inplace=True)

@property
def __cuda_array_interface__(self) -> Mapping[str, Any]:
output = {
"shape": (len(self),),
"strides": (self.dtype.itemsize,),
"typestr": self.dtype.str,
"data": (self.data_ptr, False),
"version": 1,
}

if self.nullable and self.has_nulls():
# Create a simple Python object that exposes the
# `__cuda_array_interface__` attribute here since we need to modify
# some of the attributes from the numba device array
output["mask"] = cuda_array_interface_wrapper(
ptr=self.mask_ptr,
size=len(self),
owner=self.mask,
readonly=True,
typestr="<t1",
)

return output

def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase:
if callable(unaryop):
return libcudf.transform.transform(self, unaryop)
Expand Down
7 changes: 7 additions & 0 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5600,6 +5600,13 @@ def data_array_view(
) -> cuda.devicearray.DeviceNDArray:
raise ValueError("Cannot get an array view of a StringColumn")

@property
def __cuda_array_interface__(self):
raise NotImplementedError(
f"dtype {self.dtype} is not yet supported via "
"`__cuda_array_interface__`"
)

def to_arrow(self) -> pa.Array:
"""Convert to PyArrow Array
Expand Down
19 changes: 15 additions & 4 deletions python/cudf/cudf/tests/test_cuda_array_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@

import cudf
from cudf.core.buffer.spill_manager import get_global_manager
from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq
from cudf.testing._utils import (
DATETIME_TYPES,
NUMERIC_TYPES,
TIMEDELTA_TYPES,
assert_eq,
)


@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES)
Expand Down Expand Up @@ -42,7 +47,9 @@ def test_cuda_array_interface_interop_in(dtype, module):
assert_eq(pd_data, gdf["test"])


@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES + ["str"])
@pytest.mark.parametrize(
"dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES + ["str"]
)
@pytest.mark.parametrize("module", ["cupy", "numba"])
def test_cuda_array_interface_interop_out(dtype, module):
expectation = does_not_raise()
Expand Down Expand Up @@ -73,7 +80,9 @@ def to_host_function(x):
assert_eq(expect, got)


@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES)
@pytest.mark.parametrize(
"dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES
)
@pytest.mark.parametrize("module", ["cupy", "numba"])
def test_cuda_array_interface_interop_out_masked(dtype, module):
expectation = does_not_raise()
Expand Down Expand Up @@ -104,7 +113,9 @@ def to_host_function(x):
module_data = module_constructor(cudf_data) # noqa: F841


@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES)
@pytest.mark.parametrize(
"dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES
)
@pytest.mark.parametrize("nulls", ["all", "some", "bools", "none"])
@pytest.mark.parametrize("mask_type", ["bits", "bools"])
def test_cuda_array_interface_as_column(dtype, nulls, mask_type):
Expand Down

0 comments on commit 2eeacb9

Please sign in to comment.