Skip to content

Commit 88b1e00

Browse files
authored
Merge pull request rapidsai#15800 from rapidsai/branch-24.06
Forward-merge branch-24.06 into branch-24.08
2 parents d67e073 + 60d5717 commit 88b1e00

File tree

5 files changed: +53 -40 lines changed

python/cudf/cudf/core/_base_index.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def name(self):
145145
raise NotImplementedError
146146

147147
@property # type: ignore
148-
def ndim(self): # noqa: D401
148+
def ndim(self) -> int: # noqa: D401
149149
"""Number of dimensions of the underlying data, by definition 1."""
150150
return 1
151151

python/cudf/cudf/core/dataframe.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1234,7 +1234,7 @@ def dtypes(self):
12341234
return pd.Series(self._dtypes, dtype="object")
12351235

12361236
@property
1237-
def ndim(self):
1237+
def ndim(self) -> int:
12381238
"""Dimension of the data. DataFrame ndim is always 2."""
12391239
return 2
12401240

python/cudf/cudf/core/frame.py

+49-36
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import itertools
77
import operator
88
import pickle
9+
import types
910
import warnings
1011
from collections import abc
1112
from typing import (
@@ -91,6 +92,10 @@ def _dtypes(self):
9192
zip(self._data.names, (col.dtype for col in self._data.columns))
9293
)
9394

95+
@property
96+
def ndim(self) -> int:
97+
raise NotImplementedError()
98+
9499
@_cudf_nvtx_annotate
95100
def serialize(self):
96101
# TODO: See if self._data can be serialized outright
@@ -417,51 +422,60 @@ def __arrow_array__(self, type=None):
417422
@_cudf_nvtx_annotate
418423
def _to_array(
419424
self,
420-
get_column_values: Callable,
421-
make_empty_matrix: Callable,
425+
get_array: Callable,
426+
module: types.ModuleType,
427+
copy: bool,
422428
dtype: Union[Dtype, None] = None,
423429
na_value=None,
424-
) -> Union[cupy.ndarray, np.ndarray]:
430+
) -> Union[cupy.ndarray, numpy.ndarray]:
425431
# Internal function to implement to_cupy and to_numpy, which are nearly
426432
# identical except for the attribute they access to generate values.
427433

428-
def get_column_values_na(col):
434+
def to_array(
435+
col: ColumnBase, dtype: np.dtype
436+
) -> Union[cupy.ndarray, numpy.ndarray]:
429437
if na_value is not None:
430438
col = col.fillna(na_value)
431-
return get_column_values(col)
439+
array = get_array(col)
440+
casted_array = module.asarray(array, dtype=dtype)
441+
if copy and casted_array is array:
442+
# Don't double copy after asarray
443+
casted_array = casted_array.copy()
444+
return casted_array
432445

433-
# Early exit for an empty Frame.
434446
ncol = self._num_columns
435447
if ncol == 0:
436-
return make_empty_matrix(
437-
shape=(len(self), ncol), dtype=np.dtype("float64"), order="F"
448+
return module.empty(
449+
shape=(len(self), ncol),
450+
dtype=numpy.dtype("float64"),
451+
order="F",
438452
)
439453

440454
if dtype is None:
441-
dtypes = [col.dtype for col in self._data.values()]
442-
for dtype in dtypes:
443-
if isinstance(
444-
dtype,
445-
(
446-
cudf.ListDtype,
447-
cudf.core.dtypes.DecimalDtype,
448-
cudf.StructDtype,
449-
),
450-
):
451-
raise NotImplementedError(
452-
f"{dtype} cannot be exposed as a cupy array"
453-
)
454-
dtype = find_common_type(dtypes)
455+
if ncol == 1:
456+
dtype = next(iter(self._data.values())).dtype
457+
else:
458+
dtype = find_common_type(
459+
[col.dtype for col in self._data.values()]
460+
)
455461

456-
matrix = make_empty_matrix(
457-
shape=(len(self), ncol), dtype=dtype, order="F"
458-
)
459-
for i, col in enumerate(self._data.values()):
460-
# TODO: col.values may fail if there is nullable data or an
461-
# unsupported dtype. We may want to catch and provide a more
462-
# suitable error.
463-
matrix[:, i] = get_column_values_na(col)
464-
return matrix
462+
if not isinstance(dtype, numpy.dtype):
463+
raise NotImplementedError(
464+
f"{dtype} cannot be exposed as an array"
465+
)
466+
467+
if self.ndim == 1:
468+
return to_array(self._data.columns[0], dtype)
469+
else:
470+
matrix = module.empty(
471+
shape=(len(self), ncol), dtype=dtype, order="F"
472+
)
473+
for i, col in enumerate(self._data.values()):
474+
# TODO: col.values may fail if there is nullable data or an
475+
# unsupported dtype. We may want to catch and provide a more
476+
# suitable error.
477+
matrix[:, i] = to_array(col, dtype)
478+
return matrix
465479

466480
# TODO: As of now, calling cupy.asarray is _much_ faster than calling
467481
# to_cupy. We should investigate the reasons why and whether we can provide
@@ -496,10 +510,9 @@ def to_cupy(
496510
cupy.ndarray
497511
"""
498512
return self._to_array(
499-
(lambda col: col.values.copy())
500-
if copy
501-
else (lambda col: col.values),
502-
cupy.empty,
513+
lambda col: col.values,
514+
cupy,
515+
copy,
503516
dtype,
504517
na_value,
505518
)
@@ -536,7 +549,7 @@ def to_numpy(
536549
)
537550

538551
return self._to_array(
539-
(lambda col: col.values_host), np.empty, dtype, na_value
552+
lambda col: col.values_host, numpy, copy, dtype, na_value
540553
)
541554

542555
@_cudf_nvtx_annotate

python/cudf/cudf/core/multiindex.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,7 @@ def levels(self):
563563

564564
@property # type: ignore
565565
@_cudf_nvtx_annotate
566-
def ndim(self):
566+
def ndim(self) -> int:
567567
"""Dimension of the data. For MultiIndex ndim is always 2."""
568568
return 2
569569

python/cudf/cudf/core/single_column_frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def name(self, value):
7777

7878
@property # type: ignore
7979
@_cudf_nvtx_annotate
80-
def ndim(self): # noqa: D401
80+
def ndim(self) -> int: # noqa: D401
8181
"""Number of dimensions of the underlying data, by definition 1."""
8282
return 1
8383

0 commit comments

Comments
 (0)