Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove cudf._lib.quantiles in favor of inlining pylibcudf #17347

Merged
merged 1 commit on Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ set(cython_sources
orc.pyx
parquet.pyx
partitioning.pyx
-quantiles.pyx
reduce.pyx
replace.pyx
reshape.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
orc,
parquet,
partitioning,
-quantiles,
reduce,
replace,
reshape,
Expand Down
9 changes: 9 additions & 0 deletions python/cudf/cudf/_lib/column.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@

from __future__ import annotations

+from typing import Literal
+
from typing_extensions import Self

+import pylibcudf as plc
+
from cudf._typing import Dtype, DtypeObj, ScalarLike
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase
Expand Down Expand Up @@ -71,3 +75,8 @@ class Column:
# TODO: The val parameter should be Scalar, not ScalarLike
@staticmethod
def from_scalar(val: ScalarLike, size: int) -> ColumnBase: ...
+@staticmethod
+def from_pylibcudf(
+col: plc.Column, data_ptr_exposed: bool = False
+) -> ColumnBase: ...
+def to_pylibcudf(self, mode: Literal["read", "write"]) -> plc.Column: ...
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/utils.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ cdef table_view table_view_from_columns(columns) except *
cdef table_view table_view_from_table(tbl, ignore_index=*) except*
cdef columns_from_unique_ptr(unique_ptr[table] c_tbl)
cdef columns_from_table_view(table_view tv, object owners)
-cdef columns_from_pylibcudf_table(tbl)
+cpdef columns_from_pylibcudf_table(tbl)
cdef _data_from_columns(columns, column_names, index_names=*)
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ cdef columns_from_unique_ptr(
return columns


-cdef columns_from_pylibcudf_table(tbl):
+cpdef columns_from_pylibcudf_table(tbl):
"""Convert a pylibcudf table into list of columns.

Parameters
Expand Down
14 changes: 0 additions & 14 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from __future__ import annotations

import warnings
-from collections.abc import Sequence
from decimal import Decimal
from typing import TYPE_CHECKING, cast

Expand Down Expand Up @@ -216,19 +215,6 @@ def normalize_binop_value(self, other):
)
return NotImplemented

-def _decimal_quantile(
-self, q: float | Sequence[float], interpolation: str, exact: bool
-) -> ColumnBase:
-quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q
-# get sorted indices and exclude nulls
-indices = libcudf.sort.order_by(
-[self], [True], "first", stable=True
-).slice(self.null_count, len(self))
-result = libcudf.quantiles.quantile(
-self, quant, interpolation, indices, exact
-)
-return result._with_type_metadata(self.dtype)
-
def as_numerical_column(
self, dtype: Dtype
) -> "cudf.core.column.NumericalColumn":
Expand Down
16 changes: 12 additions & 4 deletions python/cudf/cudf/core/column/numerical_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

import numpy as np

+import pylibcudf as plc
+
import cudf
from cudf import _lib as libcudf
-from cudf.core.buffer import Buffer
+from cudf.core.buffer import Buffer, acquire_spill_lock
from cudf.core.column import ColumnBase
from cudf.core.missing import NA
from cudf.core.mixins import Scannable
Expand Down Expand Up @@ -145,9 +147,15 @@ def quantile(
indices = libcudf.sort.order_by(
[self], [True], "first", stable=True
).slice(self.null_count, len(self))
-result = libcudf.quantiles.quantile(
-self, q, interpolation, indices, exact
-)
+with acquire_spill_lock():
+plc_column = plc.quantiles.quantile(
+self.to_pylibcudf(mode="read"),
+q,
+plc.types.Interpolation[interpolation.upper()],
+indices.to_pylibcudf(mode="read"),
+exact,
+)
+result = type(self).from_pylibcudf(plc_column) # type: ignore[assignment]
if return_scalar:
scalar_result = result.element_indexing(0)
if interpolation in {"lower", "higher", "nearest"}:
Expand Down
13 changes: 9 additions & 4 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -799,15 +799,20 @@ def _quantile_table(

null_precedence = [plc.types.NullOrder[key] for key in null_precedence]

-return self._from_columns_like_self(
-libcudf.quantiles.quantile_table(
-[*self._columns],
+with acquire_spill_lock():
+plc_table = plc.quantiles.quantiles(
+plc.Table(
+[c.to_pylibcudf(mode="read") for c in self._columns]
+),
q,
interpolation,
is_sorted,
column_order,
null_precedence,
-),
+)
+columns = libcudf.utils.columns_from_pylibcudf_table(plc_table)
+return self._from_columns_like_self(
+columns,
column_names=self._column_names,
)

Expand Down
Loading