-
Notifications
You must be signed in to change notification settings - Fork 920
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Migrate round to pylibcudf #15863
Migrate round to pylibcudf #15863
Changes from 4 commits
9411162
878f245
ff6453a
ddae621
4aedbc5
ac5caf8
6a67133
38c7d4b
fcb60e9
46aa296
26fc147
96312b8
4e530cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
===== | ||
round | ||
===== | ||
|
||
.. automodule:: cudf._lib.pylibcudf.round | ||
:members: |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
from libc.stdint cimport int32_t | ||
|
||
from cudf._lib.pylibcudf.libcudf.round cimport ( | ||
rounding_method as rounding_method, | ||
lithomas1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
|
||
from .column cimport Column | ||
|
||
|
||
cpdef Column round( | ||
Column source, | ||
int32_t decimal_places = *, | ||
rounding_method round_method = * | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
from libc.stdint cimport int32_t | ||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
|
||
from cudf._lib.pylibcudf.libcudf.round cimport ( | ||
round as cpp_round, | ||
rounding_method, | ||
) | ||
|
||
from cudf._lib.pylibcudf.libcudf.round import \ | ||
rounding_method as RoundingMethod # no-cython-lint | ||
|
||
from cudf._lib.pylibcudf.libcudf.column.column cimport column | ||
|
||
from .column cimport Column | ||
|
||
|
||
cpdef Column round( | ||
Column source, | ||
int32_t decimal_places = 0, | ||
rounding_method round_method = rounding_method.HALF_UP | ||
): | ||
"""Rounds all the values in a column to the specified number of decimal places. | ||
|
||
lithomas1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Parameters | ||
---------- | ||
source : Column | ||
The Column for which to round values. | ||
decimal_places: int32_t, optional | ||
The number of decimal places to round to (default 0) | ||
round_method: rounding_method, optional | ||
The method by which to round each value. | ||
Can be one of { RoundingMethod.HALF_UP, RoundingMethod.HALF_EVEN } | ||
(default rounding_method.HALF_UP) | ||
|
||
Returns | ||
------- | ||
pylibcudf.Column | ||
A Column with values rounded | ||
""" | ||
cdef unique_ptr[column] c_result | ||
with nogil: | ||
c_result = move( | ||
cpp_round( | ||
source.view(), | ||
decimal_places, | ||
round_method | ||
) | ||
) | ||
|
||
return Column.from_libcudf(move(c_result)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,16 +2,10 @@ | |
|
||
from cudf.core.buffer import acquire_spill_lock | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
|
||
from cudf._lib.column cimport Column | ||
from cudf._lib.pylibcudf.libcudf.column.column cimport column | ||
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view | ||
from cudf._lib.pylibcudf.libcudf.round cimport ( | ||
round as cpp_round, | ||
rounding_method as cpp_rounding_method, | ||
) | ||
from cudf._lib.pylibcudf.round cimport rounding_method | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As a general rule we want to avoid cimporting anything from pylibcudf into cudf._lib. The goal is for cudf to become pure Python, or as close to it as possible (we may still use Cython, but primarily for cudf-specific internal optimizations; calls to the libcudf APIs should be done by treating pylibcudf as a pure Python API). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated. Followup: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question. I still want the Cython API, but it's a lower priority. The Cython API will probably end up used in specific parts of cudf internals where we might actually see performance benefits, but in general I expect the bulk of usage to be of the Python API. The exceptions I foresee are cases like groupby or joins where generating many groupby/agg objects from cuDF can be accelerated using the pylibcudf Cython API. |
||
|
||
import cudf._lib.pylibcudf as plc | ||
|
||
|
||
@acquire_spill_lock() | ||
|
@@ -31,19 +25,15 @@ def round(Column input_col, int decimal_places=0, how="half_even"): | |
if how not in {"half_even", "half_up"}: | ||
raise ValueError("'how' must be either 'half_even' or 'half_up'") | ||
|
||
cdef column_view input_col_view = input_col.view() | ||
cdef unique_ptr[column] c_result | ||
cdef cpp_rounding_method c_how = ( | ||
cpp_rounding_method.HALF_EVEN if how == "half_even" | ||
else cpp_rounding_method.HALF_UP | ||
cdef rounding_method c_how = ( | ||
lithomas1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
rounding_method.HALF_EVEN if how == "half_even" | ||
else rounding_method.HALF_UP | ||
) | ||
with nogil: | ||
c_result = move( | ||
cpp_round( | ||
input_col_view, | ||
decimal_places, | ||
c_how | ||
) | ||
) | ||
|
||
return Column.from_unique_ptr(move(c_result)) | ||
return Column.from_pylibcudf( | ||
plc.round.round( | ||
input_col.to_pylibcudf(mode="read"), | ||
decimal_places, | ||
c_how | ||
) | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
import pyarrow as pa | ||
import pytest | ||
from utils import assert_column_eq | ||
|
||
import cudf._lib.pylibcudf as plc | ||
|
||
|
||
@pytest.fixture(scope="module") | ||
def pa_round_data(): | ||
pa_arr = pa.array([1.5, 2.5, 1.35, 1.45, 15, 25], type=pa.float64()) | ||
return pa_arr | ||
|
||
|
||
@pytest.fixture(scope="module") | ||
def plc_round_data(pa_round_data): | ||
return plc.interop.from_arrow(pa_round_data) | ||
|
||
|
||
@pytest.mark.parametrize("decimal_places", [0, 1, 10]) | ||
@pytest.mark.parametrize( | ||
"round_mode", | ||
[ | ||
("half_up", plc.round.RoundingMethod.HALF_UP), | ||
("half_to_even", plc.round.RoundingMethod.HALF_EVEN), | ||
], | ||
) | ||
def test_round(pa_round_data, plc_round_data, decimal_places, round_mode): | ||
pa_round_mode, plc_round_mode = round_mode | ||
res = plc.round.round( | ||
plc_round_data, | ||
decimal_places=decimal_places, | ||
round_method=plc_round_mode, | ||
) | ||
expected = pa.compute.round( | ||
pa_round_data, ndigits=decimal_places, round_mode=pa_round_mode | ||
) | ||
|
||
assert_column_eq(res, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do the strings after the enum values mean anything (e.g. "cudf::rounding_method::HALF_UP") or are they purely cosmetic?
(asking since I don't want to unintentionally change behavior by changing this here)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The strings are meaningful, but removing them is the right answer. The strings are a way to alias the names of Cython objects so that they appear differently in generated C code. IOW when you write
cdef rounding_method x = HALF_UP
in Cython code, that will translate to C code that saysenum rounding_method X = cudf::rounding_method::HALF_UP
. The reason that we previously need this in enums in our Cython is because prior to version 3.0 Cython did not support C++ scoped enumerations (enum class
), but all of our C++ enums are defined that way, so the alias was the only way to trick Cython into generating the code that we needed (with raw C enums you don't need to docudf::rounding_method::HALF_UP
, you can just docudf::HALF_UP
because the enum doesn't create it's own namespace). By switching to using anenum class
here we remove the need for that.