-
Notifications
You must be signed in to change notification settings - Fork 917
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add labeling APIs to pylibcudf (#16761)
Contributes to #15162. Authors: Matthew Roeschke (https://github.com/mroeschke). Approvers: Lawrence Mitchell (https://github.com/wence-). URL: #16761
- Loading branch information
Showing
11 changed files
with
134 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ set(cython_sources | |
groupby.pyx | ||
interop.pyx | ||
join.pyx | ||
labeling.pyx | ||
lists.pyx | ||
merge.pyx | ||
null_mask.pyx | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ from . cimport ( | |
filling, | ||
groupby, | ||
join, | ||
labeling, | ||
lists, | ||
merge, | ||
null_mask, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
from libcpp cimport bool | ||
from pylibcudf.libcudf.labeling cimport inclusive | ||
|
||
from .column cimport Column | ||
|
||
|
||
# Signature-only declaration of label_bins so that other Cython modules can | ||
# cimport it with static typing; the body with this same signature is | ||
# defined in the matching .pyx module. | ||
# `left_inclusive` / `right_inclusive` are C++ bools (libcpp `bool`) that say | ||
# whether the corresponding bin edge is treated as inclusive. | ||
cpdef Column label_bins( | ||
    Column input, | ||
    Column left_edges, | ||
    bool left_inclusive, | ||
    Column right_edges, | ||
    bool right_inclusive | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
from pylibcudf.libcudf cimport labeling as cpp_labeling | ||
from pylibcudf.libcudf.column.column cimport column | ||
from pylibcudf.libcudf.labeling cimport inclusive | ||
|
||
from pylibcudf.libcudf.labeling import inclusive as Inclusive # no-cython-lint | ||
|
||
from .column cimport Column | ||
|
||
|
||
cpdef Column label_bins(
    Column input,
    Column left_edges,
    bool left_inclusive,
    Column right_edges,
    bool right_inclusive
):
    """Label every element of ``input`` according to the bin it falls in.

    Parameters
    ----------
    input : Column
        Elements to be labeled against the supplied bins.
    left_edges : Column
        Left edge of each bin.
    left_inclusive : bool
        If true, the left edge of a bin is treated as inclusive.
    right_edges : Column
        Right edge of each bin.
    right_inclusive : bool
        If true, the right edge of a bin is treated as inclusive.

    Returns
    -------
    Column
        Integer labels for the elements of ``input``, determined by the
        specified bins.
    """
    cdef unique_ptr[column] labels
    # libcudf takes an ``inclusive`` enum rather than a bool for each edge:
    # start from NO and switch to YES only when the caller asked for it.
    cdef inclusive left_mode = inclusive.NO
    cdef inclusive right_mode = inclusive.NO

    if left_inclusive:
        left_mode = inclusive.YES
    if right_inclusive:
        right_mode = inclusive.YES

    # The libcudf call touches no Python objects, so run it without the GIL.
    with nogil:
        labels = move(
            cpp_labeling.label_bins(
                input.view(),
                left_edges.view(),
                left_mode,
                right_edges.view(),
                right_mode,
            )
        )

    # Hand ownership of the libcudf column over to a pylibcudf Column.
    return Column.from_libcudf(move(labels))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
import pyarrow as pa | ||
import pylibcudf as plc | ||
import pytest | ||
|
||
|
||
@pytest.mark.parametrize("left_inclusive", [True, False])
@pytest.mark.parametrize("right_inclusive", [True, False])
def test_label_bins(left_inclusive, right_inclusive):
    """Check label_bins for every combination of edge inclusivity.

    The input contains values strictly inside bin 0 (1, 2, 3) as well as
    values lying exactly on its left edge (0) and right edge (4), so the
    expected output actually depends on the two parametrized flags.
    Per the libcudf ``label_bins`` contract, an element that belongs to no
    bin is labeled null.
    """
    in_col = plc.interop.from_arrow(pa.array([0, 1, 2, 3, 4]))
    # Two bins: bin 0 spans 0..4 and bin 1 spans 5..6.
    left_edges = plc.interop.from_arrow(pa.array([0, 5]))
    right_edges = plc.interop.from_arrow(pa.array([4, 6]))
    result = plc.interop.to_arrow(
        plc.labeling.label_bins(
            in_col, left_edges, left_inclusive, right_edges, right_inclusive
        )
    )
    # A value sitting on an edge is in bin 0 only when that edge is
    # inclusive; otherwise it matches no bin and its label is null.
    on_left_edge = 0 if left_inclusive else None
    on_right_edge = 0 if right_inclusive else None
    expected = pa.chunked_array(
        [[on_left_edge, 0, 0, 0, on_right_edge]], type=pa.int32()
    )
    assert result.equals(expected)
|
||
|
||
def test_inclusive_enum():
    """Pin the integer values of the ``Inclusive`` enum members."""
    inclusive_enum = plc.labeling.Inclusive
    assert inclusive_enum.YES == 0
    assert inclusive_enum.NO == 1