-
Notifications
You must be signed in to change notification settings - Fork 917
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This PR creates the `pylibcudf` `case` APIs and migrates the cuDF Cython code to use them. Part of #15162. Authors: brandon-b-miller (https://github.com/brandon-b-miller), GALI PREM SAGAR (https://github.com/galipremsagar). Approvers: Vyas Ramasubramani (https://github.com/vyasr). URL: #15489
- Loading branch information
1 parent
af33b0a
commit 8506ea6
Showing
10 changed files
with
124 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# ============================================================================= | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | ||
# in compliance with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software distributed under the License | ||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
# or implied. See the License for the specific language governing permissions and limitations under | ||
# the License. | ||
# ============================================================================= | ||
|
||
# Cython sources in this directory that are compiled into extension modules.
set(cython_sources case.pyx)

# Libraries every generated module is linked against.
set(linked_libraries cudf::cudf)

# One keyword per line so that adding a source or option is a one-line diff.
rapids_cython_create_modules(
  CXX
  SOURCE_FILES "${cython_sources}"
  LINKED_LIBRARIES "${linked_libraries}"
  MODULE_PREFIX pylibcudf_
  ASSOCIATED_TARGETS cudf
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from . import case |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from . import case |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from cudf._lib.pylibcudf.column cimport Column | ||
|
||
|
||
# cpdef declarations of the case functions; each takes and returns the pylibcudf Column cimported above. | ||
cpdef Column to_lower(Column input) | ||
cpdef Column to_upper(Column input) | ||
cpdef Column swapcase(Column input) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
|
||
from cudf._lib.cpp.column.column cimport column | ||
from cudf._lib.cpp.strings cimport case as cpp_case | ||
from cudf._lib.pylibcudf.column cimport Column | ||
|
||
|
||
cpdef Column to_lower(Column input):
    """Return a new column with the lowercased form of ``input``.

    Thin wrapper over libcudf's strings ``to_lower``; the libcudf call is
    made with the GIL released.

    Parameters
    ----------
    input : Column
        Column to convert (presumably a strings column, since the libcudf
        routine lives under ``strings``).

    Returns
    -------
    Column
        Column wrapping the result produced by libcudf.
    """
    cdef unique_ptr[column] lowered

    with nogil:
        lowered = cpp_case.to_lower(input.view())

    return Column.from_libcudf(move(lowered))
|
||
cpdef Column to_upper(Column input):
    """Return a new column with the uppercased form of ``input``.

    Thin wrapper over libcudf's strings ``to_upper``; the libcudf call is
    made with the GIL released.

    Parameters
    ----------
    input : Column
        Column to convert (presumably a strings column, since the libcudf
        routine lives under ``strings``).

    Returns
    -------
    Column
        Column wrapping the result produced by libcudf.
    """
    cdef unique_ptr[column] uppered

    with nogil:
        uppered = cpp_case.to_upper(input.view())

    return Column.from_libcudf(move(uppered))
|
||
cpdef Column swapcase(Column input):
    """Return a new column with the case of ``input`` swapped.

    Thin wrapper over libcudf's strings ``swapcase``; the libcudf call is
    made with the GIL released.

    Parameters
    ----------
    input : Column
        Column to convert (presumably a strings column, since the libcudf
        routine lives under ``strings``).

    Returns
    -------
    Column
        Column wrapping the result produced by libcudf.
    """
    cdef unique_ptr[column] swapped

    with nogil:
        swapped = cpp_case.swapcase(input.view())

    return Column.from_libcudf(move(swapped))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,34 @@ | ||
# Copyright (c) 2018-2022, NVIDIA CORPORATION. | ||
# Copyright (c) 2018-2024, NVIDIA CORPORATION. | ||
|
||
from cudf.core.buffer import acquire_spill_lock | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
|
||
from cudf._lib.column cimport Column | ||
from cudf._lib.cpp.column.column cimport column | ||
from cudf._lib.cpp.column.column_view cimport column_view | ||
from cudf._lib.cpp.strings.case cimport ( | ||
swapcase as cpp_swapcase, | ||
to_lower as cpp_to_lower, | ||
to_upper as cpp_to_upper, | ||
) | ||
|
||
from cudf._lib.pylibcudf.strings import case | ||
|
||
|
||
@acquire_spill_lock()
def to_upper(Column source_strings):
    """Uppercase ``source_strings`` by delegating to pylibcudf.

    The column is handed to pylibcudf in ``'read'`` mode, passed through
    ``pylibcudf.strings.case.to_upper`` and the result is converted back
    into a cudf ``Column``.

    Parameters
    ----------
    source_strings : Column
        Column to convert.

    Returns
    -------
    Column
        New column built from the pylibcudf result.
    """
    # Only the pylibcudf path is kept; the earlier direct libcudf call
    # returned first and made this return unreachable.
    return Column.from_pylibcudf(
        case.to_upper(
            source_strings.to_pylibcudf(mode='read')
        )
    )
|
||
|
||
@acquire_spill_lock()
def to_lower(Column source_strings):
    """Lowercase ``source_strings`` by delegating to pylibcudf.

    The column is handed to pylibcudf in ``'read'`` mode, passed through
    ``pylibcudf.strings.case.to_lower`` and the result is converted back
    into a cudf ``Column``.

    Parameters
    ----------
    source_strings : Column
        Column to convert.

    Returns
    -------
    Column
        New column built from the pylibcudf result.
    """
    # Only the pylibcudf path is kept; the earlier direct libcudf call
    # returned first and made this return unreachable.
    return Column.from_pylibcudf(
        case.to_lower(
            source_strings.to_pylibcudf(mode='read')
        )
    )
|
||
|
||
@acquire_spill_lock()
def swapcase(Column source_strings):
    """Swap the case of ``source_strings`` by delegating to pylibcudf.

    The column is handed to pylibcudf in ``'read'`` mode, passed through
    ``pylibcudf.strings.case.swapcase`` and the result is converted back
    into a cudf ``Column``.

    Parameters
    ----------
    source_strings : Column
        Column to convert.

    Returns
    -------
    Column
        New column built from the pylibcudf result.
    """
    # Only the pylibcudf path is kept; the earlier direct libcudf call
    # returned first and made this return unreachable.
    return Column.from_pylibcudf(
        case.swapcase(
            source_strings.to_pylibcudf(mode='read')
        )
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
import pyarrow as pa | ||
import pytest | ||
from utils import assert_column_eq | ||
|
||
import cudf._lib.pylibcudf as plc | ||
|
||
|
||
@pytest.fixture(scope="module")
def string_col():
    """Provide a module-scoped pyarrow array of mixed-case strings and nulls."""
    values = [
        "AbC",
        "de",
        "FGHI",
        "j",
        "kLm",
        "nOPq",
        None,
        "RsT",
        None,
        "uVw",
    ]
    return pa.array(values)
|
||
|
||
def test_to_upper(string_col):
    """Check pylibcudf ``to_upper`` against pyarrow's ``utf8_upper``."""
    # Import the submodule explicitly: ``import pyarrow`` alone does not
    # guarantee ``pyarrow.compute`` is loaded, so ``pa.compute`` would only
    # work if some other module had imported it first.
    import pyarrow.compute as pc

    plc_col = plc.interop.from_arrow(string_col)
    got = plc.strings.case.to_upper(plc_col)
    expected = pc.utf8_upper(string_col)
    assert_column_eq(got, expected)
|
||
|
||
def test_to_lower(string_col):
    """Check pylibcudf ``to_lower`` against pyarrow's ``utf8_lower``."""
    # Import the submodule explicitly: ``import pyarrow`` alone does not
    # guarantee ``pyarrow.compute`` is loaded, so ``pa.compute`` would only
    # work if some other module had imported it first.
    import pyarrow.compute as pc

    plc_col = plc.interop.from_arrow(string_col)
    got = plc.strings.case.to_lower(plc_col)
    expected = pc.utf8_lower(string_col)
    assert_column_eq(got, expected)
|
||
|
||
def test_swapcase(string_col):
    """Check pylibcudf ``swapcase`` against pyarrow's ``utf8_swapcase``."""
    # Import the submodule explicitly: ``import pyarrow`` alone does not
    # guarantee ``pyarrow.compute`` is loaded, so ``pa.compute`` would only
    # work if some other module had imported it first.
    import pyarrow.compute as pc

    plc_col = plc.interop.from_arrow(string_col)
    got = plc.strings.case.swapcase(plc_col)
    expected = pc.utf8_swapcase(string_col)
    assert_column_eq(got, expected)