-
Notifications
You must be signed in to change notification settings - Fork 918
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add string.find_multiple APIs to pylibcudf
- Loading branch information
Showing
10 changed files
with
103 additions
and
23 deletions.
There are no files selected for viewing
6 changes: 6 additions & 0 deletions
6
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/find_multiple.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
============= | ||
find_multiple | ||
============= | ||
|
||
.. automodule:: pylibcudf.strings.find_multiple | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ strings | |
contains | ||
extract | ||
find | ||
find_multiple | ||
findall | ||
regex_flags | ||
regex_program | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,20 @@ | ||
# Copyright (c) 2020-2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
|
||
from cudf.core.buffer import acquire_spill_lock | ||
|
||
from pylibcudf.libcudf.column.column cimport column | ||
from pylibcudf.libcudf.column.column_view cimport column_view | ||
from pylibcudf.libcudf.strings.find_multiple cimport ( | ||
find_multiple as cpp_find_multiple, | ||
) | ||
|
||
from cudf._lib.column cimport Column | ||
|
||
import pylibcudf as plc | ||
|
||
|
||
@acquire_spill_lock()
def find_multiple(Column source_strings, Column target_strings):
    """
    Returns a column with character position values where each
    of the `target_strings` is found in each string of `source_strings`.

    The search itself is delegated to
    ``pylibcudf.strings.find_multiple.find_multiple``; this wrapper only
    converts the cudf columns to pylibcudf columns and the result back.

    Parameters
    ----------
    source_strings : Column
        Strings column to search in.
    target_strings : Column
        Strings column holding the targets to search for.

    Returns
    -------
    Column
        Column of character positions built from the pylibcudf result.
    """
    # Single implementation path: the previous hand-written libcudf call
    # (unique_ptr + nogil block) returned before this code could run, which
    # left the pylibcudf delegation unreachable. Only the delegation is kept.
    plc_result = plc.strings.find_multiple.find_multiple(
        # Both inputs are only read, so request read-only access.
        source_strings.to_pylibcudf(mode="read"),
        target_strings.to_pylibcudf(mode="read")
    )
    return Column.from_pylibcudf(plc_result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ from . cimport ( | |
convert, | ||
extract, | ||
find, | ||
find_multiple, | ||
findall, | ||
regex_flags, | ||
regex_program, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
convert, | ||
extract, | ||
find, | ||
find_multiple, | ||
findall, | ||
regex_flags, | ||
regex_program, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from pylibcudf.column cimport Column | ||
|
||
|
||
cpdef Column find_multiple(Column input, Column targets) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Copyright (c) 2020-2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
from pylibcudf.column cimport Column | ||
from pylibcudf.libcudf.column.column cimport column | ||
from pylibcudf.libcudf.strings cimport find_multiple as cpp_find_multiple | ||
|
||
|
||
cpdef Column find_multiple(Column input, Column targets):
    """
    Returns a lists column with character position values where each
    of the target strings are found in each string.

    For details, see :cpp:func:`cudf::strings::find_multiple`.

    Parameters
    ----------
    input : Column
        Strings instance for this operation
    targets : Column
        Strings to search for in each string

    Returns
    -------
    Column
        Lists column with character position values
    """
    cdef unique_ptr[column] c_positions

    # Run the libcudf call with the GIL released.
    with nogil:
        c_positions = move(
            cpp_find_multiple.find_multiple(input.view(), targets.view())
        )

    # Hand ownership of the libcudf column over to a pylibcudf Column.
    return Column.from_libcudf(move(c_positions))
26 changes: 26 additions & 0 deletions
26
python/pylibcudf/pylibcudf/tests/test_string_find_multiple.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
import pyarrow as pa | ||
import pylibcudf as plc | ||
|
||
|
||
def test_find_multiple():
    """Check find_multiple against positions computed with ``str.find``."""
    haystacks = pa.array(["abc", "def"])
    needles = pa.array(["a", "c", "e"])

    got = plc.interop.to_arrow(
        plc.strings.find_multiple.find_multiple(
            plc.interop.from_arrow(haystacks),
            plc.interop.from_arrow(needles),
        )
    )

    # Reference answer: one inner list per input string, one position per
    # target, using Python's own substring search.
    needle_values = needles.to_pylist()
    positions = [
        [text.find(needle) for needle in needle_values]
        for text in haystacks.to_pylist()
    ]
    # Reuse the result's type so only the values are being compared.
    expected = pa.chunked_array([pa.array(positions, type=got.type)])

    assert got.equals(expected)