Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate reshape.pxd to pylibcudf #15827

Merged
merged 8 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ This page provides API documentation for pylibcudf.
lists
merge
reduce
reshape
rolling
scalar
search
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=======
reshape
=======

.. automodule:: cudf._lib.pylibcudf.reshape
:members:
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(cython_sources
merge.pyx
reduce.pyx
replace.pyx
reshape.pyx
rolling.pyx
scalar.pyx
search.pyx
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ from . cimport (
merge,
reduce,
replace,
reshape,
rolling,
search,
sorting,
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
merge,
reduce,
replace,
reshape,
rolling,
search,
sorting,
Expand Down
11 changes: 11 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/reshape.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from cudf._lib.pylibcudf.libcudf.types cimport size_type

from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table


# Public pylibcudf reshape API; the definitions live in reshape.pyx.
# Interleave the columns of ``source_table`` into a single Column.
cpdef Column interleave_columns(Table source_table)
# Repeat the rows of ``source_table`` ``count`` times into a new Table.
cpdef Table tile(Table source_table, size_type count)
65 changes: 65 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/reshape.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.reshape cimport (
interleave_columns as cpp_interleave_columns,
tile as cpp_tile,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.types cimport size_type

from .column cimport Column
from .table cimport Table


cpdef Column interleave_columns(Table source_table):
    """Interleave the columns of a table into a single column.

    The column-major ``source_table`` is flattened row by row: the result
    holds the first element of every column, then the second element of
    every column, and so on.

    Example::

        source_table = [[A1, A2, A3], [B1, B2, B3]]
        result       = [A1, B1, A2, B2, A3, B3]

    Parameters
    ----------
    source_table : Table
        The input table whose columns are interleaved.

    Returns
    -------
    Column
        A new column holding the interleaved values of the input columns.
    """
    cdef unique_ptr[column] c_interleaved

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_interleaved = move(cpp_interleave_columns(source_table.view()))

    # Hand ownership of the libcudf column over to the pylibcudf wrapper.
    return Column.from_libcudf(move(c_interleaved))


cpdef Table tile(Table source_table, size_type count):
    """Build a new table by repeating the rows of a table ``count`` times.

    Parameters
    ----------
    source_table : Table
        The input table whose rows are repeated.
    count : size_type
        How many times the rows are tiled. Must be non-negative.

    Returns
    -------
    Table
        A new table containing the tiled rows.
    """
    cdef unique_ptr[table] c_tiled

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_tiled = move(cpp_tile(source_table.view(), count))

    # Hand ownership of the libcudf table over to the pylibcudf wrapper.
    return Table.from_libcudf(move(c_tiled))
42 changes: 18 additions & 24 deletions python/cudf/cudf/_lib/reshape.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,33 @@

from cudf.core.buffer import acquire_spill_lock

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.column cimport Column
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.reshape cimport (
interleave_columns as cpp_interleave_columns,
tile as cpp_tile,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
from cudf._lib.utils cimport columns_from_pylibcudf_table

import cudf._lib.pylibcudf as plc


@acquire_spill_lock()
def interleave_columns(list source_columns):
cdef table_view c_view = table_view_from_columns(source_columns)
cdef unique_ptr[column] c_result

with nogil:
c_result = move(cpp_interleave_columns(c_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.reshape.interleave_columns(
plc.Table([
c.to_pylibcudf(mode="read") for c in source_columns
])
)
)


@acquire_spill_lock()
def tile(list source_columns, size_type count):
cdef size_type c_count = count
cdef table_view c_view = table_view_from_columns(source_columns)
cdef unique_ptr[table] c_result

with nogil:
c_result = move(cpp_tile(c_view, c_count))

return columns_from_unique_ptr(move(c_result))
return columns_from_pylibcudf_table(
plc.reshape.tile(
plc.Table([
c.to_pylibcudf(mode="read") for c in source_columns
]),
c_count
)
)
43 changes: 43 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/test_reshape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pyarrow as pa
import pytest
from utils import assert_column_eq, assert_table_eq

from cudf._lib import pylibcudf as plc


@pytest.fixture(scope="module")
def reshape_data():
    """Return two equal-length integer columns used by the reshape tests."""
    return [[1, 2, 3], [4, 5, 6]]


@pytest.fixture(scope="module")
def reshape_plc_tbl(reshape_data):
arrow_tbl = pa.Table.from_arrays(reshape_data, names=["a", "b"])
plc_tbl = plc.interop.from_arrow(arrow_tbl)
Comment on lines +18 to +19
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might as well pull this out into a fixture too so that it's only done once (especially since you parametrize the tile test below so it'll do this four times).

return plc_tbl


def test_interleave_columns(reshape_data, reshape_plc_tbl):
    """Check interleave_columns against a row-by-row pyarrow flattening."""
    got = plc.reshape.interleave_columns(reshape_plc_tbl)

    # zip(*columns) yields the rows; concatenating them in order gives the
    # expected interleaved column.
    row_chunks = [pa.array(row) for row in zip(*reshape_data)]

    assert_column_eq(got, pa.concat_arrays(row_chunks))


@pytest.mark.parametrize("cnt", [0, 1, 3])
def test_tile(reshape_data, reshape_plc_tbl, cnt):
    """Check tile against each input column repeated ``cnt`` times."""
    got = plc.reshape.tile(reshape_plc_tbl, cnt)

    # Repeating a Python list ``cnt`` times mirrors tiling a column's rows.
    repeated_cols = [pa.array(col * cnt) for col in reshape_data]

    # Reuse the input table's schema for the expected table.
    schema = plc.interop.to_arrow(reshape_plc_tbl).schema
    expect = pa.Table.from_arrays(repeated_cols, schema=schema)

    assert_table_eq(got, expect)
Loading