Skip to content

Commit

Permalink
Remove cudf._lib.merge in favor of inlining pylibcudf (#17370)
Browse files: browse the repository at this point in the history
Contributes to #17317

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Matthew Murray (https://github.com/Matt711)

URL: #17370
  • Loading branch information
mroeschke authored Dec 5, 2024
1 parent fbc3256 commit 06e937b
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 60 deletions.
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ set(cython_sources
csv.pyx
groupby.pyx
interop.pyx
merge.pyx
orc.pyx
parquet.pyx
reduce.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
csv,
groupby,
interop,
merge,
nvtext,
orc,
parquet,
Expand Down
47 changes: 0 additions & 47 deletions python/cudf/cudf/_lib/merge.pyx

This file was deleted.

50 changes: 39 additions & 11 deletions python/cudf/cudf/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
import numpy as np
import pandas as pd

import pylibcudf as plc

import cudf
from cudf._lib.column import Column
from cudf._lib.transform import one_hot_encode
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
Expand Down Expand Up @@ -941,21 +944,46 @@ def _merge_sorted(
idx + objs[0].index.nlevels for idx in key_columns_indices
]

columns = [
[
*(obj.index._columns if not ignore_index else ()),
*obj._columns,
]
columns = (
itertools.chain(obj.index._columns, obj._columns)
if not ignore_index
else obj._columns
for obj in objs
)

input_tables = [
plc.Table([col.to_pylibcudf(mode="read") for col in source_columns])
for source_columns in columns
]

num_keys = len(key_columns_indices)

column_order = (
plc.types.Order.ASCENDING if ascending else plc.types.Order.DESCENDING
)

if not ascending:
na_position = "last" if na_position == "first" else "first"

null_precedence = (
plc.types.NullOrder.BEFORE
if na_position == "first"
else plc.types.NullOrder.AFTER
)

plc_table = plc.merge.merge(
input_tables,
key_columns_indices,
[column_order] * num_keys,
[null_precedence] * num_keys,
)

result_columns = [
Column.from_pylibcudf(col) for col in plc_table.columns()
]

return objs[0]._from_columns_like_self(
cudf._lib.merge.merge_sorted(
input_columns=columns,
key_columns_indices=key_columns_indices,
ascending=ascending,
na_position=na_position,
),
result_columns,
column_names=objs[0]._column_names,
index_names=None if ignore_index else objs[0]._index_names,
)
Expand Down

0 comments on commit 06e937b

Please sign in to comment.