Skip to content

Commit

Permalink
Fix groupby.len with null values in cudf.polars (#17671)
Browse files Browse the repository at this point in the history
closes #17667

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #17671
  • Loading branch information
mroeschke authored Jan 3, 2025
1 parent 1dece5e commit 07406b3
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
8 changes: 6 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
# TODO: remove need for this
# ruff: noqa: D101
Expand Down Expand Up @@ -69,7 +69,11 @@ def __init__(
# TODO: handle nans
req = plc.aggregation.variance(ddof=options)
elif name == "count":
req = plc.aggregation.count(null_handling=plc.types.NullPolicy.EXCLUDE)
req = plc.aggregation.count(
null_handling=plc.types.NullPolicy.EXCLUDE
if not options
else plc.types.NullPolicy.INCLUDE
)
elif name == "quantile":
_, quantile = self.children
if not isinstance(quantile, Literal):
Expand Down
8 changes: 7 additions & 1 deletion python/cudf_polars/tests/test_groupby.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand Down Expand Up @@ -213,3 +213,9 @@ def test_groupby_maintain_order_random(nrows, nkeys, with_nulls):
)
q = df.lazy().group_by(key_names, maintain_order=True).agg(pl.col("value").sum())
assert_gpu_result_equal(q)


def test_groupby_len_with_nulls():
    """Regression test: ``len`` inside a group-by on a column holding a null.

    Builds a small frame where the aggregated column ``b`` contains a
    ``None`` in one of the groups keyed by ``a``, then checks the lazy
    group-by/``len`` query via ``assert_gpu_result_equal``.
    """
    frame = pl.DataFrame({"a": [1, 1, 1, 2], "b": [1, None, 2, 3]})
    group_lengths = pl.col("b").len()
    query = frame.lazy().group_by("a").agg(group_lengths)
    # Row order is deliberately not compared for this query.
    assert_gpu_result_equal(query, check_row_order=False)

0 comments on commit 07406b3

Please sign in to comment.