Skip to content

Commit

Permalink
Fix value_counts with split_out != 1 (#1170)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Nov 19, 2024
1 parent e406551 commit bb45798
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
3 changes: 2 additions & 1 deletion dask_expr/_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ def _lower(self):

# Reset the index if we used it for shuffling
if split_by_index:
shuffled = SetIndexBlockwise(shuffled, split_by, True, None)
idx = list(self._meta.index.names) if split_by != ["index"] else split_by
shuffled = SetIndexBlockwise(shuffled, idx, True, None)

# Convert back to Series if necessary
if self.shuffle_by_index is not False:
Expand Down
7 changes: 7 additions & 0 deletions dask_expr/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,13 @@ def test_groupby_no_numeric_only(pdf, func):
assert_eq(agg, expect)


def test_value_counts_split_out(pdf):
    """Check grouped ``value_counts`` with ``split_out=True`` against pandas."""
    # Spread the frame over 10 partitions before grouping.
    ddf = from_pandas(pdf, npartitions=10)
    actual = ddf.groupby("x").y.value_counts(split_out=True)
    # The pandas result on the same data is the reference.
    assert_eq(actual, pdf.groupby("x").y.value_counts())


def test_unique(df, pdf):
result = df.groupby("x")["y"].unique()
expected = pdf.groupby("x")["y"].unique()
Expand Down

0 comments on commit bb45798

Please sign in to comment.