Skip to content

Commit

Permalink
pandas 2.2: fix test_dataframe_groupby_tasks (#8475)
Browse files (browse the repository at this point in the history)
  • Loading branch information
crusaderky authored Jan 23, 2024
1 parent 487b4ec commit e30a3e4
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions distributed/tests/test_dask_collections.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -189,28 +189,29 @@ async def test_loc(c, s, a, b):

# NOTE(review): this is the commit's diff hunk as rendered by the web page.
# The +/- markers and all indentation were lost, so superseded and replacement
# lines appear back-to-back: each call without **INCLUDE_GROUPS is followed
# by the version that passes it.  That reading matches the page header
# ("8 additions and 7 deletions"): 7 replaced calls plus the new
# INCLUDE_GROUPS assignment.
#
# The test compares pandas groupby(...).apply(len) results (`a`) against the
# dask equivalents (`b`) built from the same frame split into 10 partitions.
@ignore_single_machine_warning
def test_dataframe_groupby_tasks(client):
# Extra keyword arguments for the apply() calls below: include_groups=False
# when PANDAS_GE_220 is truthy, otherwise none.  PANDAS_GE_220 is defined
# outside this hunk; presumably it flags pandas >= 2.2 -- confirm.
INCLUDE_GROUPS = {"include_groups": False} if PANDAS_GE_220 else {}
df = make_time_dataframe()

# Floor-divide A and B by 0.1 in place; presumably this bins the values so
# that several rows share a group key -- TODO confirm.
df["A"] = df.A // 0.1
df["B"] = df.B // 0.1
ddf = dd.from_pandas(df, npartitions=10)

# Two ways of naming the grouping key: the column label "A", or the column
# object taken from whichever frame is passed in.
for ind in [lambda x: "A", lambda x: x.A]:
# (superseded pair: same calls without **INCLUDE_GROUPS)
a = df.groupby(ind(df)).apply(len)
b = ddf.groupby(ind(ddf)).apply(len, meta=(None, int))
# (replacement pair)
a = df.groupby(ind(df)).apply(len, **INCLUDE_GROUPS)
b = ddf.groupby(ind(ddf)).apply(len, meta=(None, int), **INCLUDE_GROUPS)
assert_equal(a, b.compute().sort_index())
# No key in the dask graph may have "partd" in its first element;
# presumably this guards against a partd-based shuffle -- confirm.
assert not any("partd" in k[0] for k in b.dask)

# Same comparison restricted to column B of each group.
# (superseded pair: same calls without **INCLUDE_GROUPS)
a = df.groupby(ind(df)).B.apply(len)
b = ddf.groupby(ind(ddf)).B.apply(len, meta=("B", int))
# (replacement pair)
a = df.groupby(ind(df)).B.apply(len, **INCLUDE_GROUPS)
b = ddf.groupby(ind(ddf)).B.apply(len, meta=("B", int), **INCLUDE_GROUPS)
assert_equal(a, b.compute().sort_index())
assert not any("partd" in k[0] for k in b.dask)

# Grouping by a two-column selection of the dask frame is expected to raise
# either NotImplementedError or ValueError.
with pytest.raises((NotImplementedError, ValueError)):
# (superseded line, then its replacement)
ddf.groupby(ddf[["A", "B"]]).apply(len, meta=int)
ddf.groupby(ddf[["A", "B"]]).apply(len, meta=int, **INCLUDE_GROUPS)

# Grouping by a list of column labels.
# (superseded pair: same calls without **INCLUDE_GROUPS)
a = df.groupby(["A", "B"]).apply(len)
b = ddf.groupby(["A", "B"]).apply(len, meta=(None, int))
# (replacement pair)
a = df.groupby(["A", "B"]).apply(len, **INCLUDE_GROUPS)
b = ddf.groupby(["A", "B"]).apply(len, meta=(None, int), **INCLUDE_GROUPS)

assert_equal(a, b.compute().sort_index())

Expand Down

0 comments on commit e30a3e4

Please sign in to comment.