From e4b118f4c7d7a3ae6b82388600d5e7dd51b0119d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 4 Oct 2024 17:26:51 +0200
Subject: [PATCH] Fix binops with one partition

---
Review notes (this section is ignored by `git am` and is not part of the
commit message):

* dask_expr/_expr.py: adds `_broadcast_dep(self, dep)` directly above
  `class Add(Binop)`. It returns True exactly when `dep.npartitions == 1`.
  The enclosing class header lies outside the hunk (presumably `Binop`;
  confirm against the target file).
* dask_expr/tests/test_collection.py: adds a regression test that compares
  `Series.add(..., fill_value=0)` on two `from_pandas` collections with the
  same operation in plain pandas via `assert_eq`.
  NOTE(review): both frames use index [1, 4] although the test name says
  "different index"; confirm that this is intended.

 dask_expr/_expr.py                 |  3 +++
 dask_expr/tests/test_collection.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/dask_expr/_expr.py b/dask_expr/_expr.py
index aa807eab..30321716 100644
--- a/dask_expr/_expr.py
+++ b/dask_expr/_expr.py
@@ -2632,6 +2632,9 @@ def _divisions(self):
         else:
             return super()._divisions()
 
+    def _broadcast_dep(self, dep: Expr):
+        return dep.npartitions == 1
+
 
 class Add(Binop):
     operation = operator.add
diff --git a/dask_expr/tests/test_collection.py b/dask_expr/tests/test_collection.py
index 1d5a341a..8090cf04 100644
--- a/dask_expr/tests/test_collection.py
+++ b/dask_expr/tests/test_collection.py
@@ -2674,3 +2674,32 @@ def test_to_backend_simplify():
     assert str(df2.expr) != str(df[["y"]].expr)
     df3 = df2.simplify()
     assert str(df3.expr) == str(df[["y"]].expr)
+
+
+def test_add_different_index_one_partition():
+    pdf1 = pd.DataFrame(
+        {
+            "prediction_probability": [1.0] * 2,
+            "prediction": [1, 1],
+            "num_runs": [1, 1],
+            "Idx": [1, 4],
+        }
+    ).set_index("Idx")
+
+    pdf2 = pd.DataFrame(
+        {
+            "prediction_probability": [1.0] * 2,
+            "prediction": [1, 1],
+            "num_runs": [
+                1,
+                1,
+            ],
+            "Idx": [1, 4],
+        }
+    ).set_index("Idx")
+
+    df1 = from_pandas(pdf1)
+    df2 = from_pandas(pdf2)
+    df1["prediction"] = df1.prediction.add(df2.prediction, fill_value=0)
+    pdf1["prediction"] = pdf1.prediction.add(pdf2.prediction, fill_value=0)
+    assert_eq(df1, pdf1)