From 0fb485d66d8a5c91ddd6bc213782b3653368263d Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Fri, 22 Mar 2024 11:26:39 -0700
Subject: [PATCH] First pass at unblocking dask-expr issues

- replace _Frame usage
---
 dask_sql/physical/rel/custom/wrappers.py |  6 +++---
 dask_sql/physical/utils/filter.py        | 14 +++++++-------
 tests/unit/test_ml_utils.py              |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/dask_sql/physical/rel/custom/wrappers.py b/dask_sql/physical/rel/custom/wrappers.py
index 49d4adb64..af7619306 100644
--- a/dask_sql/physical/rel/custom/wrappers.py
+++ b/dask_sql/physical/rel/custom/wrappers.py
@@ -207,7 +207,7 @@ def transform(self, X):
                 estimator=self._postfit_estimator,
                 meta=output_meta,
             )
-        elif isinstance(X, dd._Frame):
+        elif isinstance(X, dd.DataFrame):
             if output_meta is None:
                 output_meta = _transform(X._meta_nonempty, self._postfit_estimator)
             try:
@@ -305,7 +305,7 @@ def predict(self, X):
             )
 
             return result
-        elif isinstance(X, dd._Frame):
+        elif isinstance(X, dd.DataFrame):
             if output_meta is None:
                 # dask-dataframe relies on dd.core.no_default
                 # for infering meta
@@ -364,7 +364,7 @@ def predict_proba(self, X):
                 meta=output_meta,
                 chunks=(X.chunks[0], len(self._postfit_estimator.classes_)),
             )
-        elif isinstance(X, dd._Frame):
+        elif isinstance(X, dd.DataFrame):
             if output_meta is None:
                 # dask-dataframe relies on dd.core.no_default
                 # for infering meta
diff --git a/dask_sql/physical/utils/filter.py b/dask_sql/physical/utils/filter.py
index aff9ab5ef..6e820cd80 100644
--- a/dask_sql/physical/utils/filter.py
+++ b/dask_sql/physical/utils/filter.py
@@ -304,10 +304,10 @@ def combine(self, other: DNF | _And | _Or | list | tuple | None) -> DNF:
 # Specify functions that must be generated with
 # a different API at the dataframe-collection level
 _special_op_mappings = {
-    M.fillna: dd._Frame.fillna,
-    M.isin: dd._Frame.isin,
-    M.isna: dd._Frame.isna,
-    M.astype: dd._Frame.astype,
+    M.fillna: dd.DataFrame.fillna,
+    M.isin: dd.DataFrame.isin,
+    M.isna: dd.DataFrame.isna,
+    M.astype: dd.DataFrame.astype,
 }
 
 # Convert _pass_through_ops to respect "special" mappings
@@ -316,7 +316,7 @@ def combine(self, other: DNF | _And | _Or | list | tuple | None) -> DNF:
 
 def _preprocess_layers(input_layers):
     # NOTE: This is a Layer-specific work-around to deal with
-    # the fact that `dd._Frame.isin(values)` will add a distinct
+    # the fact that `dd.DataFrame.isin(values)` will add a distinct
     # `MaterializedLayer` for the `values` argument.
     # See: https://github.com/dask-contrib/dask-sql/issues/607
     skip = set()
@@ -418,9 +418,9 @@ def _dnf_filter_expression(self, dsk):
             func = _blockwise_logical_dnf
         elif op == operator.getitem:
             func = _blockwise_getitem_dnf
-        elif op == dd._Frame.isin:
+        elif op == dd.DataFrame.isin:
             func = _blockwise_isin_dnf
-        elif op == dd._Frame.isna:
+        elif op == dd.DataFrame.isna:
             func = _blockwise_isna_dnf
         elif op == operator.inv:
             func = _blockwise_inv_dnf
diff --git a/tests/unit/test_ml_utils.py b/tests/unit/test_ml_utils.py
index 7130b2bed..2c7365f00 100644
--- a/tests/unit/test_ml_utils.py
+++ b/tests/unit/test_ml_utils.py
@@ -98,7 +98,7 @@ def make_classification(
 
 def _assert_eq(l, r, name=None, **kwargs):
     array_types = (np.ndarray, da.Array)
-    frame_types = (pd.core.generic.NDFrame, dd._Frame)
+    frame_types = (pd.core.generic.NDFrame, dd.DataFrame)
     if isinstance(l, array_types):
         assert_eq_ar(l, r, **kwargs)
     elif isinstance(l, frame_types):