From 5780c4d8fb5afac2e04988a2ff5531f94c22d3a3 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Tue, 13 Aug 2024 13:46:31 -0700 Subject: [PATCH] Register `read_parquet` and `read_csv` with dask-expr (#16535) After https://github.com/dask/dask-expr/pull/1114, Dask cuDF must register specific `read_parquet` and `read_csv` functions to be used when query-planning is enabled (the default). **This PR is required for CI to pass with dask>2024.8.0** **NOTE**: It probably doesn't make sense to add specific tests for this change. Once the 2024.7.1 dask pin is removed, all `dask_cudf` tests using `read_parquet` and `read_csv` will fail without this change... Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) - Benjamin Zaitlen (https://github.com/quasiben) URL: https://github.com/rapidsai/cudf/pull/16535 --- python/dask_cudf/dask_cudf/backends.py | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 2b1f745fc04..01bab30190a 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -667,6 +667,41 @@ def from_dict( constructor=constructor, ) + @staticmethod + def read_parquet(*args, engine=None, **kwargs): + import dask_expr as dx + + from dask_cudf.io.parquet import CudfEngine + + return _default_backend( + dx.read_parquet, *args, engine=CudfEngine, **kwargs + ) + + @staticmethod + def read_csv( + path, + *args, + header="infer", + dtype_backend=None, + storage_options=None, + **kwargs, + ): + import dask_expr as dx + from fsspec.utils import stringify_path + + if not isinstance(path, str): + path = stringify_path(path) + return dx.new_collection( + dx.io.csv.ReadCSV( + path, + dtype_backend=dtype_backend, + storage_options=storage_options, + kwargs=kwargs, + header=header, + dataframe_backend="cudf", + ) + ) + @staticmethod def 
read_json(*args, **kwargs): from dask_cudf.io.json import read_json as read_json_impl