From 34e7404731bc4517bd77dfb93eab04fddc28e29d Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Thu, 18 Jan 2024 16:02:16 +0000 Subject: [PATCH] Fix get_device_memory_ids (#1305) A recent change to the way `StringColumn`s are implemented in cudf threw up that we were never correctly determining the number of device buffers belonging to cudf columns if they had children (e.g. list and struct columns) or masks (any nullable column). Handle those cases and update the test. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) - Mads R. B. Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/dask-cuda/pull/1305 --- dask_cuda/get_device_memory_objects.py | 4 ++++ dask_cuda/tests/test_proxify_host_file.py | 15 +++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/dask_cuda/get_device_memory_objects.py b/dask_cuda/get_device_memory_objects.py index c5746c862..cd079f4ed 100644 --- a/dask_cuda/get_device_memory_objects.py +++ b/dask_cuda/get_device_memory_objects.py @@ -124,6 +124,10 @@ def get_device_memory_objects_cudf_index(obj): def get_device_memory_objects_cudf_multiindex(obj): return dispatch(obj._columns) + @dispatch.register(cudf.core.column.ColumnBase) + def get_device_memory_objects_cudf_column(obj): + return dispatch(obj.data) + dispatch(obj.children) + dispatch(obj.mask) + @sizeof.register_lazy("cupy") def register_cupy(): # NB: this overwrites dask.sizeof.register_cupy() diff --git a/dask_cuda/tests/test_proxify_host_file.py b/dask_cuda/tests/test_proxify_host_file.py index b1c9a9d52..2683ea36d 100644 --- a/dask_cuda/tests/test_proxify_host_file.py +++ b/dask_cuda/tests/test_proxify_host_file.py @@ -302,13 +302,24 @@ def test_dataframes_share_dev_mem(root_dir): def test_cudf_get_device_memory_objects(): cudf = pytest.importorskip("cudf") objects = [ - cudf.DataFrame({"a": range(10), "b": range(10)}, index=reversed(range(10))), + cudf.DataFrame( + {"a": [0, 1, 2, 3, None, 5, 6, 7, 8, 9], "b": range(10)}, + index=reversed(range(10)), + ), cudf.MultiIndex( levels=[[1, 2], ["blue", "red"]], codes=[[0, 0, 1, 1], [1, 0, 1, 0]] ), ] res = get_device_memory_ids(objects) - assert len(res) == 4, "We expect four buffer objects" + # Buffers are: + # 1. int data for objects[0].a + # 2. mask data for objects[0].a + # 3. int data for objects[0].b + # 4. int data for objects[0].index + # 5. int data for objects[1].levels[0] + # 6. char data for objects[1].levels[1] + # 7. offset data for objects[1].levels[1] + assert len(res) == 7, "We expect seven buffer objects" def test_externals(root_dir):