Merge branch 'branch-23.10' into pandas20

rapidsai · Sep 21, 2023 · e7b005f · e7b005f
2 parents 134b3bf + 63ba2cc
commit e7b005f
Show file tree

Hide file tree

Showing 21 changed files with 183 additions and 49 deletions.
diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
@@ -0,0 +1,4 @@
+# Configuration file for `copy-pr-bot` GitHub App
+# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
+
+enabled: true
diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml
@@ -5,5 +5,4 @@ auto_merger: true
 branch_checker: true
 label_checker: true
 release_drafter: true
-copy_prs: true
 recently_updated: true
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   conda-python-build:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -38,7 +38,7 @@ jobs:
     if: github.ref_type == 'branch'
     needs: [conda-python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10
     with:
       arch: "amd64"
       branch: ${{ inputs.branch }}
@@ -51,7 +51,7 @@ jobs:
   upload-conda:
     needs: [conda-python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.10
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -18,26 +18,26 @@ jobs:
       - docs-build
       - wheel-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.10
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.10
   conda-python-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10
     with:
       build_type: pull-request
   conda-python-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10
     with:
       build_type: pull-request
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-python-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,35 @@
+# dask-cuda 23.08.00 (9 Aug 2023)
+
+## 🐛 Bug Fixes
+
+- Ensure plugin config can be passed from worker to client ([#1212](https://github.com/rapidsai/dask-cuda/pull/1212)) [@wence-](https://github.com/wence-)
+- Adjust to new `get_default_shuffle_method` name ([#1200](https://github.com/rapidsai/dask-cuda/pull/1200)) [@pentschev](https://github.com/pentschev)
+- Increase minimum timeout to wait for workers in CI ([#1192](https://github.com/rapidsai/dask-cuda/pull/1192)) [@pentschev](https://github.com/pentschev)
+
+## 📖 Documentation
+
+- Remove RTD configuration and references to RTD page ([#1211](https://github.com/rapidsai/dask-cuda/pull/1211)) [@charlesbluca](https://github.com/charlesbluca)
+- Clarify `memory_limit` docs ([#1207](https://github.com/rapidsai/dask-cuda/pull/1207)) [@pentschev](https://github.com/pentschev)
+
+## 🚀 New Features
+
+- Remove versioneer ([#1204](https://github.com/rapidsai/dask-cuda/pull/1204)) [@pentschev](https://github.com/pentschev)
+- Remove code for Distributed&lt;2023.5.1 compatibility ([#1191](https://github.com/rapidsai/dask-cuda/pull/1191)) [@pentschev](https://github.com/pentschev)
+- Specify disk spill compression based on Dask config ([#1190](https://github.com/rapidsai/dask-cuda/pull/1190)) [@pentschev](https://github.com/pentschev)
+
+## 🛠️ Improvements
+
+- Pin `dask` and `distributed` for `23.08` release ([#1214](https://github.com/rapidsai/dask-cuda/pull/1214)) [@galipremsagar](https://github.com/galipremsagar)
+- Revert CUDA 12.0 CI workflows to branch-23.08. ([#1210](https://github.com/rapidsai/dask-cuda/pull/1210)) [@bdice](https://github.com/bdice)
+- Use minimal Numba dependencies for CUDA 12 ([#1209](https://github.com/rapidsai/dask-cuda/pull/1209)) [@jakirkham](https://github.com/jakirkham)
+- Aggregate reads &amp; writes in `disk_io` ([#1205](https://github.com/rapidsai/dask-cuda/pull/1205)) [@jakirkham](https://github.com/jakirkham)
+- CUDA 12 Support ([#1201](https://github.com/rapidsai/dask-cuda/pull/1201)) [@quasiben](https://github.com/quasiben)
+- Remove explicit UCX config from tests ([#1199](https://github.com/rapidsai/dask-cuda/pull/1199)) [@pentschev](https://github.com/pentschev)
+- use rapids-upload-docs script ([#1194](https://github.com/rapidsai/dask-cuda/pull/1194)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Unpin `dask` and `distributed` for development ([#1189](https://github.com/rapidsai/dask-cuda/pull/1189)) [@galipremsagar](https://github.com/galipremsagar)
+- Remove documentation build scripts for Jenkins ([#1187](https://github.com/rapidsai/dask-cuda/pull/1187)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Use KvikIO in Dask-CUDA ([#925](https://github.com/rapidsai/dask-cuda/pull/925)) [@jakirkham](https://github.com/jakirkham)
+
 # dask-cuda 23.06.00 (7 Jun 2023)
 
 ## 🚨 Breaking Changes

diff --git a/ci/build_docs.sh b/ci/build_docs.sh
@@ -23,7 +23,7 @@ rapids-mamba-retry install \
     --channel "${PYTHON_CHANNEL}" \
     dask-cuda
 
-export RAPIDS_VERSION_NUMBER="23.08"
+export RAPIDS_VERSION_NUMBER="23.10"
 export RAPIDS_DOCS_DIR="$(mktemp -d)"
 
 rapids-logger "Build Python docs"

diff --git a/ci/build_python.sh b/ci/build_python.sh
@@ -11,7 +11,7 @@ rapids-print-env
 
 rapids-logger "Begin py build"
 
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
   conda/recipes/dask-cuda
 
 rapids-upload-conda-to-s3 python
diff --git a/ci/test_python.sh b/ci/test_python.sh
@@ -74,6 +74,7 @@ UCX_WARN_UNUSED_ENV_VARS=n \
 UCX_MEMTYPE_CACHE=n \
 timeout 40m pytest \
   -vv \
+  --durations=0 \
   --capture=no \
   --cache-clear \
   --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cuda.xml" \

diff --git a/conda/recipes/dask-cuda/meta.yaml b/conda/recipes/dask-cuda/meta.yaml
@@ -32,7 +32,7 @@ requirements:
     - tomli
   run:
     - python
-    - dask-core >=2023.5.1
+    - dask-core >=2023.7.1
     {% for r in data.get("project", {}).get("dependencies", []) %}
     - {{ r }}
     {% endfor %}

diff --git a/dask_cuda/__init__.py b/dask_cuda/__init__.py
@@ -19,7 +19,7 @@
 from .local_cuda_cluster import LocalCUDACluster
 from .proxify_device_objects import proxify_decorator, unproxify_decorator
 
-__version__ = "23.08.00"
+__version__ = "23.10.00"
 
 
 # Monkey patching Dask to make use of explicit-comms when `DASK_EXPLICIT_COMMS=True`

diff --git a/dask_cuda/device_host_file.py b/dask_cuda/device_host_file.py
@@ -4,7 +4,7 @@
 import time
 
 import numpy
-from zict import Buffer, File, Func
+from zict import Buffer, Func
 from zict.common import ZictBase
 
 import dask
@@ -17,6 +17,7 @@
     serialize_bytelist,
 )
 from distributed.sizeof import safe_sizeof
+from distributed.spill import CustomFile as KeyAsStringFile
 from distributed.utils import nbytes
 
 from .is_device_object import is_device_object
@@ -201,7 +202,10 @@ def __init__(
         self.disk_func = Func(
             _serialize_bytelist,
             deserialize_bytes,
-            File(self.disk_func_path),
+            # Task keys are not strings, so this takes care of
+            # converting arbitrary tuple keys into a string before
+            # handing off to zict.File
+            KeyAsStringFile(self.disk_func_path),
         )
 
         host_buffer_kwargs = {}

diff --git a/dask_cuda/explicit_comms/comms.py b/dask_cuda/explicit_comms/comms.py
@@ -6,7 +6,6 @@
 from typing import Any, Dict, Hashable, Iterable, List, Optional
 
 import distributed.comm
-from dask.utils import stringify
 from distributed import Client, Worker, default_client, get_worker
 from distributed.comm.addressing import parse_address, parse_host_port, unparse_address
 
@@ -305,8 +304,7 @@ def stage_keys(self, name: str, keys: Iterable[Hashable]) -> Dict[int, set]:
         dict
             dict that maps each worker-rank to the workers set of staged keys
         """
-        key_set = {stringify(k) for k in keys}
-        return dict(self.run(_stage_keys, name, key_set))
+        return dict(self.run(_stage_keys, name, set(keys)))
 
 
 def pop_staging_area(session_state: dict, name: str) -> Dict[str, Any]:

diff --git a/dask_cuda/proxy_object.py b/dask_cuda/proxy_object.py
@@ -19,7 +19,6 @@
 import distributed.utils
 from dask.sizeof import sizeof
 from distributed.protocol.compression import decompress
-from distributed.worker import dumps_function, loads_function
 
 from dask_cuda.disk_io import disk_read
 
@@ -85,7 +84,7 @@ def asproxy(
             subclass = ProxyObject
             subclass_serialized = None
         else:
-            subclass_serialized = dumps_function(subclass)
+            subclass_serialized = pickle.dumps(subclass)
 
         ret = subclass(
             ProxyDetail(
@@ -440,7 +439,7 @@ def __reduce__(self):
         pxy = self._pxy_get(copy=True)
         pxy.serialize(serializers=("pickle",))
         if pxy.subclass:
-            subclass = loads_function(pxy.subclass)
+            subclass = pickle.loads(pxy.subclass)
         else:
             subclass = ProxyObject
 
@@ -882,7 +881,7 @@ def obj_pxy_dask_deserialize(header, frames):
     if args["subclass"] is None:
         subclass = ProxyObject
     else:
-        subclass = loads_function(args["subclass"])
+        subclass = pickle.loads(args["subclass"])
     pxy = ProxyDetail(obj=(header["proxied-header"], frames), **args)
     if pxy.serializer == "disk":
         header, _ = pxy.obj

diff --git a/dask_cuda/tests/test_dask_cuda_worker.py b/dask_cuda/tests/test_dask_cuda_worker.py
@@ -153,6 +153,56 @@ def test_rmm_async(loop):  # noqa: F811
                 assert ret["[plugin] RMMSetup"]["release_threshold"] == 3000000000
 
 
+def test_rmm_async_with_maximum_pool_size(loop):  # noqa: F811
+    rmm = pytest.importorskip("rmm")
+
+    driver_version = rmm._cuda.gpu.driverGetVersion()
+    runtime_version = rmm._cuda.gpu.runtimeGetVersion()
+    if driver_version < 11020 or runtime_version < 11020:
+        pytest.skip("cudaMallocAsync not supported")
+
+    with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
+        with popen(
+            [
+                "dask",
+                "cuda",
+                "worker",
+                "127.0.0.1:9369",
+                "--host",
+                "127.0.0.1",
+                "--rmm-async",
+                "--rmm-pool-size",
+                "2 GB",
+                "--rmm-release-threshold",
+                "3 GB",
+                "--rmm-maximum-pool-size",
+                "4 GB",
+                "--no-dashboard",
+            ]
+        ):
+            with Client("127.0.0.1:9369", loop=loop) as client:
+                assert wait_workers(client, n_gpus=get_n_gpus())
+
+                memory_resource_types = client.run(
+                    lambda: (
+                        rmm.mr.get_current_device_resource_type(),
+                        type(rmm.mr.get_current_device_resource().get_upstream()),
+                    )
+                )
+                for v in memory_resource_types.values():
+                    memory_resource_type, upstream_memory_resource_type = v
+                    assert memory_resource_type is rmm.mr.LimitingResourceAdaptor
+                    assert (
+                        upstream_memory_resource_type is rmm.mr.CudaAsyncMemoryResource
+                    )
+
+                ret = get_cluster_configuration(client)
+                wait(ret)
+                assert ret["[plugin] RMMSetup"]["initial_pool_size"] == 2000000000
+                assert ret["[plugin] RMMSetup"]["release_threshold"] == 3000000000
+                assert ret["[plugin] RMMSetup"]["maximum_pool_size"] == 4000000000
+
+
 def test_rmm_logging(loop):  # noqa: F811
     rmm = pytest.importorskip("rmm")
     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
@@ -450,5 +500,12 @@ def test_worker_timeout():
     )
 
     assert "closing nanny at" in ret.stderr.lower()
-    assert "reason: nanny-close" in ret.stderr.lower()
+
+    # Depending on the environment, the error raised may be different
+    try:
+        assert "reason: failure-to-start-" in ret.stderr.lower()
+        assert "timeouterror" in ret.stderr.lower()
+    except AssertionError:
+        assert "reason: nanny-close" in ret.stderr.lower()
+
     assert ret.returncode == 0
diff --git a/dask_cuda/tests/test_local_cuda_cluster.py b/dask_cuda/tests/test_local_cuda_cluster.py
@@ -261,6 +261,40 @@ async def test_rmm_async():
             assert ret["[plugin] RMMSetup"]["release_threshold"] == 3000000000
 
 
+@gen_test(timeout=20)
+async def test_rmm_async_with_maximum_pool_size():
+    rmm = pytest.importorskip("rmm")
+
+    driver_version = rmm._cuda.gpu.driverGetVersion()
+    runtime_version = rmm._cuda.gpu.runtimeGetVersion()
+    if driver_version < 11020 or runtime_version < 11020:
+        pytest.skip("cudaMallocAsync not supported")
+
+    async with LocalCUDACluster(
+        rmm_async=True,
+        rmm_pool_size="2GB",
+        rmm_release_threshold="3GB",
+        rmm_maximum_pool_size="4GB",
+        asynchronous=True,
+    ) as cluster:
+        async with Client(cluster, asynchronous=True) as client:
+            memory_resource_types = await client.run(
+                lambda: (
+                    rmm.mr.get_current_device_resource_type(),
+                    type(rmm.mr.get_current_device_resource().get_upstream()),
+                )
+            )
+            for v in memory_resource_types.values():
+                memory_resource_type, upstream_memory_resource_type = v
+                assert memory_resource_type is rmm.mr.LimitingResourceAdaptor
+                assert upstream_memory_resource_type is rmm.mr.CudaAsyncMemoryResource
+
+            ret = await get_cluster_configuration(client)
+            assert ret["[plugin] RMMSetup"]["initial_pool_size"] == 2000000000
+            assert ret["[plugin] RMMSetup"]["release_threshold"] == 3000000000
+            assert ret["[plugin] RMMSetup"]["maximum_pool_size"] == 4000000000
+
+
 @gen_test(timeout=20)
 async def test_rmm_logging():
     rmm = pytest.importorskip("rmm")
@@ -420,6 +454,7 @@ async def test_get_cluster_configuration():
 @gen_test(timeout=20)
 async def test_worker_fraction_limits():
     async with LocalCUDACluster(
+        dashboard_address=None,
         device_memory_limit=0.1,
         rmm_pool_size=0.2,
         rmm_maximum_pool_size=0.3,

diff --git a/dask_cuda/tests/test_proxify_host_file.py b/dask_cuda/tests/test_proxify_host_file.py
@@ -384,7 +384,7 @@ def test_incompatible_types(root_dir):
 
 @pytest.mark.parametrize("npartitions", [1, 2, 3])
 @pytest.mark.parametrize("compatibility_mode", [True, False])
-@gen_test(timeout=20)
+@gen_test(timeout=30)
 async def test_compatibility_mode_dataframe_shuffle(compatibility_mode, npartitions):
     cudf = pytest.importorskip("cudf")
 

diff --git a/dask_cuda/tests/test_proxy.py b/dask_cuda/tests/test_proxy.py
@@ -400,7 +400,7 @@ def _pxy_deserialize(self):
 
 @pytest.mark.parametrize("send_serializers", [None, ("dask", "pickle"), ("cuda",)])
 @pytest.mark.parametrize("protocol", ["tcp", "ucx"])
-@gen_test(timeout=20)
+@gen_test(timeout=60)
 async def test_communicating_proxy_objects(protocol, send_serializers):
     """Testing serialization of cuDF dataframe when communicating"""
     cudf = pytest.importorskip("cudf")