Skip to content

Commit

Permalink
Specify disk spill compression based on Dask config (#1190)
Browse files Browse the repository at this point in the history
Spill to disk compression was introduced in dask/distributed#7768 and Dask-CUDA should also allow modifying the default compression via Dask config. This change is required to support `distributed>=2023.5.0`.

Authors:
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #1190
  • Loading branch information
pentschev authored Jun 5, 2023
1 parent bba3d3f commit bf60373
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
17 changes: 14 additions & 3 deletions dask_cuda/device_host_file.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import functools
import itertools
import logging
import os
Expand All @@ -8,6 +7,7 @@
from zict import Buffer, File, Func
from zict.common import ZictBase

import dask
from distributed.protocol import (
dask_deserialize,
dask_serialize,
Expand All @@ -17,13 +17,24 @@
serialize_bytelist,
)
from distributed.sizeof import safe_sizeof
from distributed.utils import nbytes
from distributed.utils import has_arg, nbytes

from .is_device_object import is_device_object
from .is_spillable_object import is_spillable_object
from .utils import nvtx_annotate


def _serialize_bytelist(x, **kwargs):
kwargs["on_error"] = "raise"

if has_arg(serialize_bytelist, "compression"):
compression = dask.config.get("distributed.worker.memory.spill-compression")
return serialize_bytelist(x, compression=compression, **kwargs)
else:
# For Distributed < 2023.5.0 compatibility
return serialize_bytelist(x, **kwargs)


class LoggedBuffer(Buffer):
"""Extends zict.Buffer with logging capabilities
Expand Down Expand Up @@ -192,7 +203,7 @@ def __init__(

self.host_func = dict()
self.disk_func = Func(
functools.partial(serialize_bytelist, on_error="raise"),
_serialize_bytelist,
deserialize_bytes,
File(self.disk_func_path),
)
Expand Down
1 change: 1 addition & 0 deletions dask_cuda/tests/test_spill.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ async def test_cudf_cluster_device_spill(params):
{
"distributed.comm.compression": False,
"distributed.worker.memory.terminate": False,
"distributed.worker.memory.spill-compression": False,
}
):
async with LocalCUDACluster(
Expand Down

0 comments on commit bf60373

Please sign in to comment.