From 12e0051e30bfbdf886b22098591e84e64f8556bd Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Mon, 7 Oct 2024 17:24:52 -0700 Subject: [PATCH] POC pytorch in dask-cuda Signed-off-by: Vibhu Jawa --- dask_cuda/cli.py | 11 ++++++ dask_cuda/cuda_worker.py | 5 +++ dask_cuda/local_cuda_cluster.py | 11 ++++++ dask_cuda/plugins.py | 9 ++++- dask_cuda/utils.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 93 insertions(+), 2 deletions(-) diff --git a/dask_cuda/cli.py b/dask_cuda/cli.py index a8c6d972c..8d335ec39 100644 --- a/dask_cuda/cli.py +++ b/dask_cuda/cli.py @@ -164,6 +164,15 @@ def cuda(): incompatible with RMM pools and managed memory, trying to enable both will result in failure.""", ) +@click.option( + "--set-rmm-allocator-for-libs", + "rmm_allocator_external_lib_list", + default=None, + show_default=True, + help=""" + Set RMM as the allocator for external libraries. Provide a comma-separated + list of libraries to set, e.g., "torch,cupy". Supported options are: torch, cupy.""", +) @click.option( "--rmm-release-threshold", default=None, @@ -351,6 +359,7 @@ def worker( rmm_maximum_pool_size, rmm_managed_memory, rmm_async, + rmm_allocator_external_lib_list, rmm_release_threshold, rmm_log_directory, rmm_track_allocations, @@ -425,6 +434,7 @@ def worker( rmm_maximum_pool_size, rmm_managed_memory, rmm_async, + rmm_allocator_external_lib_list, rmm_release_threshold, rmm_log_directory, rmm_track_allocations, diff --git a/dask_cuda/cuda_worker.py b/dask_cuda/cuda_worker.py index 3e03ed297..91128eb10 100644 --- a/dask_cuda/cuda_worker.py +++ b/dask_cuda/cuda_worker.py @@ -47,6 +47,7 @@ def __init__( rmm_maximum_pool_size=None, rmm_managed_memory=False, rmm_async=False, + rmm_allocator_external_lib_list=None, rmm_release_threshold=None, rmm_log_directory=None, rmm_track_allocations=False, @@ -202,6 +203,9 @@ def del_pid_file(): "processes set `CUDF_SPILL=on` as well. To disable this warning " "set `DASK_CUDF_SPILL_WARNING=False`." 
) + + if rmm_allocator_external_lib_list is not None: + rmm_allocator_external_lib_list = [s.strip() for s in rmm_allocator_external_lib_list.split(',')] self.nannies = [ Nanny( @@ -231,6 +235,7 @@ def del_pid_file(): release_threshold=rmm_release_threshold, log_directory=rmm_log_directory, track_allocations=rmm_track_allocations, + external_lib_list=rmm_allocator_external_lib_list, ), PreImport(pre_import), CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats), diff --git a/dask_cuda/local_cuda_cluster.py b/dask_cuda/local_cuda_cluster.py index c037223b2..ed95ba3b4 100644 --- a/dask_cuda/local_cuda_cluster.py +++ b/dask_cuda/local_cuda_cluster.py @@ -143,6 +143,10 @@ class LocalCUDACluster(LocalCluster): The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also incompatible with RMM pools and managed memory. Trying to enable both will result in an exception. + rmm_allocator_external_lib_list: str or list or None, default None + Set RMM as the allocator for external libraries. Can be a comma-separated + string (like "torch,cupy"). + rmm_release_threshold: int, str or None, default None When ``rmm.async is True`` and the pool size grows beyond this value, unused memory held by the pool will be released at the next synchronization point. 
@@ -231,6 +235,7 @@ def __init__( rmm_maximum_pool_size=None, rmm_managed_memory=False, rmm_async=False, + rmm_allocator_external_lib_list=None, rmm_release_threshold=None, rmm_log_directory=None, rmm_track_allocations=False, @@ -284,6 +289,11 @@ def __init__( self.rmm_managed_memory = rmm_managed_memory self.rmm_async = rmm_async self.rmm_release_threshold = rmm_release_threshold + if rmm_allocator_external_lib_list is not None: + rmm_allocator_external_lib_list = [s.strip() for s in + rmm_allocator_external_lib_list.split(',')] + self.rmm_allocator_external_lib_list = rmm_allocator_external_lib_list + if rmm_pool_size is not None or rmm_managed_memory or rmm_async: try: import rmm # noqa F401 @@ -437,6 +447,7 @@ def new_worker_spec(self): release_threshold=self.rmm_release_threshold, log_directory=self.rmm_log_directory, track_allocations=self.rmm_track_allocations, + external_lib_list=self.rmm_allocator_external_lib_list ), PreImport(self.pre_import), CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats), diff --git a/dask_cuda/plugins.py b/dask_cuda/plugins.py index 122f93ffa..4e3726e46 100644 --- a/dask_cuda/plugins.py +++ b/dask_cuda/plugins.py @@ -3,7 +3,7 @@ from distributed import WorkerPlugin -from .utils import get_rmm_log_file_name, parse_device_memory_limit +from .utils import get_rmm_log_file_name, parse_device_memory_limit, enable_rmm_memory_for_library class CPUAffinity(WorkerPlugin): @@ -39,6 +39,7 @@ def __init__( release_threshold, log_directory, track_allocations, + external_lib_list, ): if initial_pool_size is None and maximum_pool_size is not None: raise ValueError( @@ -61,6 +62,8 @@ def __init__( self.logging = log_directory is not None self.log_directory = log_directory self.rmm_track_allocations = track_allocations + self.external_lib_list = external_lib_list + def setup(self, worker=None): if self.initial_pool_size is not None: @@ -122,6 +125,10 @@ def setup(self, worker=None): mr = rmm.mr.get_current_device_resource() 
def enable_rmm_memory_for_library(lib_name: str) -> None:
    """
    Enable RMM memory pool support for a specified third-party library.

    Looks up the configuration routine registered for ``lib_name`` and, if
    the library is supported, points that library's memory allocator at RMM.

    Parameters
    ----------
    lib_name : str
        Name of the third-party library to enable RMM memory pool support
        for. Supported values are ``"torch"`` and ``"cupy"``.

    Raises
    ------
    ValueError
        If the library name is not supported or does not have RMM integration.
    ImportError
        If the required library is not installed.
    """
    # Dispatch table: supported library name -> configuration routine.
    configure_for: Dict[str, Callable[[], None]] = {
        "torch": _setup_rmm_for_torch,
        "cupy": _setup_rmm_for_cupy,
    }

    if lib_name in configure_for:
        configure_for[lib_name]()
        return

    raise ValueError(
        f"The library '{lib_name}' is not supported for RMM integration. "
        f"Supported libraries are: {', '.join(configure_for)}."
    )


def _setup_rmm_for_torch() -> None:
    """Route PyTorch CUDA allocations through RMM's torch allocator."""
    try:
        import torch
    except ImportError as err:
        raise ImportError("PyTorch is not installed.") from err

    # Imported lazily so merely loading this module does not require RMM.
    from rmm.allocators.torch import rmm_torch_allocator

    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)


def _setup_rmm_for_cupy() -> None:
    """Route CuPy allocations through RMM's cupy allocator."""
    try:
        import cupy
    except ImportError as err:
        raise ImportError("CuPy is not installed.") from err

    # Imported lazily so merely loading this module does not require RMM.
    from rmm.allocators.cupy import rmm_cupy_allocator

    cupy.cuda.set_allocator(rmm_cupy_allocator)