Move plugins to new dask_cuda.plugins module
pentschev committed Oct 23, 2023
1 parent d94a028 commit 2e73bc4
Showing 4 changed files with 125 additions and 124 deletions.
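For downstream code this commit is purely an import-path change: the CPUAffinity, PreImport, and RMMSetup worker plugins move from dask_cuda.utils to the new dask_cuda.plugins module, with no change in behavior. A minimal before/after sketch:

    # Before this commit:
    from dask_cuda.utils import CPUAffinity, PreImport, RMMSetup

    # After this commit:
    from dask_cuda.plugins import CPUAffinity, PreImport, RMMSetup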
4 changes: 1 addition & 3 deletions dask_cuda/cuda_worker.py
@@ -20,11 +20,9 @@
 
 from .device_host_file import DeviceHostFile
 from .initialize import initialize
+from .plugins import CPUAffinity, PreImport, RMMSetup
 from .proxify_host_file import ProxifyHostFile
 from .utils import (
-    CPUAffinity,
-    PreImport,
-    RMMSetup,
     cuda_visible_devices,
     get_cpu_affinity,
     get_n_gpus,
4 changes: 1 addition & 3 deletions dask_cuda/local_cuda_cluster.py
@@ -11,11 +11,9 @@
 
 from .device_host_file import DeviceHostFile
 from .initialize import initialize
+from .plugins import CPUAffinity, PreImport, RMMSetup
 from .proxify_host_file import ProxifyHostFile
 from .utils import (
-    CPUAffinity,
-    PreImport,
-    RMMSetup,
     cuda_visible_devices,
     get_cpu_affinity,
     get_ucx_config,
122 changes: 122 additions & 0 deletions dask_cuda/plugins.py
@@ -0,0 +1,122 @@
+import importlib
+import os
+
+from distributed import WorkerPlugin
+
+from .utils import get_rmm_log_file_name, parse_device_memory_limit
+
+
+class CPUAffinity(WorkerPlugin):
+    def __init__(self, cores):
+        self.cores = cores
+
+    def setup(self, worker=None):
+        os.sched_setaffinity(0, self.cores)
+
+
+class RMMSetup(WorkerPlugin):
+    def __init__(
+        self,
+        initial_pool_size,
+        maximum_pool_size,
+        managed_memory,
+        async_alloc,
+        release_threshold,
+        log_directory,
+        track_allocations,
+    ):
+        if initial_pool_size is None and maximum_pool_size is not None:
+            raise ValueError(
+                "`rmm_maximum_pool_size` was specified without specifying "
+                "`rmm_pool_size`. `rmm_pool_size` must be specified to use RMM pool."
+            )
+        if async_alloc is True:
+            if managed_memory is True:
+                raise ValueError(
+                    "`rmm_managed_memory` is incompatible with `rmm_async`."
+                )
+        if async_alloc is False and release_threshold is not None:
+            raise ValueError("`rmm_release_threshold` requires `rmm_async`.")
+
+        self.initial_pool_size = initial_pool_size
+        self.maximum_pool_size = maximum_pool_size
+        self.managed_memory = managed_memory
+        self.async_alloc = async_alloc
+        self.release_threshold = release_threshold
+        self.logging = log_directory is not None
+        self.log_directory = log_directory
+        self.rmm_track_allocations = track_allocations
+
+    def setup(self, worker=None):
+        if self.initial_pool_size is not None:
+            self.initial_pool_size = parse_device_memory_limit(
+                self.initial_pool_size, alignment_size=256
+            )
+
+        if self.async_alloc:
+            import rmm
+
+            if self.release_threshold is not None:
+                self.release_threshold = parse_device_memory_limit(
+                    self.release_threshold, alignment_size=256
+                )
+
+            mr = rmm.mr.CudaAsyncMemoryResource(
+                initial_pool_size=self.initial_pool_size,
+                release_threshold=self.release_threshold,
+            )
+
+            if self.maximum_pool_size is not None:
+                self.maximum_pool_size = parse_device_memory_limit(
+                    self.maximum_pool_size, alignment_size=256
+                )
+                mr = rmm.mr.LimitingResourceAdaptor(
+                    mr, allocation_limit=self.maximum_pool_size
+                )
+
+            rmm.mr.set_current_device_resource(mr)
+            if self.logging:
+                rmm.enable_logging(
+                    log_file_name=get_rmm_log_file_name(
+                        worker, self.logging, self.log_directory
+                    )
+                )
+        elif self.initial_pool_size is not None or self.managed_memory:
+            import rmm
+
+            pool_allocator = self.initial_pool_size is not None
+
+            if self.initial_pool_size is not None:
+                if self.maximum_pool_size is not None:
+                    self.maximum_pool_size = parse_device_memory_limit(
+                        self.maximum_pool_size, alignment_size=256
+                    )
+
+            rmm.reinitialize(
+                pool_allocator=pool_allocator,
+                managed_memory=self.managed_memory,
+                initial_pool_size=self.initial_pool_size,
+                maximum_pool_size=self.maximum_pool_size,
+                logging=self.logging,
+                log_file_name=get_rmm_log_file_name(
+                    worker, self.logging, self.log_directory
+                ),
+            )
+        if self.rmm_track_allocations:
+            import rmm
+
+            mr = rmm.mr.get_current_device_resource()
+            rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr))
+
+
+class PreImport(WorkerPlugin):
+    def __init__(self, libraries):
+        if libraries is None:
+            libraries = []
+        elif isinstance(libraries, str):
+            libraries = libraries.split(",")
+        self.libraries = libraries
+
+    def setup(self, worker=None):
+        for library in self.libraries:
+            importlib.import_module(library)
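Because these classes are ordinary distributed WorkerPlugins, they can also be registered by hand on an existing cluster. A minimal sketch, not part of this commit — the scheduler address and core list below are hypothetical, and LocalCUDACluster/CUDAWorker normally install these plugins automatically:

    from distributed import Client

    from dask_cuda.plugins import CPUAffinity, PreImport

    client = Client("tcp://127.0.0.1:8786")  # hypothetical scheduler address

    # Pin each worker process to an example set of CPU cores.
    client.register_worker_plugin(CPUAffinity(cores=[0, 1, 2, 3]))

    # Import cupy on every worker before it runs its first task.
    client.register_worker_plugin(PreImport("cupy"))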
119 changes: 1 addition & 118 deletions dask_cuda/utils.py
@@ -1,4 +1,3 @@
-import importlib
 import math
 import operator
 import os
@@ -18,7 +17,7 @@
 import distributed  # noqa: required for dask.config.get("distributed.comm.ucx")
 from dask.config import canonical_name
 from dask.utils import format_bytes, parse_bytes
-from distributed import Worker, WorkerPlugin, wait
+from distributed import Worker, wait
 from distributed.comm import parse_address
 
 try:
@@ -32,122 +31,6 @@ def nvtx_annotate(message=None, color="blue", domain=None):
         yield
 
 
-class CPUAffinity(WorkerPlugin):
-    def __init__(self, cores):
-        self.cores = cores
-
-    def setup(self, worker=None):
-        os.sched_setaffinity(0, self.cores)
-
-
-class RMMSetup(WorkerPlugin):
-    def __init__(
-        self,
-        initial_pool_size,
-        maximum_pool_size,
-        managed_memory,
-        async_alloc,
-        release_threshold,
-        log_directory,
-        track_allocations,
-    ):
-        if initial_pool_size is None and maximum_pool_size is not None:
-            raise ValueError(
-                "`rmm_maximum_pool_size` was specified without specifying "
-                "`rmm_pool_size`.`rmm_pool_size` must be specified to use RMM pool."
-            )
-        if async_alloc is True:
-            if managed_memory is True:
-                raise ValueError(
-                    "`rmm_managed_memory` is incompatible with the `rmm_async`."
-                )
-        if async_alloc is False and release_threshold is not None:
-            raise ValueError("`rmm_release_threshold` requires `rmm_async`.")
-
-        self.initial_pool_size = initial_pool_size
-        self.maximum_pool_size = maximum_pool_size
-        self.managed_memory = managed_memory
-        self.async_alloc = async_alloc
-        self.release_threshold = release_threshold
-        self.logging = log_directory is not None
-        self.log_directory = log_directory
-        self.rmm_track_allocations = track_allocations
-
-    def setup(self, worker=None):
-        if self.initial_pool_size is not None:
-            self.initial_pool_size = parse_device_memory_limit(
-                self.initial_pool_size, alignment_size=256
-            )
-
-        if self.async_alloc:
-            import rmm
-
-            if self.release_threshold is not None:
-                self.release_threshold = parse_device_memory_limit(
-                    self.release_threshold, alignment_size=256
-                )
-
-            mr = rmm.mr.CudaAsyncMemoryResource(
-                initial_pool_size=self.initial_pool_size,
-                release_threshold=self.release_threshold,
-            )
-
-            if self.maximum_pool_size is not None:
-                self.maximum_pool_size = parse_device_memory_limit(
-                    self.maximum_pool_size, alignment_size=256
-                )
-                mr = rmm.mr.LimitingResourceAdaptor(
-                    mr, allocation_limit=self.maximum_pool_size
-                )
-
-            rmm.mr.set_current_device_resource(mr)
-            if self.logging:
-                rmm.enable_logging(
-                    log_file_name=get_rmm_log_file_name(
-                        worker, self.logging, self.log_directory
-                    )
-                )
-        elif self.initial_pool_size is not None or self.managed_memory:
-            import rmm
-
-            pool_allocator = False if self.initial_pool_size is None else True
-
-            if self.initial_pool_size is not None:
-                if self.maximum_pool_size is not None:
-                    self.maximum_pool_size = parse_device_memory_limit(
-                        self.maximum_pool_size, alignment_size=256
-                    )
-
-            rmm.reinitialize(
-                pool_allocator=pool_allocator,
-                managed_memory=self.managed_memory,
-                initial_pool_size=self.initial_pool_size,
-                maximum_pool_size=self.maximum_pool_size,
-                logging=self.logging,
-                log_file_name=get_rmm_log_file_name(
-                    worker, self.logging, self.log_directory
-                ),
-            )
-        if self.rmm_track_allocations:
-            import rmm
-
-            mr = rmm.mr.get_current_device_resource()
-            rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr))
-
-
-class PreImport(WorkerPlugin):
-    def __init__(self, libraries):
-        if libraries is None:
-            libraries = []
-        elif isinstance(libraries, str):
-            libraries = libraries.split(",")
-        self.libraries = libraries
-
-    def setup(self, worker=None):
-        for l in self.libraries:
-            importlib.import_module(l)
-
-
 def unpack_bitmask(x, mask_bits=64):
     """Unpack a list of integers containing bitmasks.
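RMMSetup itself is driven by the RMM-related keyword arguments of LocalCUDACluster, which its error messages reference by name (rmm_pool_size, rmm_maximum_pool_size, and so on). A minimal sketch of a configuration that satisfies its validation rules — the pool sizes shown are illustrative only:

    from dask_cuda import LocalCUDACluster

    # A maximum pool size is only valid when an initial pool size is given,
    # and managed memory cannot be combined with the async allocator.
    cluster = LocalCUDACluster(
        rmm_pool_size="1GB",
        rmm_maximum_pool_size="2GB",
        rmm_managed_memory=False,
    )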
