From e764a4e61a0bf53002db9ff1bae8914be675a863 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 23 Jul 2024 21:08:17 +0200 Subject: [PATCH] Add argument to enable cuDF spilling and set statistics --- dask_cuda/cli.py | 18 ++++++++++++++++++ dask_cuda/cuda_worker.py | 5 ++++- dask_cuda/local_cuda_cluster.py | 15 ++++++++++++++- dask_cuda/plugins.py | 14 ++++++++++++++ 4 files changed, 50 insertions(+), 2 deletions(-) diff --git a/dask_cuda/cli.py b/dask_cuda/cli.py index ba58fe3e5..f595bccf3 100644 --- a/dask_cuda/cli.py +++ b/dask_cuda/cli.py @@ -101,6 +101,20 @@ def cuda(): total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"`` or 0 to disable spilling to host (i.e. allow full device memory usage).""", ) +@click.option( + "--enable-cudf-spill/--disable-cudf-spill", + default=False, + show_default=True, + help="""Enable automatic cuDF spilling. WARNING: This should NOT be used with + JIT-Unspill.""", +) +@click.option( + "--cudf-spill-stats", + type=int, + default=0, + help="""Set the cuDF spilling statistics level. This option has no effect if + `--enable-cudf-spill` is not specified.""" +) @click.option( "--rmm-pool-size", default=None, @@ -330,6 +344,8 @@ def worker( name, memory_limit, device_memory_limit, + enable_cudf_spill, + cudf_spill_stats, rmm_pool_size, rmm_maximum_pool_size, rmm_managed_memory, @@ -402,6 +418,8 @@ def worker( name, memory_limit, device_memory_limit, + enable_cudf_spill, + cudf_spill_stats, rmm_pool_size, rmm_maximum_pool_size, rmm_managed_memory, diff --git a/dask_cuda/cuda_worker.py b/dask_cuda/cuda_worker.py index e25a7c142..1d60ffdc5 100644 --- a/dask_cuda/cuda_worker.py +++ b/dask_cuda/cuda_worker.py @@ -20,7 +20,7 @@ from .device_host_file import DeviceHostFile from .initialize import initialize -from .plugins import CPUAffinity, PreImport, RMMSetup +from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup from .proxify_host_file import ProxifyHostFile from .utils import ( cuda_visible_devices, @@ -41,6 +41,8 @@ def __init__( name=None, memory_limit="auto", device_memory_limit="auto", + enable_cudf_spill=False, + cudf_spill_stats=0, rmm_pool_size=None, rmm_maximum_pool_size=None, rmm_managed_memory=False, @@ -217,6 +219,7 @@ def del_pid_file(): track_allocations=rmm_track_allocations, ), PreImport(pre_import), + CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats), }, name=name if nprocs == 1 or name is None else str(name) + "-" + str(i), local_directory=local_directory, diff --git a/dask_cuda/local_cuda_cluster.py b/dask_cuda/local_cuda_cluster.py index 1b81c7703..af99e8ced 100644 --- a/dask_cuda/local_cuda_cluster.py +++ b/dask_cuda/local_cuda_cluster.py @@ -10,7 +10,7 @@ from .device_host_file import DeviceHostFile from .initialize import initialize -from .plugins import CPUAffinity, PreImport, RMMSetup +from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup from .proxify_host_file import ProxifyHostFile from .utils import ( cuda_visible_devices, @@ -73,6 +73,14 @@ class LocalCUDACluster(LocalCluster): starts spilling to host memory. Can be an integer (bytes), float (fraction of total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"``, 0, or ``None`` to disable spilling to host (i.e. allow full device memory usage). + enable_cudf_spill : bool, default False + Enable automatic cuDF spilling. + + .. warning:: + This should NOT be used together with JIT-Unspill. + cudf_spill_stats : int, default 0 + Set the cuDF spilling statistics level. This option has no effect if + ``enable_cudf_spill=False``. local_directory : str or None, default None Path on local machine to store temporary files. Can be a string (like ``"path/to/files"``) or ``None`` to fall back on the value of @@ -209,6 +217,8 @@ def __init__( threads_per_worker=1, memory_limit="auto", device_memory_limit=0.8, + enable_cudf_spill=False, + cudf_spill_stats=0, data=None, local_directory=None, shared_filesystem=None, @@ -259,6 +269,8 @@ def __init__( self.device_memory_limit = parse_device_memory_limit( device_memory_limit, device_index=nvml_device_index(0, CUDA_VISIBLE_DEVICES) ) + self.enable_cudf_spill = enable_cudf_spill + self.cudf_spill_stats = cudf_spill_stats self.rmm_pool_size = rmm_pool_size self.rmm_maximum_pool_size = rmm_maximum_pool_size @@ -414,6 +426,7 @@ def new_worker_spec(self): track_allocations=self.rmm_track_allocations, ), PreImport(self.pre_import), + CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats), }, } ) diff --git a/dask_cuda/plugins.py b/dask_cuda/plugins.py index 4eba97f2b..31308e098 100644 --- a/dask_cuda/plugins.py +++ b/dask_cuda/plugins.py @@ -14,6 +14,20 @@ def setup(self, worker=None): os.sched_setaffinity(0, self.cores) +class CUDFSetup(WorkerPlugin): + def __init__(self, spill, spill_stats): + self.spill = spill + self.spill_stats = spill_stats + + def setup(self, worker=None): + try: + import cudf + cudf.set_option("spill", self.spill) + cudf.set_option("spill_stats", self.spill_stats) + except ImportError: + pass + + class RMMSetup(WorkerPlugin): def __init__( self,