From 1df54c478db35f7d50c42e67e36dffc007e43556 Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Tue, 8 Oct 2024 16:44:09 -0400 Subject: [PATCH 01/27] deleted $ for multiline commands --- source/cloud/azure/aks.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/cloud/azure/aks.md b/source/cloud/azure/aks.md index 3c2d0732..c74facd3 100644 --- a/source/cloud/azure/aks.md +++ b/source/cloud/azure/aks.md @@ -23,7 +23,7 @@ $ az login Now we can launch a GPU enabled AKS cluster. First launch an AKS cluster. ```console -$ az aks create -g -n rapids \ + az aks create -g -n rapids \ --enable-managed-identity \ --node-count 1 \ --enable-addons monitoring \ @@ -92,7 +92,7 @@ $ az extension add --name aks-preview ````` ```console -$ az aks nodepool add \ + az aks nodepool add \ --resource-group \ --cluster-name rapids \ --name gpunp \ From 9b7088a0830e506b1e73b3d48577f9b6c3ccea77 Mon Sep 17 00:00:00 2001 From: Melody Wang <98235366+melodywang060@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:06:39 -0400 Subject: [PATCH 02/27] Update source/cloud/azure/aks.md Co-authored-by: Jacob Tomlinson --- source/cloud/azure/aks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/cloud/azure/aks.md b/source/cloud/azure/aks.md index c74facd3..ce2eae3d 100644 --- a/source/cloud/azure/aks.md +++ b/source/cloud/azure/aks.md @@ -22,7 +22,7 @@ $ az login Now we can launch a GPU enabled AKS cluster. First launch an AKS cluster. -```console +```bash az aks create -g -n rapids \ --enable-managed-identity \ --node-count 1 \ From f1a8682d9cd3f7cccbd095dd4eb7904dfffdc092 Mon Sep 17 00:00:00 2001 From: Melody Wang <98235366+melodywang060@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:06:47 -0400 Subject: [PATCH 03/27] Update source/cloud/azure/aks.md Co-authored-by: Jacob Tomlinson --- source/cloud/azure/aks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/cloud/azure/aks.md b/source/cloud/azure/aks.md index ce2eae3d..e331b54a 100644 --- a/source/cloud/azure/aks.md +++ b/source/cloud/azure/aks.md @@ -23,7 +23,7 @@ $ az login Now we can launch a GPU enabled AKS cluster. First launch an AKS cluster. 
```bash - az aks create -g -n rapids \ +az aks create -g -n rapids \ --enable-managed-identity \ --node-count 1 \ --enable-addons monitoring \ From bcf36bcf3d6676589d87b0bf38d1ffccf3daedc8 Mon Sep 17 00:00:00 2001 From: Melody Wang <98235366+melodywang060@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:06:52 -0400 Subject: [PATCH 04/27] Update source/cloud/azure/aks.md Co-authored-by: Jacob Tomlinson --- source/cloud/azure/aks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/cloud/azure/aks.md b/source/cloud/azure/aks.md index e331b54a..d70c95fa 100644 --- a/source/cloud/azure/aks.md +++ b/source/cloud/azure/aks.md @@ -91,7 +91,7 @@ $ az extension add --name aks-preview ````` -```console +```bash az aks nodepool add \ --resource-group \ --cluster-name rapids \ From f54e67b3d0b84dab38bd52104ad6f3dea8ecc1f1 Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Wed, 9 Oct 2024 14:23:12 -0400 Subject: [PATCH 05/27] fixed multiline command issue --- source/_includes/check-gpu-pod-works.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/_includes/check-gpu-pod-works.md b/source/_includes/check-gpu-pod-works.md index bd5593c7..617a1944 100644 --- a/source/_includes/check-gpu-pod-works.md +++ b/source/_includes/check-gpu-pod-works.md @@ -1,7 +1,7 @@ Let's create a sample pod that uses some GPU compute to make sure that everything is working as expected. -```console -$ cat << EOF | kubectl create -f - +```bash +cat << EOF | kubectl create -f - apiVersion: v1 kind: Pod metadata: From c57b8c3602687ae275eb030c2919070c3263f754 Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 11:10:49 -0400 Subject: [PATCH 06/27] added more detailed instructions --- source/cloud/azure/azureml.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/cloud/azure/azureml.md b/source/cloud/azure/azureml.md index 5bfc1a99..ffe502ac 100644 --- a/source/cloud/azure/azureml.md +++ b/source/cloud/azure/azureml.md @@ -32,7 +32,7 @@ The compute instance provides an integrated Jupyter notebook service, JupyterLab Sign in to [Azure Machine Learning Studio](https://ml.azure.com/) and navigate to your workspace on the left-side menu. 
-Select **Compute** > **+ New** > choose a [RAPIDS compatible GPU](https://medium.com/dropout-analytics/which-gpus-work-with-rapids-ai-f562ef29c75f) VM size (e.g., `Standard_NC12s_v3`) +Select **Compute** > **+ New** (Create compute instance) > choose a [RAPIDS compatible GPU](https://medium.com/dropout-analytics/which-gpus-work-with-rapids-ai-f562ef29c75f) VM size (e.g., `Standard_NC12s_v3`) ![Screenshot of create new notebook with a gpu-instance](../../images/azureml-create-notebook-instance.png) From 1205ab9c00a0c5f7f6cf4e2cfede53866ca5714a Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 12:27:46 -0400 Subject: [PATCH 07/27] added clearer user input sections --- source/examples/rapids-azureml-hpo/notebook.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/examples/rapids-azureml-hpo/notebook.ipynb b/source/examples/rapids-azureml-hpo/notebook.ipynb index d4bee24c..f2e3ac73 100644 --- a/source/examples/rapids-azureml-hpo/notebook.ipynb +++ b/source/examples/rapids-azureml-hpo/notebook.ipynb @@ -100,9 +100,9 @@ "# Get a handle to the workspace\n", "ml_client = MLClient(\n", " credential=DefaultAzureCredential(),\n", - " subscription_id=\"fc4f4a6b-4041-4b1c-8249-854d68edcf62\",\n", - " resource_group_name=\"rapidsai-deployment\",\n", - " workspace_name=\"rapids-aml-cluster\",\n", + " subscription_id= #FILL IN WITH YOUR AZURE ML CREDENTIALS,\n", + " resource_group_name= #FILL IN WITH YOUR AZURE ML CREDENTIALS,\n", + " workspace_name= #FILL IN WITH YOUR AZURE ML CREDENTIALS,\n", ")\n", "\n", "print(\n", From 33b27db579e3a4a67bea002d470f314db483ada1 Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 12:56:39 -0400 Subject: [PATCH 08/27] more descripted title --- source/examples/rapids-azureml-hpo/notebook.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/examples/rapids-azureml-hpo/notebook.ipynb b/source/examples/rapids-azureml-hpo/notebook.ipynb index f2e3ac73..9248a42f 100644 --- a/source/examples/rapids-azureml-hpo/notebook.ipynb +++ b/source/examples/rapids-azureml-hpo/notebook.ipynb @@ -12,7 +12,7 @@ ] }, "source": [ - "# Train and Hyperparameter-Tune with RAPIDS" + "# Train and Hyperparameter-Tune with RAPIDS on AzureML" ] }, { From 203465810835131c90dbcbb148a348b356baf9f2 Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 13:11:56 -0400 Subject: [PATCH 09/27] fixed linting errors --- source/examples/rapids-azureml-hpo/notebook.ipynb | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/source/examples/rapids-azureml-hpo/notebook.ipynb b/source/examples/rapids-azureml-hpo/notebook.ipynb index 9248a42f..4b8ca523 100644 --- a/source/examples/rapids-azureml-hpo/notebook.ipynb +++ b/source/examples/rapids-azureml-hpo/notebook.ipynb @@ -97,12 +97,17 @@ "from azure.ai.ml import MLClient\n", "from azure.identity import DefaultAzureCredential\n", "\n", + "\n", + "subscription_id = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n", + "resource_group_name = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n", + "workspace_name = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n", + "\n", "# Get a handle to the workspace\n", "ml_client = MLClient(\n", " credential=DefaultAzureCredential(),\n", - " subscription_id= #FILL IN WITH YOUR AZURE ML CREDENTIALS,\n", - " resource_group_name= #FILL IN WITH YOUR AZURE ML CREDENTIALS,\n", - " workspace_name= #FILL IN WITH YOUR AZURE ML CREDENTIALS,\n", + " subscription_id= subscription_id,\n", + " resource_group_name= 
resource_group_name\n", + " workspace_name= workspace_name\n", ")\n", "\n", "print(\n", From 540a35a2539a44c25e2a14848741d1e99a51f63a Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 13:21:37 -0400 Subject: [PATCH 10/27] fixed small linting error --- source/examples/rapids-azureml-hpo/notebook.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/examples/rapids-azureml-hpo/notebook.ipynb b/source/examples/rapids-azureml-hpo/notebook.ipynb index 4b8ca523..256a05dc 100644 --- a/source/examples/rapids-azureml-hpo/notebook.ipynb +++ b/source/examples/rapids-azureml-hpo/notebook.ipynb @@ -106,7 +106,7 @@ "ml_client = MLClient(\n", " credential=DefaultAzureCredential(),\n", " subscription_id= subscription_id,\n", - " resource_group_name= resource_group_name\n", + " resource_group_name= resource_group_name,\n", " workspace_name= workspace_name\n", ")\n", "\n", From ef7a978ded63daab557508757e768f317fb3911f Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 14:16:02 -0400 Subject: [PATCH 11/27] updated ubuntu versions --- source/guides/azure/infiniband.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md index daca2391..f1e38327 100644 --- a/source/guides/azure/infiniband.md +++ b/source/guides/azure/infiniband.md @@ -13,8 +13,8 @@ for demonstration. - Select `East US` region. - Change `Availability options` to `Availability set` and create a set. - If building multiple instances put additional instances in the same set. -- Use the 2nd Gen Ubuntu 20.04 image. - - Search all images for `Ubuntu Server 20.04` and choose the second one down on the list. +- Use the 2nd Gen Ubuntu 24.04 image. + - Search all images for `Ubuntu Server 24.04` and choose the second one down on the list. - Change size to `ND40rs_v2`. - Set password login with credentials. - User `someuser` @@ -39,8 +39,8 @@ The commands below should work for Ubuntu. See the [CUDA Toolkit documentation]( ```shell sudo apt-get install -y linux-headers-$(uname -r) distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID | sed -e 's/\.//g') -wget https://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/cuda-keyring_1.0-1_all.deb -sudo dpkg -i cuda-keyring_1.0-1_all.deb +wget https://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update sudo apt-get -y install cuda-drivers ``` @@ -118,7 +118,7 @@ Mon Nov 14 20:32:39 2022 ### InfiniBand Driver -On Ubuntu 20.04 +On Ubuntu 24.04 ```shell sudo apt-get install -y automake dh-make git libcap2 libnuma-dev libtool make pkg-config udev curl librdmacm-dev rdma-core \ From 8a204a9a11b0aab95398f3452f2772ba70953667 Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 14:43:05 -0400 Subject: [PATCH 12/27] got rid of outdated package --- source/guides/azure/infiniband.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md index f1e38327..3d1ec48e 100644 --- a/source/guides/azure/infiniband.md +++ b/source/guides/azure/infiniband.md @@ -122,7 +122,7 @@ On Ubuntu 24.04 ```shell sudo apt-get install -y automake dh-make git libcap2 libnuma-dev libtool make pkg-config udev curl librdmacm-dev rdma-core \ - libgfortran5 bison chrpath flex graphviz gfortran tk dpatch quilt swig tcl ibverbs-utils + libgfortran5 bison chrpath flex graphviz gfortran tk quilt swig tcl ibverbs-utils ``` Check install From 8c3a176ce13ea5101a60816f63ae23291e27619f Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 14:44:14 -0400 Subject: [PATCH 13/27] added intermediary step for clarity --- source/guides/azure/infiniband.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md index 3d1ec48e..6e3ef981 100644 --- a/source/guides/azure/infiniband.md +++ b/source/guides/azure/infiniband.md @@ -247,14 +247,20 @@ wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforg bash Mambaforge-Linux-x86_64.sh ``` -Accept the default and allow conda init to run. Then start a new shell. +Accept the default and allow conda init to run. +``shell +~/mambaforge/bin/conda init + +```` + +Then start a new shell. 
Create a conda environment (see [UCX-Py](https://ucx-py.readthedocs.io/en/latest/install.html) docs) ```shell mamba create -n ucxpy {{ rapids_conda_channels }} {{ rapids_conda_packages }} ipython ucx-proc=*=gpu ucx ucx-py dask distributed numpy cupy pytest pynvml -y mamba activate ucxpy -``` +```` Clone UCX-Py repo locally From 59d63439aa27f3e2ebad291e6087695c1106870b Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 15:11:48 -0400 Subject: [PATCH 14/27] changed hardcoded lines to FILL-THIS-IN --- .../xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb b/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb index 01586d14..73cf685e 100644 --- a/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb +++ b/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb @@ -178,10 +178,10 @@ "metadata": {}, "outputs": [], "source": [ - "location = \"West US 2\"\n", - "resource_group = \"rapidsai-deployment\"\n", - "vnet = \"rapidsai-deployment-vnet\"\n", - "security_group = \"rapidsaiclouddeploymenttest-nsg\"\n", + "location = \"FILL-THIS-IN\"\n", + "resource_group = \"FILL-THIS-IN\"\n", + "vnet = \"FILL-THIS-IN\"\n", + "security_group = \"FILL-THIS-IN\"\n", "vm_size = \"Standard_NC12s_v3\" # or choose a different GPU enabled VM type\n", "\n", "docker_image = \"{{rapids_container}}\"\n", From fa6613ac2590060bd874a7cb91f6039d2f86bc08 Mon Sep 17 00:00:00 2001 From: Melody Wang <98235366+melodywang060@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:41:35 -0400 Subject: [PATCH 15/27] Update source/guides/azure/infiniband.md Co-authored-by: James Lamb --- source/guides/azure/infiniband.md | 1 - 1 file changed, 1 deletion(-) diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md index 6e3ef981..18e049fc 100644 --- a/source/guides/azure/infiniband.md +++ b/source/guides/azure/infiniband.md @@ -260,7 +260,6 @@ Create a conda environment (see [UCX-Py](https://ucx-py.readthedocs.io/en/latest ```shell mamba create -n ucxpy {{ rapids_conda_channels }} {{ rapids_conda_packages }} ipython ucx-proc=*=gpu ucx ucx-py dask distributed numpy cupy pytest pynvml -y mamba activate ucxpy -```` Clone UCX-Py repo locally From 3e7aacee27c96451449cc76a32dbc46b22cd7c71 Mon Sep 17 00:00:00 2001 From: Melody Wang <98235366+melodywang060@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:41:45 -0400 Subject: [PATCH 16/27] Update source/guides/azure/infiniband.md Co-authored-by: James Lamb --- source/guides/azure/infiniband.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md index 18e049fc..d76bca73 100644 --- a/source/guides/azure/infiniband.md +++ b/source/guides/azure/infiniband.md @@ -248,7 +248,7 @@ bash Mambaforge-Linux-x86_64.sh ``` Accept the default and allow conda init to run. 
-``shell +```shell ~/mambaforge/bin/conda init ```` From c62f7ab0a7a0a1791fc5234d6735d0316ca02bba Mon Sep 17 00:00:00 2001 From: Melody Wang <98235366+melodywang060@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:41:50 -0400 Subject: [PATCH 17/27] Update source/guides/azure/infiniband.md Co-authored-by: James Lamb --- source/guides/azure/infiniband.md | 1 - 1 file changed, 1 deletion(-) diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md index d76bca73..b3f92dbe 100644 --- a/source/guides/azure/infiniband.md +++ b/source/guides/azure/infiniband.md @@ -251,7 +251,6 @@ Accept the default and allow conda init to run. ```shell ~/mambaforge/bin/conda init -```` Then start a new shell. From b9c13172526ac464a42275c32164e3226466dede Mon Sep 17 00:00:00 2001 From: Melody Wang <98235366+melodywang060@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:42:09 -0400 Subject: [PATCH 18/27] Update source/cloud/azure/aks.md Co-authored-by: James Lamb --- source/cloud/azure/aks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/cloud/azure/aks.md b/source/cloud/azure/aks.md index d70c95fa..8917fc3c 100644 --- a/source/cloud/azure/aks.md +++ b/source/cloud/azure/aks.md @@ -92,7 +92,7 @@ $ az extension add --name aks-preview ````` ```bash - az aks nodepool add \ +az aks nodepool add \ --resource-group \ --cluster-name rapids \ --name gpunp \ From 1880bc90f4314a8dbfba4d21cd2e7e94bf2be3e2 Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Thu, 10 Oct 2024 22:22:37 -0400 Subject: [PATCH 19/27] fixed backtick error --- package-lock.json | 28 ++++++++++++++++++++++++++++ package.json | 5 +++++ source/guides/azure/infiniband.md | 5 +++-- 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 package-lock.json create mode 100644 package.json diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..b9036882 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,28 @@ +{ + "name": "deployment", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "devDependencies": { + "prettier": "3.3.3" + } + }, + "node_modules/prettier": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.3.3.tgz", + "integrity": "sha512-i2tDNA0O5IrMO757lfrdQZCc2jPNDVntV0m/+4whiDfWaTKfMNgR7Qz0NAeGz/nRqF4m5/6CLzbP4/liHt12Ew==", + "dev": true, + "license": "MIT", + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 00000000..a32393d7 --- /dev/null +++ b/package.json @@ -0,0 +1,5 @@ +{ + "devDependencies": { + "prettier": "3.3.3" + } +} diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md index b3f92dbe..f2feff4e 100644 --- a/source/guides/azure/infiniband.md +++ b/source/guides/azure/infiniband.md @@ -248,15 +248,16 @@ bash Mambaforge-Linux-x86_64.sh ``` Accept the default and allow conda init to run. + ```shell ~/mambaforge/bin/conda init - +``` Then start a new shell. 
Create a conda environment (see [UCX-Py](https://ucx-py.readthedocs.io/en/latest/install.html) docs) -```shell +````shell mamba create -n ucxpy {{ rapids_conda_channels }} {{ rapids_conda_packages }} ipython ucx-proc=*=gpu ucx ucx-py dask distributed numpy cupy pytest pynvml -y mamba activate ucxpy From 7c07bbe891b771ee60af36b6f800556ffb71deac Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Fri, 11 Oct 2024 00:27:15 -0400 Subject: [PATCH 20/27] ran black and pretty to format files: --- source/examples/rapids-azureml-hpo/notebook.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/examples/rapids-azureml-hpo/notebook.ipynb b/source/examples/rapids-azureml-hpo/notebook.ipynb index 256a05dc..4295e8c6 100644 --- a/source/examples/rapids-azureml-hpo/notebook.ipynb +++ b/source/examples/rapids-azureml-hpo/notebook.ipynb @@ -105,9 +105,9 @@ "# Get a handle to the workspace\n", "ml_client = MLClient(\n", " credential=DefaultAzureCredential(),\n", - " subscription_id= subscription_id,\n", - " resource_group_name= resource_group_name,\n", - " workspace_name= workspace_name\n", + " subscription_id=subscription_id,\n", + " resource_group_name=resource_group_name,\n", + " workspace_name=workspace_name,\n", ")\n", "\n", "print(\n", From 9802cbf04920f7ac3e99d7ea0b0bf506b55d2163 Mon Sep 17 00:00:00 2001 From: Melody Wang Date: Fri, 11 Oct 2024 00:30:06 -0400 Subject: [PATCH 21/27] ran ruff --- extensions/rapids_notebook_files.py | 8 +- extensions/rapids_related_examples.py | 18 +- extensions/rapids_version_templating.py | 12 +- source/conf.py | 20 +- .../rapids-1brc-single-node/notebook.ipynb | 4 +- .../notebook.ipynb | 40 +- .../rapids-azureml-hpo/notebook.ipynb | 9 +- .../rapids-azureml-hpo/rapids_csp_azure.py | 48 +- .../rapids-azureml-hpo/train_rapids.py | 28 +- .../examples/rapids-ec2-mnmg/notebook.ipynb | 16 +- .../examples/rapids-optuna-hpo/notebook.ipynb | 12 +- .../rapids-sagemaker-higgs/notebook.ipynb | 9 +- .../rapids-sagemaker-higgs/rapids-higgs.py | 4 +- .../rapids-sagemaker-hpo/HPOConfig.py | 12 +- .../rapids-sagemaker-hpo/HPODatasets.py | 2 +- .../rapids-sagemaker-hpo/MLWorkflow.py | 4 +- .../rapids-sagemaker-hpo/helper_functions.py | 29 +- .../rapids-sagemaker-hpo/notebook.ipynb | 12 +- source/examples/rapids-sagemaker-hpo/serve.py | 25 +- source/examples/rapids-sagemaker-hpo/train.py | 8 +- .../workflows/MLWorkflowMultiCPU.py | 12 +- .../workflows/MLWorkflowMultiGPU.py | 16 +- .../workflows/MLWorkflowSingleCPU.py | 8 +- .../workflows/MLWorkflowSingleGPU.py | 25 +- .../notebook.ipynb | 217 +-- .../notebook.ipynb | 20 +- .../xgboost-dask-databricks/notebook.ipynb | 20 +- .../notebook.ipynb | 18 +- .../notebook.ipynb | 18 +- .../notebook.ipynb | 1462 ++++++++--------- .../notebook.ipynb | 8 +- .../xgboost-rf-gpu-cpu-benchmark/hpo.py | 24 +- 32 files changed, 899 insertions(+), 1269 deletions(-) diff --git a/extensions/rapids_notebook_files.py b/extensions/rapids_notebook_files.py index 66d68ef8..8b6b027f 100644 --- a/extensions/rapids_notebook_files.py +++ b/extensions/rapids_notebook_files.py @@ -16,9 +16,7 @@ def walk_files(app, dir, outdir): related_notebook_files = {} for page in dir.glob("*"): if page.is_dir(): - related_notebook_files[page.name] = walk_files( - app, page, outdir / page.name - ) + related_notebook_files[page.name] = walk_files(app, page, outdir / page.name) else: with contextlib.suppress(OSError): os.remove(str(outdir / page.name)) @@ -59,9 +57,7 @@ def find_notebook_related_files(app, pagename, templatename, context, doctree): 
path_to_output_parent = output_root / rel_page_parent # Copy all related files to output and apply templating - related_notebook_files = walk_files( - app, path_to_page_parent, path_to_output_parent - ) + related_notebook_files = walk_files(app, path_to_page_parent, path_to_output_parent) # Make archive of related files if related_notebook_files and len(related_notebook_files) > 1: diff --git a/extensions/rapids_related_examples.py b/extensions/rapids_related_examples.py index ef52bf3e..94312715 100644 --- a/extensions/rapids_related_examples.py +++ b/extensions/rapids_related_examples.py @@ -22,9 +22,7 @@ def read_notebook_tags(path: str) -> list[str]: return [] -def generate_notebook_grid_myst( - notebooks: list[str], env: BuildEnvironment -) -> list[str]: +def generate_notebook_grid_myst(notebooks: list[str], env: BuildEnvironment) -> list[str]: """Generate sphinx-design grid of notebooks in MyST markdown. Take a list of notebook documents and render out some MyST markdown displaying those @@ -75,11 +73,7 @@ def get_title_for_notebook(path: str) -> str: if i == len(cell_source) - 1: # no next_token continue next_token = cell_source[i + 1] - if ( - token.type == "heading_open" - and token.tag == "h1" - and next_token.type == "inline" - ): + if token.type == "heading_open" and token.tag == "h1" and next_token.type == "inline": return next_token.content raise ValueError("No top-level heading found") @@ -146,9 +140,7 @@ def add_notebook_tag_map_to_context(app, pagename, templatename, context, doctre except KeyError: tag_tree[root] = [suffix] context["notebook_tag_tree"] = tag_tree - context["notebook_tags"] = [ - tag for tag, pages in app.env.notebook_tag_map.items() if pagename in pages - ] + context["notebook_tags"] = [tag for tag, pages in app.env.notebook_tag_map.items() if pagename in pages] class NotebookGalleryTocTree(TocTree): @@ -162,9 +154,7 @@ def run(self) -> list[nodes.Node]: output += toctree # Generate the card grid for all items in the toctree - notebooks = [ - notebook for _, notebook in toctree[0].children[0].attributes["entries"] - ] + notebooks = [notebook for _, notebook in toctree[0].children[0].attributes["entries"]] grid_markdown = generate_notebook_grid_myst(notebooks=notebooks, env=self.env) for node in parse_markdown(markdown=grid_markdown, state=self.state): gallery += node diff --git a/extensions/rapids_version_templating.py b/extensions/rapids_version_templating.py index c2c71817..d8b12333 100644 --- a/extensions/rapids_version_templating.py +++ b/extensions/rapids_version_templating.py @@ -49,9 +49,7 @@ def visit_reference(self, node: nodes.reference) -> None: uri_str = re.sub(r"~~~(.*?)~~~", r"{{ \1 }}", uri_str) # fill in appropriate values based on app context - node.attributes["refuri"] = re.sub( - r"(? None: Replace template strings in generic text. This roughly corresponds to HTML ``

<p>``, ``<div>``, and similar elements.
         """
-        new_node = nodes.Text(
-            re.sub(r"(?<!\{)\{\{(.*?)\}\}(?!\})", self.template_func, node.astext())
-        )
+        new_node = nodes.Text(re.sub(r"(?<!\{)\{\{(.*?)\}\}(?!\})", self.template_func, node.astext()))
         node.parent.replace(node, new_node)
 
     def template_func(self, match: re.Match) -> str:
@@ -71,9 +67,7 @@ def template_func(self, match: re.Match) -> str:
         Replace template strings like ``{{ rapids_version }}`` with real
         values like ``24.10``.
         """
-        return self.app.builder.templates.render_string(
-            source=match.group(), context=self.app.config.rapids_version
-        )
+        return self.app.builder.templates.render_string(source=match.group(), context=self.app.config.rapids_version)
 
 
 def version_template(
diff --git a/source/conf.py b/source/conf.py
index 929e29a1..4a8db58d 100644
--- a/source/conf.py
+++ b/source/conf.py
@@ -43,18 +43,12 @@
     },
 }
 rapids_version = (
-    versions["stable"]
-    if os.environ.get("DEPLOYMENT_DOCS_BUILD_STABLE", "false") == "true"
-    else versions["nightly"]
+    versions["stable"] if os.environ.get("DEPLOYMENT_DOCS_BUILD_STABLE", "false") == "true" else versions["nightly"]
 )
 rapids_version["rapids_conda_channels_list"] = [
-    channel
-    for channel in rapids_version["rapids_conda_channels"].split(" ")
-    if channel != "-c"
+    channel for channel in rapids_version["rapids_conda_channels"].split(" ") if channel != "-c"
 ]
-rapids_version["rapids_conda_packages_list"] = rapids_version[
-    "rapids_conda_packages"
-].split(" ")
+rapids_version["rapids_conda_packages_list"] = rapids_version["rapids_conda_packages"].split(" ")
 
 # -- General configuration ---------------------------------------------------
 
@@ -94,9 +88,7 @@
 # -- Options for notebooks -------------------------------------------------
 
 nb_execution_mode = "off"
-rapids_deployment_notebooks_base_url = (
-    "https://github.com/rapidsai/deployment/blob/main/source/"
-)
+rapids_deployment_notebooks_base_url = "https://github.com/rapidsai/deployment/blob/main/source/"
 
 # -- Options for HTML output -------------------------------------------------
 
@@ -146,8 +138,6 @@
 def setup(app):
     app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
     app.add_css_file("css/custom.css")
-    app.add_js_file(
-        "https://docs.rapids.ai/assets/js/custom.js", loading_method="defer"
-    )
+    app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer")
     app.add_js_file("js/nav.js", loading_method="defer")
     app.add_js_file("js/notebook-gallery.js", loading_method="defer")
diff --git a/source/examples/rapids-1brc-single-node/notebook.ipynb b/source/examples/rapids-1brc-single-node/notebook.ipynb
index aee011e5..e1cde0c0 100755
--- a/source/examples/rapids-1brc-single-node/notebook.ipynb
+++ b/source/examples/rapids-1brc-single-node/notebook.ipynb
@@ -200,9 +200,7 @@
    "source": [
     "n = 1_000_000_000  # Number of rows of data to generate\n",
     "\n",
-    "lookup_df = cudf.read_csv(\n",
-    "    \"lookup.csv\"\n",
-    ")  # Load our lookup table of stations and their mean temperatures\n",
+    "lookup_df = cudf.read_csv(\"lookup.csv\")  # Load our lookup table of stations and their mean temperatures\n",
     "std = 10.0  # We assume temperatures are normally distributed with a standard deviation of 10\n",
     "chunksize = 2e8  # Set the number of rows to generate in one go (reduce this if you run into GPU RAM limits)\n",
     "filename = Path(\"measurements.txt\")  # Choose where to write to\n",
diff --git a/source/examples/rapids-autoscaling-multi-tenant-kubernetes/notebook.ipynb b/source/examples/rapids-autoscaling-multi-tenant-kubernetes/notebook.ipynb
index 886a359d..751037cc 100644
--- a/source/examples/rapids-autoscaling-multi-tenant-kubernetes/notebook.ipynb
+++ b/source/examples/rapids-autoscaling-multi-tenant-kubernetes/notebook.ipynb
@@ -995,12 +995,8 @@
     "\n",
     "\n",
     "def map_haversine(part):\n",
-    "    pickup = cuspatial.GeoSeries.from_points_xy(\n",
-    "        part[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns()\n",
-    "    )\n",
-    "    dropoff = cuspatial.GeoSeries.from_points_xy(\n",
-    "        part[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns()\n",
-    "    )\n",
+    "    pickup = cuspatial.GeoSeries.from_points_xy(part[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns())\n",
+    "    dropoff = cuspatial.GeoSeries.from_points_xy(part[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns())\n",
     "    return cuspatial.haversine_distance(pickup, dropoff)\n",
     "\n",
     "\n",
@@ -1506,9 +1502,7 @@
     "from random import randrange\n",
     "\n",
     "\n",
-    "def generate_workload(\n",
-    "    stages=3, min_width=1, max_width=3, variation=1, input_workload=None\n",
-    "):\n",
+    "def generate_workload(stages=3, min_width=1, max_width=3, variation=1, input_workload=None):\n",
     "    graph = [input_workload] if input_workload is not None else [run_haversine()]\n",
     "    last_width = min_width\n",
     "    for _ in range(stages):\n",
@@ -1646,35 +1640,25 @@
    ],
    "source": [
     "%%time\n",
-    "start_time = (datetime.datetime.now() - datetime.timedelta(minutes=15)).strftime(\n",
-    "    \"%Y-%m-%dT%H:%M:%SZ\"\n",
-    ")\n",
+    "start_time = (datetime.datetime.now() - datetime.timedelta(minutes=15)).strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
     "try:\n",
     "    # Start with a couple of concurrent workloads\n",
     "    workload = generate_workload(stages=10, max_width=2)\n",
     "    # Then increase demand as more users appear\n",
-    "    workload = generate_workload(\n",
-    "        stages=5, max_width=5, min_width=3, variation=5, input_workload=workload\n",
-    "    )\n",
+    "    workload = generate_workload(stages=5, max_width=5, min_width=3, variation=5, input_workload=workload)\n",
     "    # Now reduce the workload for a longer period of time, this could be over a lunchbreak or something\n",
     "    workload = generate_workload(stages=30, max_width=2, input_workload=workload)\n",
     "    # Everyone is back from lunch and it hitting the cluster hard\n",
-    "    workload = generate_workload(\n",
-    "        stages=10, max_width=10, min_width=3, variation=5, input_workload=workload\n",
-    "    )\n",
+    "    workload = generate_workload(stages=10, max_width=10, min_width=3, variation=5, input_workload=workload)\n",
     "    # The after lunch rush is easing\n",
-    "    workload = generate_workload(\n",
-    "        stages=5, max_width=5, min_width=3, variation=5, input_workload=workload\n",
-    "    )\n",
+    "    workload = generate_workload(stages=5, max_width=5, min_width=3, variation=5, input_workload=workload)\n",
     "    # As we get towards the end of the day demand slows off again\n",
     "    workload = generate_workload(stages=10, max_width=2, input_workload=workload)\n",
     "    workload.compute()\n",
     "finally:\n",
     "    client.close()\n",
     "    cluster.close()\n",
-    "    end_time = (datetime.datetime.now() + datetime.timedelta(minutes=15)).strftime(\n",
-    "        \"%Y-%m-%dT%H:%M:%SZ\"\n",
-    "    )"
+    "    end_time = (datetime.datetime.now() + datetime.timedelta(minutes=15)).strftime(\"%Y-%m-%dT%H:%M:%SZ\")"
    ]
   },
   {
@@ -2037,14 +2021,10 @@
     "    end_time,\n",
     "    \"1s\",\n",
     ")\n",
-    "running_pods = running_pods[\n",
-    "    running_pods.columns.drop(list(running_pods.filter(regex=\"prepull\")))\n",
-    "]\n",
+    "running_pods = running_pods[running_pods.columns.drop(list(running_pods.filter(regex=\"prepull\")))]\n",
     "nodes = p.query_range(\"count(kube_node_info)\", start_time, end_time, \"1s\")\n",
     "nodes.columns = [\"Available GPUs\"]\n",
-    "nodes[\"Available GPUs\"] = (\n",
-    "    nodes[\"Available GPUs\"] * 2\n",
-    ")  # We know our nodes each had 2 GPUs\n",
+    "nodes[\"Available GPUs\"] = nodes[\"Available GPUs\"] * 2  # We know our nodes each had 2 GPUs\n",
     "nodes[\"Utilized GPUs\"] = running_pods.sum(axis=1)"
    ]
   },
diff --git a/source/examples/rapids-azureml-hpo/notebook.ipynb b/source/examples/rapids-azureml-hpo/notebook.ipynb
index 4295e8c6..02667938 100644
--- a/source/examples/rapids-azureml-hpo/notebook.ipynb
+++ b/source/examples/rapids-azureml-hpo/notebook.ipynb
@@ -97,7 +97,6 @@
     "from azure.ai.ml import MLClient\n",
     "from azure.identity import DefaultAzureCredential\n",
     "\n",
-    "\n",
     "subscription_id = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n",
     "resource_group_name = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n",
     "workspace_name = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n",
@@ -219,9 +218,7 @@
     "    )\n",
     "    ml_client.compute.begin_create_or_update(gpu_target).result()\n",
     "\n",
-    "    print(\n",
-    "        f\"AMLCompute with name {gpu_target.name} is created, the compute size is {gpu_target.size}\"\n",
-    "    )"
+    "    print(f\"AMLCompute with name {gpu_target.name} is created, the compute size is {gpu_target.size}\")"
    ]
   },
   {
@@ -488,9 +485,7 @@
     "\n",
     "\n",
     "# Define the limits for this sweep\n",
-    "sweep_job.set_limits(\n",
-    "    max_total_trials=10, max_concurrent_trials=2, timeout=18000, trial_timeout=3600\n",
-    ")\n",
+    "sweep_job.set_limits(max_total_trials=10, max_concurrent_trials=2, timeout=18000, trial_timeout=3600)\n",
     "\n",
     "\n",
     "# Specify your experiment details\n",
diff --git a/source/examples/rapids-azureml-hpo/rapids_csp_azure.py b/source/examples/rapids-azureml-hpo/rapids_csp_azure.py
index ea7724ea..683e120b 100644
--- a/source/examples/rapids-azureml-hpo/rapids_csp_azure.py
+++ b/source/examples/rapids-azureml-hpo/rapids_csp_azure.py
@@ -132,9 +132,7 @@ def load_hyperparams(self, model_name="XGBoost"):
             self.log_to_file(str(error))
             return
 
-    def load_data(
-        self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBinary"
-    ):
+    def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBinary"):
         """
         Loading the data into the object from the filename and based on the columns that we are
         interested in. Also, generates y_label from 'ArrDelay' column to convert this into a binary
@@ -185,9 +183,7 @@ def load_data(
 
                     elif "multi" in self.compute_type:
                         self.log_to_file("\n\tReading using dask dataframe")
-                        dataset = dask.dataframe.read_parquet(
-                            target_filename, columns=col_labels
-                        )
+                        dataset = dask.dataframe.read_parquet(target_filename, columns=col_labels)
 
             elif "GPU" in self.compute_type:
                 # GPU Reading Option
@@ -205,9 +201,7 @@ def load_data(
 
                     elif "multi" in self.compute_type:
                         self.log_to_file("\n\tReading using dask_cudf")
-                        dataset = dask_cudf.read_parquet(
-                            target_filename, columns=col_labels
-                        )
+                        dataset = dask_cudf.read_parquet(target_filename, columns=col_labels)
 
         # cast all columns to float32
         for col in dataset.columns:
@@ -222,14 +216,10 @@ def load_data(
         dataset = dataset.fillna(0.0)  # Filling the null values. Needed for dask-cudf
 
         self.log_to_file(f"\n\tIngestion completed in {ingestion_timer.duration}")
-        self.log_to_file(
-            f"\n\tDataset descriptors: {dataset.shape}\n\t{dataset.dtypes}"
-        )
+        self.log_to_file(f"\n\tDataset descriptors: {dataset.shape}\n\t{dataset.dtypes}")
         return dataset, col_labels, y_label, ingestion_timer.duration
 
-    def split_data(
-        self, dataset, y_label, train_size=0.8, random_state=0, shuffle=True
-    ):
+    def split_data(self, dataset, y_label, train_size=0.8, random_state=0, shuffle=True):
         """
         Splitting data into train and test split, has appropriate imports for different compute modes.
         CPU compute - Uses sklearn, we manually filter y_label column in the split call
@@ -321,13 +311,9 @@ def train_model(self, X_train, y_train, model_params):
 
         try:
             if self.model_type == "XGBoost":
-                trained_model, training_time = self.fit_xgboost(
-                    X_train, y_train, model_params
-                )
+                trained_model, training_time = self.fit_xgboost(X_train, y_train, model_params)
             elif self.model_type == "RandomForest":
-                trained_model, training_time = self.fit_random_forest(
-                    X_train, y_train, model_params
-                )
+                trained_model, training_time = self.fit_random_forest(X_train, y_train, model_params)
         except Exception as error:
             self.log_to_file("\n\n!error during model training: " + str(error))
         self.log_to_file(f"\n\tFinished training in {training_time:.4f} s")
@@ -354,9 +340,7 @@ def fit_xgboost(self, X_train, y_train, model_params):
                 )
             elif "multi" in self.compute_type:
                 self.log_to_file("\n\tTraining multi-GPU XGBoost")
-                train_DMatrix = xgboost.dask.DaskDMatrix(
-                    self.client, data=X_train, label=y_train
-                )
+                train_DMatrix = xgboost.dask.DaskDMatrix(self.client, data=X_train, label=y_train)
                 trained_model = xgboost.dask.train(
                     self.client,
                     dtrain=train_DMatrix,
@@ -441,12 +425,8 @@ def evaluate_test_perf(self, trained_model, X_test, y_test, threshold=0.5):
             try:
                 if self.model_type == "XGBoost":
                     if "multi" in self.compute_type:
-                        test_DMatrix = xgboost.dask.DaskDMatrix(
-                            self.client, data=X_test, label=y_test
-                        )
-                        xgb_pred = xgboost.dask.predict(
-                            self.client, trained_model, test_DMatrix
-                        ).compute()
+                        test_DMatrix = xgboost.dask.DaskDMatrix(self.client, data=X_test, label=y_test)
+                        xgb_pred = xgboost.dask.predict(self.client, trained_model, test_DMatrix).compute()
                         xgb_pred = (xgb_pred > threshold) * 1.0
                         test_accuracy = accuracy_score(y_test.compute(), xgb_pred)
                     elif "single" in self.compute_type:
@@ -459,13 +439,9 @@ def evaluate_test_perf(self, trained_model, X_test, y_test, threshold=0.5):
                     if "multi" in self.compute_type:
                         cuml_pred = trained_model.predict(X_test).compute()
                         self.log_to_file("\n\tPrediction complete")
-                        test_accuracy = accuracy_score(
-                            y_test.compute(), cuml_pred, convert_dtype=True
-                        )
+                        test_accuracy = accuracy_score(y_test.compute(), cuml_pred, convert_dtype=True)
                     elif "single" in self.compute_type:
-                        test_accuracy = trained_model.score(
-                            X_test, y_test.astype("int32")
-                        )
+                        test_accuracy = trained_model.score(X_test, y_test.astype("int32"))
 
             except Exception as error:
                 self.log_to_file("\n\n!error during inference: " + str(error))
diff --git a/source/examples/rapids-azureml-hpo/train_rapids.py b/source/examples/rapids-azureml-hpo/train_rapids.py
index 63ce4f5f..a170e6f5 100644
--- a/source/examples/rapids-azureml-hpo/train_rapids.py
+++ b/source/examples/rapids-azureml-hpo/train_rapids.py
@@ -28,12 +28,8 @@ def main():
     parser = argparse.ArgumentParser()
 
     parser.add_argument("--data_dir", type=str, help="location of data")
-    parser.add_argument(
-        "--n_estimators", type=int, default=100, help="Number of trees in RF"
-    )
-    parser.add_argument(
-        "--max_depth", type=int, default=16, help="Max depth of each tree"
-    )
+    parser.add_argument("--n_estimators", type=int, default=100, help="Number of trees in RF")
+    parser.add_argument("--max_depth", type=int, default=16, help="Max depth of each tree")
     parser.add_argument(
         "--n_bins",
         type=int,
@@ -52,9 +48,7 @@ def main():
         default="single-GPU",
         help="set to multi-GPU for algorithms via dask",
     )
-    parser.add_argument(
-        "--cv_folds", type=int, default=5, help="Number of CV fold splits"
-    )
+    parser.add_argument("--cv_folds", type=int, default=5, help="Number of CV fold splits")
 
     args = parser.parse_args()
     data_dir = args.data_dir
@@ -134,20 +128,14 @@ def main():
         print(f"\n CV fold { i_train_fold } of { cv_folds }\n")
 
         # split data
-        X_train, X_test, y_train, y_test, _ = azure_ml.split_data(
-            X, y, random_state=i_train_fold
-        )
+        X_train, X_test, y_train, y_test, _ = azure_ml.split_data(X, y, random_state=i_train_fold)
         # train model
-        trained_model, training_time = azure_ml.train_model(
-            X_train, y_train, model_params
-        )
+        trained_model, training_time = azure_ml.train_model(X_train, y_train, model_params)
 
         train_time_per_fold.append(round(training_time, 4))
 
         # evaluate perf
-        test_accuracy, infer_time = azure_ml.evaluate_test_perf(
-            trained_model, X_test, y_test
-        )
+        test_accuracy, infer_time = azure_ml.evaluate_test_perf(trained_model, X_test, y_test)
         accuracy_per_fold.append(round(test_accuracy, 4))
         infer_time_per_fold.append(round(infer_time, 4))
 
@@ -155,9 +143,7 @@ def main():
         if test_accuracy > global_best_test_accuracy:
             global_best_test_accuracy = test_accuracy
 
-    mlflow.log_metric(
-        "Total training inference time", np.float(training_time + infer_time)
-    )
+    mlflow.log_metric("Total training inference time", np.float(training_time + infer_time))
     mlflow.log_metric("Accuracy", np.float(global_best_test_accuracy))
     print("\n Accuracy             :", global_best_test_accuracy)
     print("\n accuracy per fold    :", accuracy_per_fold)
diff --git a/source/examples/rapids-ec2-mnmg/notebook.ipynb b/source/examples/rapids-ec2-mnmg/notebook.ipynb
index 79ca421a..d0f08884 100644
--- a/source/examples/rapids-ec2-mnmg/notebook.ipynb
+++ b/source/examples/rapids-ec2-mnmg/notebook.ipynb
@@ -284,9 +284,7 @@
     "taxi_df[\"is_weekend\"] = (taxi_df[\"day_of_week\"] >= 5).astype(\"int32\")\n",
     "\n",
     "# calculate the time difference between dropoff and pickup.\n",
-    "taxi_df[\"diff\"] = taxi_df[\"dropoff_datetime\"].astype(\"int32\") - taxi_df[\n",
-    "    \"pickup_datetime\"\n",
-    "].astype(\"int32\")\n",
+    "taxi_df[\"diff\"] = taxi_df[\"dropoff_datetime\"].astype(\"int32\") - taxi_df[\"pickup_datetime\"].astype(\"int32\")\n",
     "taxi_df[\"diff\"] = (taxi_df[\"diff\"] / 1000).astype(\"int32\")\n",
     "\n",
     "taxi_df[\"pickup_latitude_r\"] = taxi_df[\"pickup_latitude\"] // 0.01 * 0.01\n",
@@ -301,12 +299,8 @@
     "def haversine_dist(df):\n",
     "    import cuspatial\n",
     "\n",
-    "    pickup = cuspatial.GeoSeries.from_points_xy(\n",
-    "        df[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns()\n",
-    "    )\n",
-    "    dropoff = cuspatial.GeoSeries.from_points_xy(\n",
-    "        df[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns()\n",
-    "    )\n",
+    "    pickup = cuspatial.GeoSeries.from_points_xy(df[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns())\n",
+    "    dropoff = cuspatial.GeoSeries.from_points_xy(df[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns())\n",
     "    df[\"h_distance\"] = cuspatial.haversine_distance(pickup, dropoff)\n",
     "    df[\"h_distance\"] = df[\"h_distance\"].astype(\"float32\")\n",
     "    return df\n",
@@ -331,9 +325,7 @@
    "outputs": [],
    "source": [
     "# Split into training and validation sets\n",
-    "X, y = taxi_df.drop([\"fare_amount\"], axis=1).astype(\"float32\"), taxi_df[\n",
-    "    \"fare_amount\"\n",
-    "].astype(\"float32\")\n",
+    "X, y = taxi_df.drop([\"fare_amount\"], axis=1).astype(\"float32\"), taxi_df[\"fare_amount\"].astype(\"float32\")\n",
     "X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True)"
    ]
   },
diff --git a/source/examples/rapids-optuna-hpo/notebook.ipynb b/source/examples/rapids-optuna-hpo/notebook.ipynb
index 127d08ce..678c85ca 100644
--- a/source/examples/rapids-optuna-hpo/notebook.ipynb
+++ b/source/examples/rapids-optuna-hpo/notebook.ipynb
@@ -175,9 +175,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def train_and_eval(\n",
-    "    X_param, y_param, penalty=\"l2\", C=1.0, l1_ratio=None, fit_intercept=True\n",
-    "):\n",
+    "def train_and_eval(X_param, y_param, penalty=\"l2\", C=1.0, l1_ratio=None, fit_intercept=True):\n",
     "    \"\"\"\n",
     "    Splits the given data into train and test split to train and evaluate the model\n",
     "    for the params parameters.\n",
@@ -194,9 +192,7 @@
     "    Returns\n",
     "    score: log loss of the fitted model\n",
     "    \"\"\"\n",
-    "    X_train, X_valid, y_train, y_valid = train_test_split(\n",
-    "        X_param, y_param, random_state=42\n",
-    "    )\n",
+    "    X_train, X_valid, y_train, y_valid = train_test_split(X_param, y_param, random_state=42)\n",
     "    classifier = LogisticRegression(\n",
     "        penalty=penalty,\n",
     "        C=C,\n",
@@ -263,9 +259,7 @@
     "    penalty = trial.suggest_categorical(\"penalty\", [\"none\", \"l1\", \"l2\"])\n",
     "    fit_intercept = trial.suggest_categorical(\"fit_intercept\", [True, False])\n",
     "\n",
-    "    score = train_and_eval(\n",
-    "        X_param, y_param, penalty=penalty, C=C, fit_intercept=fit_intercept\n",
-    "    )\n",
+    "    score = train_and_eval(X_param, y_param, penalty=penalty, C=C, fit_intercept=fit_intercept)\n",
     "    return score"
    ]
   },
diff --git a/source/examples/rapids-sagemaker-higgs/notebook.ipynb b/source/examples/rapids-sagemaker-higgs/notebook.ipynb
index 3282c3b5..ad648d37 100644
--- a/source/examples/rapids-sagemaker-higgs/notebook.ipynb
+++ b/source/examples/rapids-sagemaker-higgs/notebook.ipynb
@@ -402,9 +402,7 @@
    },
    "outputs": [],
    "source": [
-    "ECR_container_fullname = (\n",
-    "    f\"{account}.dkr.ecr.{region}.amazonaws.com/{estimator_info['ecr_image']}\"\n",
-    ")"
+    "ECR_container_fullname = f\"{account}.dkr.ecr.{region}.amazonaws.com/{estimator_info['ecr_image']}\""
    ]
   },
   {
@@ -457,10 +455,7 @@
     }
    ],
    "source": [
-    "print(\n",
-    "    f\"source      : {estimator_info['ecr_image']}\\n\"\n",
-    "    f\"destination : {ECR_container_fullname}\"\n",
-    ")"
+    "print(f\"source      : {estimator_info['ecr_image']}\\n\" f\"destination : {ECR_container_fullname}\")"
    ]
   },
   {
diff --git a/source/examples/rapids-sagemaker-higgs/rapids-higgs.py b/source/examples/rapids-sagemaker-higgs/rapids-higgs.py
index 0093e574..cea9649b 100644
--- a/source/examples/rapids-sagemaker-higgs/rapids-higgs.py
+++ b/source/examples/rapids-sagemaker-higgs/rapids-higgs.py
@@ -13,9 +13,7 @@ def main(args):
     data_dir = args.data_dir
 
     col_names = ["label"] + [f"col-{i}" for i in range(2, 30)]  # Assign column names
-    dtypes_ls = ["int32"] + [
-        "float32" for _ in range(2, 30)
-    ]  # Assign dtypes to each column
+    dtypes_ls = ["int32"] + ["float32" for _ in range(2, 30)]  # Assign dtypes to each column
 
     data = cudf.read_csv(data_dir + "HIGGS.csv", names=col_names, dtype=dtypes_ls)
     X_train, X_test, y_train, y_test = train_test_split(data, "label", train_size=0.70)
diff --git a/source/examples/rapids-sagemaker-hpo/HPOConfig.py b/source/examples/rapids-sagemaker-hpo/HPOConfig.py
index f8fe94b9..e1a2be30 100644
--- a/source/examples/rapids-sagemaker-hpo/HPOConfig.py
+++ b/source/examples/rapids-sagemaker-hpo/HPOConfig.py
@@ -61,9 +61,7 @@ def __init__(
         ) = self.detect_data_inputs(directory_structure)
 
         self.model_store_directory = directory_structure["model_store"]
-        self.output_artifacts_directory = directory_structure[
-            "output_artifacts"
-        ]  # noqa
+        self.output_artifacts_directory = directory_structure["output_artifacts"]  # noqa
 
     def parse_configuration(self):
         """Parse the ENV variables [ set in the dockerfile ]
@@ -128,9 +126,7 @@ def parse_configuration(self):
 
     def parse_hyper_parameter_inputs(self, input_args):
         """Parse hyperparmeters provided by the HPO orchestrator"""
-        hpo_log.info(
-            "parsing model hyperparameters from command line arguments...log"
-        )  # noqa
+        hpo_log.info("parsing model hyperparameters from command line arguments...log")  # noqa
         parser = argparse.ArgumentParser()
 
         if "XGBoost" in self.model_type:
@@ -219,9 +215,7 @@ def detect_data_inputs(self, directory_structure):
                single-GPU cudf read_parquet needs a list of files
                multi-CPU/GPU can accept either a list or a directory
         """
-        parquet_files = glob.glob(
-            os.path.join(directory_structure["train_data"], "*.parquet")
-        )
+        parquet_files = glob.glob(os.path.join(directory_structure["train_data"], "*.parquet"))
         csv_files = glob.glob(os.path.join(directory_structure["train_data"], "*.csv"))
 
         if len(csv_files):
diff --git a/source/examples/rapids-sagemaker-hpo/HPODatasets.py b/source/examples/rapids-sagemaker-hpo/HPODatasets.py
index 35f347d3..3b0a139d 100644
--- a/source/examples/rapids-sagemaker-hpo/HPODatasets.py
+++ b/source/examples/rapids-sagemaker-hpo/HPODatasets.py
@@ -1,4 +1,4 @@
-""" Airline Dataset target label and feature column names  """
+"""Airline Dataset target label and feature column names"""
 
 airline_label_column = "ArrDel15"
 airline_feature_columns = [
diff --git a/source/examples/rapids-sagemaker-hpo/MLWorkflow.py b/source/examples/rapids-sagemaker-hpo/MLWorkflow.py
index ee3e1431..31f8f065 100644
--- a/source/examples/rapids-sagemaker-hpo/MLWorkflow.py
+++ b/source/examples/rapids-sagemaker-hpo/MLWorkflow.py
@@ -89,9 +89,7 @@ def timed_execution_wrapper(*args, **kwargs):
         start_time = time.perf_counter()
         result = target_function(*args, **kwargs)
         exec_time = time.perf_counter() - start_time
-        hpo_log.info(
-            f" --- {target_function.__name__}" f" completed in {exec_time:.5f} s"
-        )
+        hpo_log.info(f" --- {target_function.__name__}" f" completed in {exec_time:.5f} s")
         return result
 
     return timed_execution_wrapper
diff --git a/source/examples/rapids-sagemaker-hpo/helper_functions.py b/source/examples/rapids-sagemaker-hpo/helper_functions.py
index 27a7a6cd..3b8bd1b2 100644
--- a/source/examples/rapids-sagemaker-hpo/helper_functions.py
+++ b/source/examples/rapids-sagemaker-hpo/helper_functions.py
@@ -51,10 +51,7 @@ def recommend_instance_type(code_choice, dataset_directory):
         detail_str = "4x GPUs [ V100 ], 64GB GPU memory,  244GB CPU memory"
         recommended_instance_type = "ml.p3.8xlarge"
 
-    print(
-        f"recommended instance type : {recommended_instance_type} \n"
-        f"instance details          : {detail_str}"
-    )
+    print(f"recommended instance type : {recommended_instance_type} \n" f"instance details          : {detail_str}")
 
     return recommended_instance_type
 
@@ -64,8 +61,7 @@ def validate_dockerfile(rapids_base_container, dockerfile_name="Dockerfile"):
     with open(dockerfile_name) as dockerfile_handle:
         if rapids_base_container not in dockerfile_handle.read():
             raise Exception(
-                "Dockerfile base layer [i.e. FROM statment] does"
-                " not match the variable rapids_base_container"
+                "Dockerfile base layer [i.e. FROM statment] does" " not match the variable rapids_base_container"
             )
 
 
@@ -106,17 +102,11 @@ def summarize_hpo_results(tuning_job_name):
     hpo_results = (
         boto3.Session()
         .client("sagemaker")
-        .describe_hyper_parameter_tuning_job(
-            HyperParameterTuningJobName=tuning_job_name
-        )
+        .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=tuning_job_name)
     )
 
     best_job = hpo_results["BestTrainingJob"]["TrainingJobName"]
-    best_score = hpo_results["BestTrainingJob"][
-        "FinalHyperParameterTuningJobObjectiveMetric"
-    ][
-        "Value"
-    ]  # noqa
+    best_score = hpo_results["BestTrainingJob"]["FinalHyperParameterTuningJobObjectiveMetric"]["Value"]  # noqa
     best_params = hpo_results["BestTrainingJob"]["TunedHyperParameters"]
     print(f"best score: {best_score}")
     print(f"best params: {best_params}")
@@ -192,11 +182,7 @@ def new_job_name_from_config(
 
         random_str = "".join(random.choices(uuid.uuid4().hex, k=trim_limit))
 
-        job_name = (
-            f"{data_choice_str}-{code_choice_str}"
-            f"-{algorithm_choice_str}-{cv_folds}cv"
-            f"-{random_str}"
-        )
+        job_name = f"{data_choice_str}-{code_choice_str}" f"-{algorithm_choice_str}-{cv_folds}cv" f"-{random_str}"
 
         job_name = job_name[:trim_limit]
 
@@ -217,7 +203,4 @@ def validate_region(region):
         region = region[0]
 
     if region not in ["us-east-1", "us-west-2"]:
-        raise Exception(
-            "Unsupported region based on demo data location,"
-            " please switch to us-east-1 or us-west-2"
-        )
+        raise Exception("Unsupported region based on demo data location," " please switch to us-east-1 or us-west-2")
diff --git a/source/examples/rapids-sagemaker-hpo/notebook.ipynb b/source/examples/rapids-sagemaker-hpo/notebook.ipynb
index 9ab5d7b0..47c2a1fe 100644
--- a/source/examples/rapids-sagemaker-hpo/notebook.ipynb
+++ b/source/examples/rapids-sagemaker-hpo/notebook.ipynb
@@ -778,9 +778,7 @@
    },
    "outputs": [],
    "source": [
-    "ecr_fullname = (\n",
-    "    f\"{account[0]}.dkr.ecr.{region[0]}.amazonaws.com/{image_base}:{image_tag}\"\n",
-    ")"
+    "ecr_fullname = f\"{account[0]}.dkr.ecr.{region[0]}.amazonaws.com/{image_base}:{image_tag}\""
    ]
   },
   {
@@ -1991,9 +1989,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "endpoint_model = sagemaker.model.Model(\n",
-    "    image_uri=ecr_fullname, role=execution_role, model_data=s3_path_to_best_model\n",
-    ")"
+    "endpoint_model = sagemaker.model.Model(image_uri=ecr_fullname, role=execution_role, model_data=s3_path_to_best_model)"
    ]
   },
   {
@@ -2049,9 +2045,7 @@
     "DEMO_SERVING_FLAG = True\n",
     "\n",
     "if DEMO_SERVING_FLAG:\n",
-    "    endpoint_model.deploy(\n",
-    "        initial_instance_count=1, instance_type=\"ml.g4dn.2xlarge\"\n",
-    "    )  #'ml.p3.2xlarge'"
+    "    endpoint_model.deploy(initial_instance_count=1, instance_type=\"ml.g4dn.2xlarge\")  #'ml.p3.2xlarge'"
    ]
   },
   {
diff --git a/source/examples/rapids-sagemaker-hpo/serve.py b/source/examples/rapids-sagemaker-hpo/serve.py
index 380fe867..b8a01437 100644
--- a/source/examples/rapids-sagemaker-hpo/serve.py
+++ b/source/examples/rapids-sagemaker-hpo/serve.py
@@ -123,8 +123,7 @@ def predict():
 
         except Exception:
             return Response(
-                response="Unable to parse input data"
-                "[ should be json/string encoded list of arrays ]",
+                response="Unable to parse input data" "[ should be json/string encoded list of arrays ]",
                 status=415,
                 mimetype="text/csv",
             )
@@ -135,9 +134,7 @@ def predict():
         try:
             start_time = time.perf_counter()
             if model_type == "XGBoost":
-                app.logger.info(
-                    "running inference using XGBoost model :" f"{model_filename}"
-                )
+                app.logger.info("running inference using XGBoost model :" f"{model_filename}")
 
                 if GPU_INFERENCE_FLAG:
                     predictions = reloaded_model.predict(query_data)
@@ -148,28 +145,18 @@ def predict():
                 predictions = (predictions > xgboost_threshold) * 1.0
 
             elif model_type == "RandomForest":
-                app.logger.info(
-                    "running inference using RandomForest model :" f"{model_filename}"
-                )
+                app.logger.info("running inference using RandomForest model :" f"{model_filename}")
 
                 if "gpu" in model_filename and not GPU_INFERENCE_FLAG:
-                    raise Exception(
-                        "attempting to run CPU inference "
-                        "on a GPU trained RandomForest model"
-                    )
+                    raise Exception("attempting to run CPU inference " "on a GPU trained RandomForest model")
 
                 predictions = reloaded_model.predict(query_data.astype("float32"))
 
             elif model_type == "KMeans":
-                app.logger.info(
-                    "running inference using KMeans model :" f"{model_filename}"
-                )
+                app.logger.info("running inference using KMeans model :" f"{model_filename}")
 
                 if "gpu" in model_filename and not GPU_INFERENCE_FLAG:
-                    raise Exception(
-                        "attempting to run CPU inference "
-                        "on a GPU trained KMeans model"
-                    )
+                    raise Exception("attempting to run CPU inference " "on a GPU trained KMeans model")
 
                 predictions = reloaded_model.predict(query_data.astype("float32"))
 
diff --git a/source/examples/rapids-sagemaker-hpo/train.py b/source/examples/rapids-sagemaker-hpo/train.py
index 7b25053a..4239e79a 100644
--- a/source/examples/rapids-sagemaker-hpo/train.py
+++ b/source/examples/rapids-sagemaker-hpo/train.py
@@ -35,9 +35,7 @@ def train():
         dataset = ml_workflow.handle_missing_data(dataset)
 
         # split into train and test set
-        X_train, X_test, y_train, y_test = ml_workflow.split_dataset(
-            dataset, random_state=i_fold
-        )
+        X_train, X_test, y_train, y_test = ml_workflow.split_dataset(dataset, random_state=i_fold)
 
         # train model
         trained_model = ml_workflow.fit(X_train, y_train)
@@ -61,9 +59,7 @@ def train():
 def configure_logging():
     hpo_log = logging.getLogger("hpo_log")
     log_handler = logging.StreamHandler()
-    log_handler.setFormatter(
-        logging.Formatter("%(asctime)-15s %(levelname)8s %(name)s %(message)s")
-    )
+    log_handler.setFormatter(logging.Formatter("%(asctime)-15s %(levelname)8s %(name)s %(message)s"))
     hpo_log.addHandler(log_handler)
     hpo_log.setLevel(logging.DEBUG)
     hpo_log.propagate = False
diff --git a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiCPU.py b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiCPU.py
index f9ca0ed6..25388834 100644
--- a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiCPU.py
+++ b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiCPU.py
@@ -64,9 +64,7 @@ def cluster_initialize(self):
         dask.config.set(
             {
                 "temporary_directory": self.hpo_config.output_artifacts_directory,
-                "logging": {
-                    "loggers": {"distributed.nanny": {"level": "CRITICAL"}}
-                },  # noqa
+                "logging": {"loggers": {"distributed.nanny": {"level": "CRITICAL"}}},  # noqa
             }
         )
 
@@ -82,9 +80,7 @@ def ingest_data(self):
         if "Parquet" in self.hpo_config.input_file_type:
             hpo_log.info("> parquet data ingestion")
 
-            dataset = dask.dataframe.read_parquet(
-                self.hpo_config.target_files, columns=self.hpo_config.dataset_columns
-            )
+            dataset = dask.dataframe.read_parquet(self.hpo_config.target_files, columns=self.hpo_config.dataset_columns)
 
         elif "CSV" in self.hpo_config.input_file_type:
             hpo_log.info("> csv data ingestion")
@@ -212,9 +208,7 @@ def save_best_model(self, score, trained_model, filename="saved_model"):
         if score > self.best_score:
             self.best_score = score
             hpo_log.info("> saving high-scoring model")
-            output_filename = os.path.join(
-                self.hpo_config.model_store_directory, filename
-            )
+            output_filename = os.path.join(self.hpo_config.model_store_directory, filename)
             if "XGBoost" in self.hpo_config.model_type:
                 trained_model.save_model(f"{output_filename}_mcpu_xgb")
             elif "RandomForest" in self.hpo_config.model_type:
diff --git a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiGPU.py b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiGPU.py
index 15ec66ef..f0840f52 100644
--- a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiGPU.py
+++ b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiGPU.py
@@ -70,9 +70,7 @@ def cluster_initialize(self):
         dask.config.set(
             {
                 "temporary_directory": self.hpo_config.output_artifacts_directory,
-                "logging": {
-                    "loggers": {"distributed.nanny": {"level": "CRITICAL"}}
-                },  # noqa
+                "logging": {"loggers": {"distributed.nanny": {"level": "CRITICAL"}}},  # noqa
             }
         )
 
@@ -88,9 +86,7 @@ def ingest_data(self):
         if "Parquet" in self.hpo_config.input_file_type:
             hpo_log.info("> parquet data ingestion")
 
-            dataset = dask_cudf.read_parquet(
-                self.hpo_config.target_files, columns=self.hpo_config.dataset_columns
-            )
+            dataset = dask_cudf.read_parquet(self.hpo_config.target_files, columns=self.hpo_config.dataset_columns)
 
         elif "CSV" in self.hpo_config.input_file_type:
             hpo_log.info("> csv data ingestion")
@@ -189,9 +185,7 @@ def predict(self, trained_model, X_test, threshold=0.5):
         hpo_log.info("> predict with trained model ")
         if "XGBoost" in self.hpo_config.model_type:
             dtest = xgboost.dask.DaskDMatrix(self.client, X_test)
-            predictions = xgboost.dask.predict(
-                self.client, trained_model, dtest
-            ).compute()
+            predictions = xgboost.dask.predict(self.client, trained_model, dtest).compute()
 
             predictions = (predictions > threshold) * 1.0
 
@@ -223,9 +217,7 @@ def save_best_model(self, score, trained_model, filename="saved_model"):
         if score > self.best_score:
             self.best_score = score
             hpo_log.info("> saving high-scoring model")
-            output_filename = os.path.join(
-                self.hpo_config.model_store_directory, filename
-            )
+            output_filename = os.path.join(self.hpo_config.model_store_directory, filename)
 
             if "XGBoost" in self.hpo_config.model_type:
                 trained_model.save_model(f"{output_filename}_mgpu_xgb")
diff --git a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleCPU.py b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleCPU.py
index 47fe8768..6345ec7b 100644
--- a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleCPU.py
+++ b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleCPU.py
@@ -166,9 +166,7 @@ def predict(self, trained_model, X_test, threshold=0.5):
     def score(self, y_test, predictions):
         """Score predictions vs ground truth labels on test data"""
         dataset_dtype = self.hpo_config.dataset_dtype
-        score = accuracy_score(
-            y_test.astype(dataset_dtype), predictions.astype(dataset_dtype)
-        )
+        score = accuracy_score(y_test.astype(dataset_dtype), predictions.astype(dataset_dtype))
 
         hpo_log.info(f"\t score = {score}")
         self.cv_fold_scores.append(score)
@@ -180,9 +178,7 @@ def save_best_model(self, score, trained_model, filename="saved_model"):
         if score > self.best_score:
             self.best_score = score
             hpo_log.info("> saving high-scoring model")
-            output_filename = os.path.join(
-                self.hpo_config.model_store_directory, filename
-            )
+            output_filename = os.path.join(self.hpo_config.model_store_directory, filename)
             if "XGBoost" in self.hpo_config.model_type:
                 trained_model.save_model(f"{output_filename}_scpu_xgb")
             elif "RandomForest" in self.hpo_config.model_type:
diff --git a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleGPU.py b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleGPU.py
index d9cc6674..a0895086 100644
--- a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleGPU.py
+++ b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleGPU.py
@@ -53,9 +53,7 @@ def ingest_data(self):
             return self.dataset_cache
 
         if "Parquet" in self.hpo_config.input_file_type:
-            dataset = cudf.read_parquet(
-                self.hpo_config.target_files, columns=self.hpo_config.dataset_columns
-            )  # noqa
+            dataset = cudf.read_parquet(self.hpo_config.target_files, columns=self.hpo_config.dataset_columns)  # noqa
 
         elif "CSV" in self.hpo_config.input_file_type:
             if isinstance(self.hpo_config.target_files, list):
@@ -64,14 +62,9 @@ def ingest_data(self):
                 filepath = self.hpo_config.target_files
 
             hpo_log.info(self.hpo_config.dataset_columns)
-            dataset = cudf.read_csv(
-                filepath, names=self.hpo_config.dataset_columns, header=0
-            )
+            dataset = cudf.read_csv(filepath, names=self.hpo_config.dataset_columns, header=0)
 
-        hpo_log.info(
-            f"ingested {self.hpo_config.input_file_type} dataset;"
-            f" shape = {dataset.shape}"
-        )
+        hpo_log.info(f"ingested {self.hpo_config.input_file_type} dataset;" f" shape = {dataset.shape}")
 
         self.dataset_cache = dataset
         return dataset
@@ -93,9 +86,7 @@ def split_dataset(self, dataset, random_state):
         hpo_log.info("> train-test split")
         label_column = self.hpo_config.label_column
 
-        X_train, X_test, y_train, y_test = train_test_split(
-            dataset, label_column, random_state=random_state
-        )
+        X_train, X_test, y_train, y_test = train_test_split(dataset, label_column, random_state=random_state)
 
         return (
             X_train.astype(self.hpo_config.dataset_dtype),
@@ -157,9 +148,7 @@ def predict(self, trained_model, X_test, threshold=0.5):
     def score(self, y_test, predictions):
         """Score predictions vs ground truth labels on test data"""
         dataset_dtype = self.hpo_config.dataset_dtype
-        score = accuracy_score(
-            y_test.astype(dataset_dtype), predictions.astype(dataset_dtype)
-        )
+        score = accuracy_score(y_test.astype(dataset_dtype), predictions.astype(dataset_dtype))
 
         hpo_log.info(f"score = {round(score,5)}")
         self.cv_fold_scores.append(score)
@@ -171,9 +160,7 @@ def save_best_model(self, score, trained_model, filename="saved_model"):
         if score > self.best_score:
             self.best_score = score
             hpo_log.info("saving high-scoring model")
-            output_filename = os.path.join(
-                self.hpo_config.model_store_directory, filename
-            )
+            output_filename = os.path.join(self.hpo_config.model_store_directory, filename)
             if "XGBoost" in self.hpo_config.model_type:
                 trained_model.save_model(f"{output_filename}_sgpu_xgb")
             elif "RandomForest" in self.hpo_config.model_type:
diff --git a/source/examples/time-series-forecasting-with-hpo/notebook.ipynb b/source/examples/time-series-forecasting-with-hpo/notebook.ipynb
index a85dd241..89e9dbfd 100644
--- a/source/examples/time-series-forecasting-with-hpo/notebook.ipynb
+++ b/source/examples/time-series-forecasting-with-hpo/notebook.ipynb
@@ -364,9 +364,7 @@
    "source": [
     "train_df = cudf.read_csv(raw_data_dir / \"sales_train_evaluation.csv\")\n",
     "prices_df = cudf.read_csv(raw_data_dir / \"sell_prices.csv\")\n",
-    "calendar_df = cudf.read_csv(raw_data_dir / \"calendar.csv\").rename(\n",
-    "    columns={\"d\": \"day_id\"}\n",
-    ")"
+    "calendar_df = cudf.read_csv(raw_data_dir / \"calendar.csv\").rename(columns={\"d\": \"day_id\"})"
    ]
   },
   {
@@ -1404,9 +1402,7 @@
    ],
    "source": [
     "index_columns = [\"id\", \"item_id\", \"dept_id\", \"cat_id\", \"store_id\", \"state_id\"]\n",
-    "grid_df = cudf.melt(\n",
-    "    train_df, id_vars=index_columns, var_name=\"day_id\", value_name=TARGET\n",
-    ")\n",
+    "grid_df = cudf.melt(train_df, id_vars=index_columns, var_name=\"day_id\", value_name=TARGET)\n",
     "grid_df"
    ]
   },
@@ -1627,15 +1623,11 @@
     "    temp_df[\"day_id\"] = \"d_\" + str(END_TRAIN + i)\n",
     "    temp_df[TARGET] = np.nan  # Sales amount at time (n + i) is unknown\n",
     "    add_grid = cudf.concat([add_grid, temp_df])\n",
-    "add_grid[\"day_id\"] = add_grid[\"day_id\"].astype(\n",
-    "    \"category\"\n",
-    ")  # The day_id column is categorical, after cudf.melt\n",
+    "add_grid[\"day_id\"] = add_grid[\"day_id\"].astype(\"category\")  # The day_id column is categorical, after cudf.melt\n",
     "\n",
     "grid_df = cudf.concat([grid_df, add_grid])\n",
     "grid_df = grid_df.reset_index(drop=True)\n",
-    "grid_df[\"sales\"] = grid_df[\"sales\"].astype(\n",
-    "    np.float32\n",
-    ")  # Use float32 type for sales column, to conserve memory\n",
+    "grid_df[\"sales\"] = grid_df[\"sales\"].astype(np.float32)  # Use float32 type for sales column, to conserve memory\n",
     "grid_df"
    ]
   },
@@ -2082,9 +2074,7 @@
     }
    ],
    "source": [
-    "release_df = (\n",
-    "    prices_df.groupby([\"store_id\", \"item_id\"])[\"wm_yr_wk\"].agg(\"min\").reset_index()\n",
-    ")\n",
+    "release_df = prices_df.groupby([\"store_id\", \"item_id\"])[\"wm_yr_wk\"].agg(\"min\").reset_index()\n",
     "release_df.columns = [\"store_id\", \"item_id\", \"release_week\"]\n",
     "release_df"
    ]
@@ -3115,9 +3105,7 @@
    ],
    "source": [
     "grid_df = grid_df[grid_df[\"wm_yr_wk\"] >= grid_df[\"release_week\"]].reset_index(drop=True)\n",
-    "grid_df[\"wm_yr_wk\"] = grid_df[\"wm_yr_wk\"].astype(\n",
-    "    np.int32\n",
-    ")  # Convert wm_yr_wk column to int32, to conserve memory\n",
+    "grid_df[\"wm_yr_wk\"] = grid_df[\"wm_yr_wk\"].astype(np.int32)  # Convert wm_yr_wk column to int32, to conserve memory\n",
     "grid_df"
    ]
   },
@@ -3430,21 +3418,13 @@
    "outputs": [],
    "source": [
     "# Highest price over all weeks\n",
-    "prices_df[\"price_max\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
-    "    \"sell_price\"\n",
-    "].transform(\"max\")\n",
+    "prices_df[\"price_max\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"max\")\n",
     "# Lowest price over all weeks\n",
-    "prices_df[\"price_min\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
-    "    \"sell_price\"\n",
-    "].transform(\"min\")\n",
+    "prices_df[\"price_min\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"min\")\n",
     "# Standard deviation of the price\n",
-    "prices_df[\"price_std\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
-    "    \"sell_price\"\n",
-    "].transform(\"std\")\n",
+    "prices_df[\"price_std\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"std\")\n",
     "# Mean (average) price over all weeks\n",
-    "prices_df[\"price_mean\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
-    "    \"sell_price\"\n",
-    "].transform(\"mean\")"
+    "prices_df[\"price_mean\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"mean\")"
    ]
   },
   {
@@ -3484,9 +3464,7 @@
    },
    "outputs": [],
    "source": [
-    "prices_df[\"price_nunique\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
-    "    \"sell_price\"\n",
-    "].transform(\"nunique\")"
+    "prices_df[\"price_nunique\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"nunique\")"
    ]
   },
   {
@@ -3506,9 +3484,7 @@
    },
    "outputs": [],
    "source": [
-    "prices_df[\"item_nunique\"] = prices_df.groupby([\"store_id\", \"sell_price\"])[\n",
-    "    \"item_id\"\n",
-    "].transform(\"nunique\")"
+    "prices_df[\"item_nunique\"] = prices_df.groupby([\"store_id\", \"sell_price\"])[\"item_id\"].transform(\"nunique\")"
    ]
   },
   {
@@ -3770,9 +3746,7 @@
    "outputs": [],
    "source": [
     "# Add \"month\" and \"year\" columns to prices_df\n",
-    "week_to_month_map = calendar_df[[\"wm_yr_wk\", \"month\", \"year\"]].drop_duplicates(\n",
-    "    subset=[\"wm_yr_wk\"]\n",
-    ")\n",
+    "week_to_month_map = calendar_df[[\"wm_yr_wk\", \"month\", \"year\"]].drop_duplicates(subset=[\"wm_yr_wk\"])\n",
     "prices_df = prices_df.merge(week_to_month_map, on=[\"wm_yr_wk\"], how=\"left\")\n",
     "\n",
     "# Sort by wm_yr_wk. The rows will also be sorted in ascending months and years.\n",
@@ -3789,17 +3763,17 @@
    "outputs": [],
    "source": [
     "# Compare with the average price in the previous week\n",
-    "prices_df[\"price_momentum\"] = prices_df[\"sell_price\"] / prices_df.groupby(\n",
-    "    [\"store_id\", \"item_id\"]\n",
-    ")[\"sell_price\"].shift(1)\n",
+    "prices_df[\"price_momentum\"] = prices_df[\"sell_price\"] / prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].shift(\n",
+    "    1\n",
+    ")\n",
     "# Compare with the average price in the previous month\n",
-    "prices_df[\"price_momentum_m\"] = prices_df[\"sell_price\"] / prices_df.groupby(\n",
-    "    [\"store_id\", \"item_id\", \"month\"]\n",
-    ")[\"sell_price\"].transform(\"mean\")\n",
+    "prices_df[\"price_momentum_m\"] = prices_df[\"sell_price\"] / prices_df.groupby([\"store_id\", \"item_id\", \"month\"])[\n",
+    "    \"sell_price\"\n",
+    "].transform(\"mean\")\n",
     "# Compare with the average price in the previous year\n",
-    "prices_df[\"price_momentum_y\"] = prices_df[\"sell_price\"] / prices_df.groupby(\n",
-    "    [\"store_id\", \"item_id\", \"year\"]\n",
-    ")[\"sell_price\"].transform(\"mean\")"
+    "prices_df[\"price_momentum_y\"] = prices_df[\"sell_price\"] / prices_df.groupby([\"store_id\", \"item_id\", \"year\"])[\n",
+    "    \"sell_price\"\n",
+    "].transform(\"mean\")"
    ]
   },
   {
@@ -4153,12 +4127,8 @@
     "# After merging price_df, keep columns id and day_id from grid_df and drop all other columns from grid_df\n",
     "original_columns = list(grid_df)\n",
     "grid_df_with_price = grid_df.copy()\n",
-    "grid_df_with_price = grid_df_with_price.merge(\n",
-    "    prices_df, on=[\"store_id\", \"item_id\", \"wm_yr_wk\"], how=\"left\"\n",
-    ")\n",
-    "columns_to_keep = [\"id\", \"day_id\"] + [\n",
-    "    col for col in list(grid_df_with_price) if col not in original_columns\n",
-    "]\n",
+    "grid_df_with_price = grid_df_with_price.merge(prices_df, on=[\"store_id\", \"item_id\", \"wm_yr_wk\"], how=\"left\")\n",
+    "columns_to_keep = [\"id\", \"day_id\"] + [col for col in list(grid_df_with_price) if col not in original_columns]\n",
     "grid_df_with_price = grid_df_with_price[[\"id\", \"day_id\"] + columns_to_keep]\n",
     "grid_df_with_price"
    ]
@@ -4425,9 +4395,7 @@
     "    \"snap_TX\",\n",
     "    \"snap_WI\",\n",
     "]\n",
-    "grid_df_with_calendar = grid_df_id_only.merge(\n",
-    "    calendar_df[icols], on=[\"day_id\"], how=\"left\"\n",
-    ")\n",
+    "grid_df_with_calendar = grid_df_id_only.merge(calendar_df[icols], on=[\"day_id\"], how=\"left\")\n",
     "grid_df_with_calendar"
    ]
   },
@@ -4777,22 +4745,14 @@
     "import cupy as cp\n",
     "\n",
     "grid_df_with_calendar[\"tm_d\"] = grid_df_with_calendar[\"date\"].dt.day.astype(np.int8)\n",
-    "grid_df_with_calendar[\"tm_w\"] = (\n",
-    "    grid_df_with_calendar[\"date\"].dt.isocalendar().week.astype(np.int8)\n",
-    ")\n",
+    "grid_df_with_calendar[\"tm_w\"] = grid_df_with_calendar[\"date\"].dt.isocalendar().week.astype(np.int8)\n",
     "grid_df_with_calendar[\"tm_m\"] = grid_df_with_calendar[\"date\"].dt.month.astype(np.int8)\n",
     "grid_df_with_calendar[\"tm_y\"] = grid_df_with_calendar[\"date\"].dt.year\n",
-    "grid_df_with_calendar[\"tm_y\"] = (\n",
-    "    grid_df_with_calendar[\"tm_y\"] - grid_df_with_calendar[\"tm_y\"].min()\n",
-    ").astype(np.int8)\n",
-    "grid_df_with_calendar[\"tm_wm\"] = cp.ceil(\n",
-    "    grid_df_with_calendar[\"tm_d\"].to_cupy() / 7\n",
-    ").astype(\n",
+    "grid_df_with_calendar[\"tm_y\"] = (grid_df_with_calendar[\"tm_y\"] - grid_df_with_calendar[\"tm_y\"].min()).astype(np.int8)\n",
+    "grid_df_with_calendar[\"tm_wm\"] = cp.ceil(grid_df_with_calendar[\"tm_d\"].to_cupy() / 7).astype(\n",
     "    np.int8\n",
     ")  # which week in tje month?\n",
-    "grid_df_with_calendar[\"tm_dw\"] = grid_df_with_calendar[\"date\"].dt.dayofweek.astype(\n",
-    "    np.int8\n",
-    ")  # which day in the week?\n",
+    "grid_df_with_calendar[\"tm_dw\"] = grid_df_with_calendar[\"date\"].dt.dayofweek.astype(np.int8)  # which day in the week?\n",
     "grid_df_with_calendar[\"tm_w_end\"] = (grid_df_with_calendar[\"tm_dw\"] >= 5).astype(\n",
     "    np.int8\n",
     ")  # whether today is in the weekend\n",
@@ -4852,10 +4812,7 @@
     "grid_df_lags = grid_df_lags.sort_values([\"id\", \"day_id\"])\n",
     "\n",
     "grid_df_lags = grid_df_lags.assign(\n",
-    "    **{\n",
-    "        f\"sales_lag_{ld}\": grid_df_lags.groupby([\"id\"])[\"sales\"].shift(ld)\n",
-    "        for ld in LAG_DAYS\n",
-    "    }\n",
+    "    **{f\"sales_lag_{ld}\": grid_df_lags.groupby([\"id\"])[\"sales\"].shift(ld) for ld in LAG_DAYS}\n",
     ")"
    ]
   },
@@ -5249,18 +5206,10 @@
     "for i in [7, 14, 30, 60, 180]:\n",
     "    print(f\"    Window size: {i}\")\n",
     "    grid_df_lags[f\"rolling_mean_{i}\"] = (\n",
-    "        grid_df_lags.groupby([\"id\"])[\"sales\"]\n",
-    "        .shift(SHIFT_DAY)\n",
-    "        .rolling(i)\n",
-    "        .mean()\n",
-    "        .astype(np.float32)\n",
+    "        grid_df_lags.groupby([\"id\"])[\"sales\"].shift(SHIFT_DAY).rolling(i).mean().astype(np.float32)\n",
     "    )\n",
     "    grid_df_lags[f\"rolling_std_{i}\"] = (\n",
-    "        grid_df_lags.groupby([\"id\"])[\"sales\"]\n",
-    "        .shift(SHIFT_DAY)\n",
-    "        .rolling(i)\n",
-    "        .std()\n",
-    "        .astype(np.float32)\n",
+    "        grid_df_lags.groupby([\"id\"])[\"sales\"].shift(SHIFT_DAY).rolling(i).std().astype(np.float32)\n",
     "    )"
    ]
   },
@@ -5777,9 +5726,7 @@
     "icols = [[\"store_id\", \"dept_id\"], [\"item_id\", \"state_id\"]]\n",
     "new_columns = []\n",
     "\n",
-    "grid_df_target_enc = grid_df[\n",
-    "    [\"id\", \"day_id\", \"item_id\", \"state_id\", \"store_id\", \"dept_id\", \"sales\"]\n",
-    "].copy()\n",
+    "grid_df_target_enc = grid_df[[\"id\", \"day_id\", \"item_id\", \"state_id\", \"store_id\", \"dept_id\", \"sales\"]].copy()\n",
     "grid_df_target_enc[\"sales\"].fillna(value=0, inplace=True)\n",
     "\n",
     "for col in icols:\n",
@@ -6153,9 +6100,7 @@
     "    if dept is None:\n",
     "        grid1 = grid_df[grid_df[\"store_id\"] == store]\n",
     "    else:\n",
-    "        grid1 = grid_df[\n",
-    "            (grid_df[\"store_id\"] == store) & (grid_df[\"dept_id\"] == dept)\n",
-    "        ].drop(columns=[\"dept_id\"])\n",
+    "        grid1 = grid_df[(grid_df[\"store_id\"] == store) & (grid_df[\"dept_id\"] == dept)].drop(columns=[\"dept_id\"])\n",
     "    grid1 = grid1.drop(columns=[\"release_week\", \"wm_yr_wk\", \"store_id\", \"state_id\"])\n",
     "\n",
     "    grid2 = grid_df_with_price[[\"id\", \"day_id\"] + grid2_colnm]\n",
@@ -6176,13 +6121,7 @@
     "    gc.collect()\n",
     "\n",
     "    grid_combined = grid_combined.drop(columns=[\"id\"])\n",
-    "    grid_combined[\"day_id\"] = (\n",
-    "        grid_combined[\"day_id\"]\n",
-    "        .to_pandas()\n",
-    "        .astype(\"str\")\n",
-    "        .apply(lambda x: x[2:])\n",
-    "        .astype(np.int16)\n",
-    "    )\n",
+    "    grid_combined[\"day_id\"] = grid_combined[\"day_id\"].to_pandas().astype(\"str\").apply(lambda x: x[2:]).astype(np.int16)\n",
     "\n",
     "    return grid_combined"
    ]
@@ -6287,9 +6226,7 @@
     "for store in STORES:\n",
     "    print(f\"Processing store {store}...\")\n",
     "    segment_df = prepare_data(store=store)\n",
-    "    segment_df.to_pandas().to_pickle(\n",
-    "        segmented_data_dir / f\"combined_df_store_{store}.pkl\"\n",
-    "    )\n",
+    "    segment_df.to_pandas().to_pickle(segmented_data_dir / f\"combined_df_store_{store}.pkl\")\n",
     "    del segment_df\n",
     "    gc.collect()\n",
     "\n",
@@ -6297,9 +6234,7 @@
     "    for dept in DEPTS:\n",
     "        print(f\"Processing (store {store}, department {dept})...\")\n",
     "        segment_df = prepare_data(store=store, dept=dept)\n",
-    "        segment_df.to_pandas().to_pickle(\n",
-    "            segmented_data_dir / f\"combined_df_store_{store}_dept_{dept}.pkl\"\n",
-    "        )\n",
+    "        segment_df.to_pandas().to_pickle(segmented_data_dir / f\"combined_df_store_{store}_dept_{dept}.pkl\")\n",
     "        del segment_df\n",
     "        gc.collect()"
    ]
@@ -7029,11 +6964,7 @@
     "    df_valid = df[(df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])]\n",
     "\n",
     "    # Compute denominator: 1/(n-1) * sum( (y(t) - y(t-1))**2 )\n",
-    "    diff = (\n",
-    "        df_train.sort_values([\"item_id\", \"day_id\"])\n",
-    "        .groupby([\"item_id\"])[[\"sales\"]]\n",
-    "        .diff(1)\n",
-    "    )\n",
+    "    diff = df_train.sort_values([\"item_id\", \"day_id\"]).groupby([\"item_id\"])[[\"sales\"]].diff(1)\n",
     "    x = (\n",
     "        df_train[[\"item_id\", \"day_id\"]]\n",
     "        .join(diff, how=\"left\")\n",
@@ -7108,9 +7039,7 @@
     "        \"alpha\": trial.suggest_float(\"alpha\", 1e-8, 100.0, log=True),\n",
     "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.2, 1.0),\n",
     "        \"max_depth\": trial.suggest_int(\"max_depth\", 2, 6, step=1),\n",
-    "        \"min_child_weight\": trial.suggest_float(\n",
-    "            \"min_child_weight\", 1e-8, 100, log=True\n",
-    "        ),\n",
+    "        \"min_child_weight\": trial.suggest_float(\"min_child_weight\", 1e-8, 100, log=True),\n",
     "        \"gamma\": trial.suggest_float(\"gamma\", 1e-8, 1.0, log=True),\n",
     "        \"tweedie_variance_power\": trial.suggest_float(\"tweedie_variance_power\", 1, 2),\n",
     "    }\n",
@@ -7121,29 +7050,19 @@
     "        with fs.open(f\"{bucket_name}/combined_df_store_{store}.pkl\", \"rb\") as f:\n",
     "            df = cudf.DataFrame(pd.read_pickle(f))\n",
     "        for train_mask, valid_mask in cv_folds:\n",
-    "            df_train = df[\n",
-    "                (df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])\n",
-    "            ]\n",
-    "            df_valid = df[\n",
-    "                (df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])\n",
-    "            ]\n",
+    "            df_train = df[(df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])]\n",
+    "            df_valid = df[(df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])]\n",
     "\n",
     "            X_train, y_train = (\n",
-    "                df_train.drop(\n",
-    "                    columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"]\n",
-    "                ),\n",
+    "                df_train.drop(columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"]),\n",
     "                df_train[\"sales\"],\n",
     "            )\n",
-    "            X_valid = df_valid.drop(\n",
-    "                columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"]\n",
-    "            )\n",
+    "            X_valid = df_valid.drop(columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"])\n",
     "\n",
     "            clf = xgb.XGBRegressor(**params)\n",
     "            clf.fit(X_train, y_train)\n",
     "            pred_sales = clf.predict(X_valid)\n",
-    "            scores[store_id].append(\n",
-    "                wrmsse(product_weights, df, pred_sales, train_mask, valid_mask)\n",
-    "            )\n",
+    "            scores[store_id].append(wrmsse(product_weights, df, pred_sales, train_mask, valid_mask))\n",
     "            del df_train, df_valid, X_train, y_train, clf\n",
     "            gc.collect()\n",
     "        del df\n",
@@ -7238,9 +7157,7 @@
     "    for fut in partition[\"futures\"]:\n",
     "        _ = fut.result()  # Ensure that the training job was successful\n",
     "    tnow = time.perf_counter()\n",
-    "    print(\n",
-    "        f\"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}\"\n",
-    "    )\n",
+    "    print(f\"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}\")\n",
     "tend = time.perf_counter()\n",
     "print(f\"Total time elapsed = {tend - tstart}\")"
    ]
@@ -7481,9 +7398,7 @@
     "    df_test = df[(df[\"day_id\"] >= holdout[0]) & (df[\"day_id\"] < holdout[1])]\n",
     "    X_test = df_test.drop(columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"])\n",
     "    pred_sales = model[store].predict(X_test)\n",
-    "    test_wrmsse += wrmsse(\n",
-    "        product_weights, df, pred_sales, train_mask=[0, 1914], valid_mask=holdout\n",
-    "    )\n",
+    "    test_wrmsse += wrmsse(product_weights, df, pred_sales, train_mask=[0, 1914], valid_mask=holdout)\n",
     "print(f\"WRMSSE metric on the held-out test set: {test_wrmsse}\")"
    ]
   },
@@ -7538,9 +7453,7 @@
     "        \"alpha\": trial.suggest_float(\"alpha\", 1e-8, 100.0, log=True),\n",
     "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.2, 1.0),\n",
     "        \"max_depth\": trial.suggest_int(\"max_depth\", 2, 6, step=1),\n",
-    "        \"min_child_weight\": trial.suggest_float(\n",
-    "            \"min_child_weight\", 1e-8, 100, log=True\n",
-    "        ),\n",
+    "        \"min_child_weight\": trial.suggest_float(\"min_child_weight\", 1e-8, 100, log=True),\n",
     "        \"gamma\": trial.suggest_float(\"gamma\", 1e-8, 1.0, log=True),\n",
     "        \"tweedie_variance_power\": trial.suggest_float(\"tweedie_variance_power\", 1, 2),\n",
     "    }\n",
@@ -7549,25 +7462,17 @@
     "    for store_id, store in enumerate(STORES):\n",
     "        for dept_id, dept in enumerate(DEPTS):\n",
     "            print(f\"Processing store {store}, department {dept}...\")\n",
-    "            with fs.open(\n",
-    "                f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\"\n",
-    "            ) as f:\n",
+    "            with fs.open(f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\") as f:\n",
     "                df = cudf.DataFrame(pd.read_pickle(f))\n",
     "            for train_mask, valid_mask in cv_folds:\n",
-    "                df_train = df[\n",
-    "                    (df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])\n",
-    "                ]\n",
-    "                df_valid = df[\n",
-    "                    (df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])\n",
-    "                ]\n",
+    "                df_train = df[(df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])]\n",
+    "                df_valid = df[(df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])]\n",
     "\n",
     "                X_train, y_train = (\n",
     "                    df_train.drop(columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"]),\n",
     "                    df_train[\"sales\"],\n",
     "                )\n",
-    "                X_valid = df_valid.drop(\n",
-    "                    columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"]\n",
-    "                )\n",
+    "                X_valid = df_valid.drop(columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"])\n",
     "\n",
     "                clf = xgb.XGBRegressor(**params)\n",
     "                clf.fit(X_train, y_train)\n",
@@ -7661,9 +7566,7 @@
     "    for fut in partition[\"futures\"]:\n",
     "        _ = fut.result()  # Ensure that the training job was successful\n",
     "    tnow = time.perf_counter()\n",
-    "    print(\n",
-    "        f\"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}\"\n",
-    "    )\n",
+    "    print(f\"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}\")\n",
     "tend = time.perf_counter()\n",
     "print(f\"Total time elapsed = {tend - tstart}\")"
    ]
@@ -7749,14 +7652,10 @@
     "    for _, store in enumerate(STORES):\n",
     "        for _, dept in enumerate(DEPTS):\n",
     "            print(f\"Processing store {store}, department {dept}...\")\n",
-    "            with fs.open(\n",
-    "                f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\"\n",
-    "            ) as f:\n",
+    "            with fs.open(f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\") as f:\n",
     "                df = cudf.DataFrame(pd.read_pickle(f))\n",
     "            for train_mask, _ in cv_folds:\n",
-    "                df_train = df[\n",
-    "                    (df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])\n",
-    "                ]\n",
+    "                df_train = df[(df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])]\n",
     "                X_train, y_train = (\n",
     "                    df_train.drop(columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"]),\n",
     "                    df_train[\"sales\"],\n",
@@ -7939,16 +7838,12 @@
     "    df_test[\"pred2\"] = [np.nan] * len(df_test)\n",
     "    df_test[\"pred2\"] = df_test[\"pred2\"].astype(\"float32\")\n",
     "    for dept in DEPTS:\n",
-    "        with fs.open(\n",
-    "            f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\"\n",
-    "        ) as f:\n",
+    "        with fs.open(f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\") as f:\n",
     "            df2 = cudf.DataFrame(pd.read_pickle(f))\n",
     "        df2_test = df2[(df2[\"day_id\"] >= holdout[0]) & (df2[\"day_id\"] < holdout[1])]\n",
     "        X_test = df2_test.drop(columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"])\n",
     "        assert np.sum(df_test[\"dept_id\"] == dept) == len(X_test)\n",
-    "        df_test[\"pred2\"][df_test[\"dept_id\"] == dept] = model_alt[(store, dept)].predict(\n",
-    "            X_test\n",
-    "        )\n",
+    "        df_test[\"pred2\"][df_test[\"dept_id\"] == dept] = model_alt[(store, dept)].predict(X_test)\n",
     "\n",
     "    # Average prediction\n",
     "    df_test[\"avg_pred\"] = (df_test[\"pred1\"] + df_test[\"pred2\"]) / 2.0\n",
diff --git a/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb b/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb
index 73cf685e..6bb57c30 100644
--- a/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb
+++ b/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb
@@ -1380,9 +1380,7 @@
     "\n",
     "pp = pprint.PrettyPrinter()\n",
     "\n",
-    "pp.pprint(\n",
-    "    client.scheduler_info()\n",
-    ")  # will show some information of the GPUs of the workers"
+    "pp.pprint(client.scheduler_info())  # will show some information of the GPUs of the workers"
    ]
   },
   {
@@ -1703,9 +1701,7 @@
     "    taxi_data = taxi_data[fields]\n",
     "    taxi_data = taxi_data.reset_index()\n",
     "\n",
-    "    return persist_train_infer_split(\n",
-    "        client, taxi_data, response_dtype, response_id, infer_frac, random_state\n",
-    "    )"
+    "    return persist_train_infer_split(client, taxi_data, response_dtype, response_id, infer_frac, random_state)"
    ]
   },
   {
@@ -2166,9 +2162,7 @@
    "source": [
     "data_train = xgb.dask.DaskDMatrix(client, X_train, y_train)\n",
     "tic = timer()\n",
-    "xgboost_output = xgb.dask.train(\n",
-    "    client, params, data_train, num_boost_round=params[\"num_boost_rounds\"]\n",
-    ")\n",
+    "xgboost_output = xgb.dask.train(client, params, data_train, num_boost_round=params[\"num_boost_rounds\"])\n",
     "xgb_gpu_model = xgboost_output[\"booster\"]\n",
     "toc = timer()\n",
     "print(f\"Wall clock time taken for this cell : {toc-tic} s\")"
@@ -2448,9 +2442,7 @@
    ],
    "source": [
     "tic = timer()\n",
-    "predictions = X_infer.map_partitions(\n",
-    "    predict_model, meta=\"float\"\n",
-    ")  # this is like MPI reduce\n",
+    "predictions = X_infer.map_partitions(predict_model, meta=\"float\")  # this is like MPI reduce\n",
     "y_pred = predictions.compute()\n",
     "wait(y_pred)\n",
     "toc = timer()\n",
@@ -2472,9 +2464,7 @@
    ],
    "source": [
     "rows_csv = X_infer.iloc[:, 0].shape[0].compute()\n",
-    "print(\n",
-    "    f\"It took {toc-tic} seconds to predict on {rows_csv} rows using FIL distributedly on each worker\"\n",
-    ")"
+    "print(f\"It took {toc-tic} seconds to predict on {rows_csv} rows using FIL distributedly on each worker\")"
    ]
   },
   {
diff --git a/source/examples/xgboost-dask-databricks/notebook.ipynb b/source/examples/xgboost-dask-databricks/notebook.ipynb
index a7e63b4a..8a707187 100644
--- a/source/examples/xgboost-dask-databricks/notebook.ipynb
+++ b/source/examples/xgboost-dask-databricks/notebook.ipynb
@@ -480,9 +480,7 @@
     "# Check if the file already exists\n",
     "if not os.path.exists(file_path):\n",
     "    # If not, download dataset to the directory\n",
-    "    data_url = (\n",
-    "        \"https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz\"\n",
-    "    )\n",
+    "    data_url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz\"\n",
     "    download_command = f\"curl {data_url} --output {file_path}\"\n",
     "    subprocess.run(download_command, shell=True)\n",
     "\n",
@@ -1254,12 +1252,8 @@
     "    y = ddf[\"label\"]\n",
     "    X = ddf[ddf.columns.difference([\"label\"])]\n",
     "\n",
-    "    X_train, X_valid, y_train, y_valid = train_test_split(\n",
-    "        X, y, test_size=0.33, random_state=42\n",
-    "    )\n",
-    "    X_train, X_valid, y_train, y_valid = client.persist(\n",
-    "        [X_train, X_valid, y_train, y_valid]\n",
-    "    )\n",
+    "    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33, random_state=42)\n",
+    "    X_train, X_valid, y_train, y_valid = client.persist([X_train, X_valid, y_train, y_valid])\n",
     "    wait([X_train, X_valid, y_train, y_valid])\n",
     "\n",
     "    return X_train, X_valid, y_train, y_valid"
@@ -1690,9 +1684,7 @@
     "    # Use early stopping with custom objective and metric.\n",
     "    early_stopping_rounds = 5\n",
     "    # Specify the metric we want to use for early stopping.\n",
-    "    es = xgb.callback.EarlyStopping(\n",
-    "        rounds=early_stopping_rounds, save_best=True, metric_name=\"CustomErr\"\n",
-    "    )\n",
+    "    es = xgb.callback.EarlyStopping(rounds=early_stopping_rounds, save_best=True, metric_name=\"CustomErr\")\n",
     "\n",
     "    Xy = dxgb.DaskDeviceQuantileDMatrix(client, X, y)\n",
     "    Xy_valid = dxgb.DaskDMatrix(client, X_valid, y_valid)\n",
@@ -1742,9 +1734,7 @@
     }
    ],
    "source": [
-    "booster_custom = fit_model_customized_objective(\n",
-    "    client, X=X_train, y=y_train, X_valid=X_valid, y_valid=y_valid\n",
-    ")\n",
+    "booster_custom = fit_model_customized_objective(client, X=X_train, y=y_train, X_valid=X_valid, y_valid=y_valid)\n",
     "booster_custom"
    ]
   },
diff --git a/source/examples/xgboost-gpu-hpo-job-parallel-k8s/notebook.ipynb b/source/examples/xgboost-gpu-hpo-job-parallel-k8s/notebook.ipynb
index 944b106f..2b900ce3 100644
--- a/source/examples/xgboost-gpu-hpo-job-parallel-k8s/notebook.ipynb
+++ b/source/examples/xgboost-gpu-hpo-job-parallel-k8s/notebook.ipynb
@@ -315,10 +315,7 @@
     "    futures.append(\n",
     "        {\n",
     "            \"range\": iter_range,\n",
-    "            \"futures\": [\n",
-    "                client.submit(study.optimize, objective, n_trials=1, pure=False)\n",
-    "                for _ in range(*iter_range)\n",
-    "            ],\n",
+    "            \"futures\": [client.submit(study.optimize, objective, n_trials=1, pure=False) for _ in range(*iter_range)],\n",
     "        }\n",
     "    )\n",
     "for partition in futures:\n",
@@ -412,9 +409,7 @@
     "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.2, 1.0),\n",
     "        \"max_depth\": trial.suggest_int(\"max_depth\", 2, 10, step=1),\n",
     "        # minimum child weight, larger the term more conservative the tree.\n",
-    "        \"min_child_weight\": trial.suggest_float(\n",
-    "            \"min_child_weight\", 1e-8, 100, log=True\n",
-    "        ),\n",
+    "        \"min_child_weight\": trial.suggest_float(\"min_child_weight\", 1e-8, 100, log=True),\n",
     "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-8, 1.0, log=True),\n",
     "        # defines how selective algorithm is.\n",
     "        \"gamma\": trial.suggest_float(\"gamma\", 1e-8, 1.0, log=True),\n",
@@ -474,19 +469,14 @@
     "# Optimize in parallel on your Dask cluster\n",
     "backend_storage = optuna.storages.InMemoryStorage()\n",
     "dask_storage = optuna.integration.DaskStorage(storage=backend_storage, client=client)\n",
-    "study = optuna.create_study(\n",
-    "    direction=\"maximize\", sampler=RandomSampler(seed=0), storage=dask_storage\n",
-    ")\n",
+    "study = optuna.create_study(direction=\"maximize\", sampler=RandomSampler(seed=0), storage=dask_storage)\n",
     "futures = []\n",
     "for i in range(0, n_trials, n_workers * 4):\n",
     "    iter_range = (i, min([i + n_workers * 4, n_trials]))\n",
     "    futures.append(\n",
     "        {\n",
     "            \"range\": iter_range,\n",
-    "            \"futures\": [\n",
-    "                client.submit(study.optimize, objective, n_trials=1, pure=False)\n",
-    "                for _ in range(*iter_range)\n",
-    "            ],\n",
+    "            \"futures\": [client.submit(study.optimize, objective, n_trials=1, pure=False) for _ in range(*iter_range)],\n",
     "        }\n",
     "    )\n",
     "for partition in futures:\n",
diff --git a/source/examples/xgboost-gpu-hpo-job-parallel-ngc/notebook.ipynb b/source/examples/xgboost-gpu-hpo-job-parallel-ngc/notebook.ipynb
index 051464ac..4b1ab929 100644
--- a/source/examples/xgboost-gpu-hpo-job-parallel-ngc/notebook.ipynb
+++ b/source/examples/xgboost-gpu-hpo-job-parallel-ngc/notebook.ipynb
@@ -1567,10 +1567,7 @@
     "    futures.append(\n",
     "        {\n",
     "            \"range\": iter_range,\n",
-    "            \"futures\": [\n",
-    "                client.submit(study.optimize, objective, n_trials=1, pure=False)\n",
-    "                for _ in range(*iter_range)\n",
-    "            ],\n",
+    "            \"futures\": [client.submit(study.optimize, objective, n_trials=1, pure=False) for _ in range(*iter_range)],\n",
     "        }\n",
     "    )\n",
     "for partition in futures:\n",
@@ -1666,9 +1663,7 @@
     "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.2, 1.0),\n",
     "        \"max_depth\": trial.suggest_int(\"max_depth\", 2, 10, step=1),\n",
     "        # minimum child weight, larger the term more conservative the tree.\n",
-    "        \"min_child_weight\": trial.suggest_float(\n",
-    "            \"min_child_weight\", 1e-8, 100, log=True\n",
-    "        ),\n",
+    "        \"min_child_weight\": trial.suggest_float(\"min_child_weight\", 1e-8, 100, log=True),\n",
     "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-8, 1.0, log=True),\n",
     "        # defines how selective algorithm is.\n",
     "        \"gamma\": trial.suggest_float(\"gamma\", 1e-8, 1.0, log=True),\n",
@@ -1730,19 +1725,14 @@
     "# Optimize in parallel on your Dask cluster\n",
     "backend_storage = optuna.storages.InMemoryStorage()\n",
     "dask_storage = optuna.integration.DaskStorage(storage=backend_storage, client=client)\n",
-    "study = optuna.create_study(\n",
-    "    direction=\"maximize\", sampler=RandomSampler(seed=0), storage=dask_storage\n",
-    ")\n",
+    "study = optuna.create_study(direction=\"maximize\", sampler=RandomSampler(seed=0), storage=dask_storage)\n",
     "futures = []\n",
     "for i in range(0, n_trials, n_workers * 4):\n",
     "    iter_range = (i, min([i + n_workers * 4, n_trials]))\n",
     "    futures.append(\n",
     "        {\n",
     "            \"range\": iter_range,\n",
-    "            \"futures\": [\n",
-    "                client.submit(study.optimize, objective, n_trials=1, pure=False)\n",
-    "                for _ in range(*iter_range)\n",
-    "            ],\n",
+    "            \"futures\": [client.submit(study.optimize, objective, n_trials=1, pure=False) for _ in range(*iter_range)],\n",
     "        }\n",
     "    )\n",
     "for partition in futures:\n",
diff --git a/source/examples/xgboost-gpu-hpo-mnmg-parallel-k8s/notebook.ipynb b/source/examples/xgboost-gpu-hpo-mnmg-parallel-k8s/notebook.ipynb
index 37200062..bcaeab88 100644
--- a/source/examples/xgboost-gpu-hpo-mnmg-parallel-k8s/notebook.ipynb
+++ b/source/examples/xgboost-gpu-hpo-mnmg-parallel-k8s/notebook.ipynb
@@ -1,741 +1,727 @@
 {
-    "cells": [
-        {
-            "cell_type": "markdown",
-            "id": "c1db247a-15ab-41b1-a124-152484a29f29",
-            "metadata": {
-                "tags": [
-                    "library/xgboost",
-                    "library/optuna",
-                    "library/dask",
-                    "library/dask-kubernetes",
-                    "library/scikit-learn",
-                    "workflow/hpo",
-                    "platforms/kubeflow",
-                    "dataset/nyc-taxi",
-                    "data-storage/gcs",
-                    "data-format/csv",
-                    "platforms/kubernetes"
-                ]
-            },
-            "source": [
-                "# Scaling up Hyperparameter Optimization with Multi-GPU Workload on Kubernetes"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "f7f02171-ed7b-48b4-9d55-32bb1149a3cf",
-            "metadata": {},
-            "source": [
-                "Choosing an optimal set of hyperparameters is a daunting task, especially for algorithms like XGBoost that have many hyperparameters to tune. In this notebook, we will speed up hyperparameter optimization by running multiple training jobs in parallel on a Kubernetes cluster. We handle larger data sets by splitting the data into multiple GPU devices."
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "a718e21f-5543-4f44-8a68-6ad8e78cb433",
-            "metadata": {},
-            "source": [
-                "## Prerequisites\n",
-                "Please follow instructions in [Dask Operator: Installation](../../tools/kubernetes/dask-operator) to install the Dask operator on top of a GPU-enabled Kubernetes cluster. (For the purpose of this example, you may ignore other sections of the linked document.)\n",
-                "\n",
-                "### Optional: Kubeflow\n",
-                "Kubeflow gives you a nice notebook environment to run this notebook within the k8s cluster. Install Kubeflow by following instructions in [Installing Kubeflow](https://www.kubeflow.org/docs/started/installing-kubeflow/). You may choose any method; we tested this example after installing Kubeflow from manifests."
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "7b7f7bb3-5d53-4b8f-8472-bb974c8a597d",
-            "metadata": {},
-            "source": [
-                "## Install extra Python modules\n",
-                "We'll need a few extra Python modules."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 1,
-            "id": "27b79db5-bbcd-422c-80a7-af873eb47711",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Collecting dask_kubernetes\n",
-                        "  Downloading dask_kubernetes-2024.5.0-py3-none-any.whl.metadata (4.2 kB)\n",
-                        "Collecting optuna\n",
-                        "  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)\n",
-                        "Requirement already satisfied: dask>=2022.08.1 in /opt/conda/lib/python3.11/site-packages (from dask_kubernetes) (2024.1.1)\n",
-                        "Requirement already satisfied: distributed>=2022.08.1 in /opt/conda/lib/python3.11/site-packages (from dask_kubernetes) (2024.1.1)\n",
-                        "Collecting kopf>=1.35.3 (from dask_kubernetes)\n",
-                        "  Downloading kopf-1.37.2-py3-none-any.whl.metadata (9.7 kB)\n",
-                        "Collecting kr8s==0.14.* (from dask_kubernetes)\n",
-                        "  Downloading kr8s-0.14.4-py3-none-any.whl.metadata (6.7 kB)\n",
-                        "Collecting kubernetes-asyncio>=12.0.1 (from dask_kubernetes)\n",
-                        "  Downloading kubernetes_asyncio-29.0.0-py3-none-any.whl.metadata (1.3 kB)\n",
-                        "Collecting kubernetes>=12.0.1 (from dask_kubernetes)\n",
-                        "  Downloading kubernetes-29.0.0-py2.py3-none-any.whl.metadata (1.5 kB)\n",
-                        "Collecting pykube-ng>=22.9.0 (from dask_kubernetes)\n",
-                        "  Downloading pykube_ng-23.6.0-py3-none-any.whl.metadata (8.0 kB)\n",
-                        "Requirement already satisfied: rich>=12.5.1 in /opt/conda/lib/python3.11/site-packages (from dask_kubernetes) (13.7.1)\n",
-                        "Requirement already satisfied: anyio>=3.7.0 in /opt/conda/lib/python3.11/site-packages (from kr8s==0.14.*->dask_kubernetes) (4.3.0)\n",
-                        "Collecting asyncache>=0.3.1 (from kr8s==0.14.*->dask_kubernetes)\n",
-                        "  Downloading asyncache-0.3.1-py3-none-any.whl.metadata (2.0 kB)\n",
-                        "Collecting cryptography>=35 (from kr8s==0.14.*->dask_kubernetes)\n",
-                        "  Downloading cryptography-42.0.7-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (5.3 kB)\n",
-                        "Requirement already satisfied: exceptiongroup>=1.2.0 in /opt/conda/lib/python3.11/site-packages (from kr8s==0.14.*->dask_kubernetes) (1.2.0)\n",
-                        "Collecting httpx-ws>=0.5.1 (from kr8s==0.14.*->dask_kubernetes)\n",
-                        "  Downloading httpx_ws-0.6.0-py3-none-any.whl.metadata (7.8 kB)\n",
-                        "Requirement already satisfied: httpx>=0.24.1 in /opt/conda/lib/python3.11/site-packages (from kr8s==0.14.*->dask_kubernetes) (0.27.0)\n",
-                        "Collecting python-box>=7.0.1 (from kr8s==0.14.*->dask_kubernetes)\n",
-                        "  Downloading python_box-7.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.8 kB)\n",
-                        "Collecting python-jsonpath>=0.7.1 (from kr8s==0.14.*->dask_kubernetes)\n",
-                        "  Downloading python_jsonpath-1.1.1-py3-none-any.whl.metadata (5.3 kB)\n",
-                        "Requirement already satisfied: pyyaml>=6.0 in /opt/conda/lib/python3.11/site-packages (from kr8s==0.14.*->dask_kubernetes) (6.0.1)\n",
-                        "Collecting alembic>=1.5.0 (from optuna)\n",
-                        "  Downloading alembic-1.13.1-py3-none-any.whl.metadata (7.4 kB)\n",
-                        "Collecting colorlog (from optuna)\n",
-                        "  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)\n",
-                        "Requirement already satisfied: numpy in /opt/conda/lib/python3.11/site-packages (from optuna) (1.26.4)\n",
-                        "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.11/site-packages (from optuna) (24.0)\n",
-                        "Collecting sqlalchemy>=1.3.0 (from optuna)\n",
-                        "  Downloading SQLAlchemy-2.0.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n",
-                        "Requirement already satisfied: tqdm in /opt/conda/lib/python3.11/site-packages (from optuna) (4.66.2)\n",
-                        "Collecting Mako (from alembic>=1.5.0->optuna)\n",
-                        "  Downloading Mako-1.3.3-py3-none-any.whl.metadata (2.9 kB)\n",
-                        "Requirement already satisfied: typing-extensions>=4 in /opt/conda/lib/python3.11/site-packages (from alembic>=1.5.0->optuna) (4.11.0)\n",
-                        "Requirement already satisfied: click>=8.1 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (8.1.7)\n",
-                        "Requirement already satisfied: cloudpickle>=1.5.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (3.0.0)\n",
-                        "Requirement already satisfied: fsspec>=2021.09.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (2024.3.1)\n",
-                        "Requirement already satisfied: partd>=1.2.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (1.4.1)\n",
-                        "Requirement already satisfied: toolz>=0.10.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (0.12.1)\n",
-                        "Requirement already satisfied: importlib-metadata>=4.13.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (7.1.0)\n",
-                        "Requirement already satisfied: jinja2>=2.10.3 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (3.1.3)\n",
-                        "Requirement already satisfied: locket>=1.0.0 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (1.0.0)\n",
-                        "Requirement already satisfied: msgpack>=1.0.0 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (1.0.7)\n",
-                        "Requirement already satisfied: psutil>=5.7.2 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (5.9.8)\n",
-                        "Requirement already satisfied: sortedcontainers>=2.0.5 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (2.4.0)\n",
-                        "Requirement already satisfied: tblib>=1.6.0 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (3.0.0)\n",
-                        "Requirement already satisfied: tornado>=6.0.4 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (6.4)\n",
-                        "Requirement already satisfied: urllib3>=1.24.3 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (1.26.18)\n",
-                        "Requirement already satisfied: zict>=3.0.0 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (3.0.0)\n",
-                        "Requirement already satisfied: python-json-logger in /opt/conda/lib/python3.11/site-packages (from kopf>=1.35.3->dask_kubernetes) (2.0.7)\n",
-                        "Collecting iso8601 (from kopf>=1.35.3->dask_kubernetes)\n",
-                        "  Downloading iso8601-2.1.0-py3-none-any.whl.metadata (3.7 kB)\n",
-                        "Requirement already satisfied: aiohttp in /opt/conda/lib/python3.11/site-packages (from kopf>=1.35.3->dask_kubernetes) (3.9.5)\n",
-                        "Requirement already satisfied: certifi>=14.05.14 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (2024.2.2)\n",
-                        "Requirement already satisfied: six>=1.9.0 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (1.16.0)\n",
-                        "Requirement already satisfied: python-dateutil>=2.5.3 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (2.9.0)\n",
-                        "Collecting google-auth>=1.0.1 (from kubernetes>=12.0.1->dask_kubernetes)\n",
-                        "  Downloading google_auth-2.29.0-py2.py3-none-any.whl.metadata (4.7 kB)\n",
-                        "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (1.8.0)\n",
-                        "Requirement already satisfied: requests in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (2.31.0)\n",
-                        "Collecting requests-oauthlib (from kubernetes>=12.0.1->dask_kubernetes)\n",
-                        "  Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl.metadata (11 kB)\n",
-                        "Collecting oauthlib>=3.2.2 (from kubernetes>=12.0.1->dask_kubernetes)\n",
-                        "  Downloading oauthlib-3.2.2-py3-none-any.whl.metadata (7.5 kB)\n",
-                        "Requirement already satisfied: setuptools>=21.0.0 in /opt/conda/lib/python3.11/site-packages (from kubernetes-asyncio>=12.0.1->dask_kubernetes) (69.5.1)\n",
-                        "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/lib/python3.11/site-packages (from rich>=12.5.1->dask_kubernetes) (3.0.0)\n",
-                        "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.11/site-packages (from rich>=12.5.1->dask_kubernetes) (2.17.2)\n",
-                        "Collecting greenlet!=0.4.17 (from sqlalchemy>=1.3.0->optuna)\n",
-                        "  Downloading greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n",
-                        "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (1.3.1)\n",
-                        "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (23.2.0)\n",
-                        "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (1.4.1)\n",
-                        "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (6.0.5)\n",
-                        "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (1.9.4)\n",
-                        "Requirement already satisfied: idna>=2.8 in /opt/conda/lib/python3.11/site-packages (from anyio>=3.7.0->kr8s==0.14.*->dask_kubernetes) (3.7)\n",
-                        "Requirement already satisfied: sniffio>=1.1 in /opt/conda/lib/python3.11/site-packages (from anyio>=3.7.0->kr8s==0.14.*->dask_kubernetes) (1.3.1)\n",
-                        "Requirement already satisfied: cachetools<6.0.0,>=5.2.0 in /opt/conda/lib/python3.11/site-packages (from asyncache>=0.3.1->kr8s==0.14.*->dask_kubernetes) (5.3.3)\n",
-                        "Requirement already satisfied: cffi>=1.12 in /opt/conda/lib/python3.11/site-packages (from cryptography>=35->kr8s==0.14.*->dask_kubernetes) (1.16.0)\n",
-                        "Collecting pyasn1-modules>=0.2.1 (from google-auth>=1.0.1->kubernetes>=12.0.1->dask_kubernetes)\n",
-                        "  Downloading pyasn1_modules-0.4.0-py3-none-any.whl.metadata (3.4 kB)\n",
-                        "Collecting rsa<5,>=3.1.4 (from google-auth>=1.0.1->kubernetes>=12.0.1->dask_kubernetes)\n",
-                        "  Downloading rsa-4.9-py3-none-any.whl.metadata (4.2 kB)\n",
-                        "Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.11/site-packages (from httpx>=0.24.1->kr8s==0.14.*->dask_kubernetes) (1.0.5)\n",
-                        "Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.11/site-packages (from httpcore==1.*->httpx>=0.24.1->kr8s==0.14.*->dask_kubernetes) (0.14.0)\n",
-                        "Collecting wsproto (from httpx-ws>=0.5.1->kr8s==0.14.*->dask_kubernetes)\n",
-                        "  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)\n",
-                        "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.11/site-packages (from importlib-metadata>=4.13.0->dask>=2022.08.1->dask_kubernetes) (3.17.0)\n",
-                        "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.11/site-packages (from jinja2>=2.10.3->distributed>=2022.08.1->dask_kubernetes) (2.1.5)\n",
-                        "Requirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.11/site-packages (from markdown-it-py>=2.2.0->rich>=12.5.1->dask_kubernetes) (0.1.2)\n",
-                        "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests->kubernetes>=12.0.1->dask_kubernetes) (3.3.2)\n",
-                        "Requirement already satisfied: pycparser in /opt/conda/lib/python3.11/site-packages (from cffi>=1.12->cryptography>=35->kr8s==0.14.*->dask_kubernetes) (2.22)\n",
-                        "Collecting pyasn1<0.7.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=12.0.1->dask_kubernetes)\n",
-                        "  Downloading pyasn1-0.6.0-py2.py3-none-any.whl.metadata (8.3 kB)\n",
-                        "Downloading dask_kubernetes-2024.5.0-py3-none-any.whl (157 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.2/157.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n",
-                        "\u001b[?25hDownloading kr8s-0.14.4-py3-none-any.whl (60 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.7/60.7 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading optuna-3.6.1-py3-none-any.whl (380 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m380.1/380.1 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
-                        "\u001b[?25hDownloading alembic-1.13.1-py3-none-any.whl (233 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m233.4/233.4 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading kopf-1.37.2-py3-none-any.whl (207 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.8/207.8 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading kubernetes-29.0.0-py2.py3-none-any.whl (1.6 MB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
-                        "\u001b[?25hDownloading kubernetes_asyncio-29.0.0-py3-none-any.whl (2.0 MB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m83.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading pykube_ng-23.6.0-py3-none-any.whl (26 kB)\n",
-                        "Downloading SQLAlchemy-2.0.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.2/3.2 MB\u001b[0m \u001b[31m122.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)\n",
-                        "Downloading asyncache-0.3.1-py3-none-any.whl (3.7 kB)\n",
-                        "Downloading cryptography-42.0.7-cp39-abi3-manylinux_2_28_x86_64.whl (3.8 MB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m125.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading google_auth-2.29.0-py2.py3-none-any.whl (189 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m189.2/189.2 kB\u001b[0m \u001b[31m29.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (620 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m620.0/620.0 kB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading httpx_ws-0.6.0-py3-none-any.whl (13 kB)\n",
-                        "Downloading oauthlib-3.2.2-py3-none-any.whl (151 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m151.7/151.7 kB\u001b[0m \u001b[31m24.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading python_box-7.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.3 MB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.3/4.3 MB\u001b[0m \u001b[31m131.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading python_jsonpath-1.1.1-py3-none-any.whl (51 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.5/51.5 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading iso8601-2.1.0-py3-none-any.whl (7.5 kB)\n",
-                        "Downloading Mako-1.3.3-py3-none-any.whl (78 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.8/78.8 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading requests_oauthlib-2.0.0-py2.py3-none-any.whl (24 kB)\n",
-                        "Downloading pyasn1_modules-0.4.0-py3-none-any.whl (181 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m181.2/181.2 kB\u001b[0m \u001b[31m27.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hDownloading rsa-4.9-py3-none-any.whl (34 kB)\n",
-                        "Downloading wsproto-1.2.0-py3-none-any.whl (24 kB)\n",
-                        "Downloading pyasn1-0.6.0-py2.py3-none-any.whl (85 kB)\n",
-                        "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.3/85.3 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-                        "\u001b[?25hInstalling collected packages: wsproto, python-jsonpath, python-box, pyasn1, oauthlib, Mako, iso8601, greenlet, colorlog, asyncache, sqlalchemy, rsa, requests-oauthlib, pykube-ng, pyasn1-modules, cryptography, kubernetes-asyncio, kopf, httpx-ws, google-auth, alembic, optuna, kubernetes, kr8s, dask_kubernetes\n",
-                        "Successfully installed Mako-1.3.3 alembic-1.13.1 asyncache-0.3.1 colorlog-6.8.2 cryptography-42.0.7 dask_kubernetes-2024.5.0 google-auth-2.29.0 greenlet-3.0.3 httpx-ws-0.6.0 iso8601-2.1.0 kopf-1.37.2 kr8s-0.14.4 kubernetes-29.0.0 kubernetes-asyncio-29.0.0 oauthlib-3.2.2 optuna-3.6.1 pyasn1-0.6.0 pyasn1-modules-0.4.0 pykube-ng-23.6.0 python-box-7.1.1 python-jsonpath-1.1.1 requests-oauthlib-2.0.0 rsa-4.9 sqlalchemy-2.0.30 wsproto-1.2.0\n"
-                    ]
-                }
-            ],
-            "source": [
-                "!pip install dask_kubernetes optuna"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "acc8f524-dc9b-41d7-8faa-3aea23ee1983",
-            "metadata": {},
-            "source": [
-                "## Import Python modules"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 10,
-            "id": "0c8a1ffb-0b03-4d4a-9ab1-0561bf5533d9",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [],
-            "source": [
-                "import threading\n",
-                "import warnings\n",
-                "\n",
-                "import cupy as cp\n",
-                "import cuspatial\n",
-                "import dask_cudf\n",
-                "import optuna\n",
-                "from cuml.dask.common import utils as dask_utils\n",
-                "from dask.distributed import Client, wait\n",
-                "from dask_kubernetes.operator import KubeCluster\n",
-                "from dask_ml.metrics import mean_squared_error\n",
-                "from dask_ml.model_selection import KFold\n",
-                "from xgboost import dask as dxgb"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "b2d61e0b-229b-40c0-889d-b8242e574fc8",
-            "metadata": {},
-            "source": [
-                "## Set up multiple Dask clusters\n",
-                "\n",
-                "To run multi-GPU training jobs in parallel, we will create multiple Dask clusters each controlling its share of GPUs. It's best to think of each Dask cluster as a portion of the compute resource of the Kubernetes cluster.\n",
-                "\n",
-                "Fill in the following variables:"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 29,
-            "id": "d1c22c3c-51b2-4526-b1fa-ac012f616e13",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "n_clusters=2\n",
-                        "n_worker_per_dask_cluster=2\n",
-                        "n_node_per_dask_cluster=3\n"
-                    ]
-                }
-            ],
-            "source": [
-                "# Number of nodes in the Kubernetes cluster.\n",
-                "# Each node is assumed to have a single NVIDIA GPU attached\n",
-                "n_nodes = 7\n",
-                "\n",
-                "# Number of worker nodes to be assigned to each Dask cluster\n",
-                "n_worker_per_dask_cluster = 2\n",
-                "\n",
-                "# Number of nodes to be assigned to each Dask cluster\n",
-                "# 1 is added since the Dask cluster's scheduler process needs to be mapped to its own node\n",
-                "n_node_per_dask_cluster = n_worker_per_dask_cluster + 1\n",
-                "\n",
-                "# Number of Dask clusters to be created\n",
-                "# Subtract 1 to account for the notebook pod (it requires its own node)\n",
-                "n_clusters = (n_nodes - 1) // n_node_per_dask_cluster\n",
-                "\n",
-                "print(f\"{n_clusters=}\")\n",
-                "if n_clusters == 0:\n",
-                "    raise ValueError(\n",
-                "        \"No cluster can be created. Reduce `n_worker_per_dask_cluster` or create more compute nodes\"\n",
-                "    )\n",
-                "print(f\"{n_worker_per_dask_cluster=}\")\n",
-                "print(f\"{n_node_per_dask_cluster=}\")\n",
-                "\n",
-                "n_node_active = n_clusters * n_node_per_dask_cluster + 1\n",
-                "if n_node_active != n_nodes:\n",
-                "    n_idle = n_nodes - n_node_active\n",
-                "    warnings.warn(f\"{n_idle} node(s) will not be used\", stacklevel=2)"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "c0eee823-162f-47e9-be4c-41447b2d7ee9",
-            "metadata": {},
-            "source": [
-                "Once we've determined the number of Dask clusters and their size, we are now ready to launch them:"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 30,
-            "id": "8d0b632a-b73d-4351-bb5d-8a1f4ab1a2c4",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [],
-            "source": [
-                "# Choose the same RAPIDS image you used for launching the notebook session\n",
-                "rapids_image = \"{{ rapids_notebook_container }}\""
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 33,
-            "id": "62aa9e52-c5b6-487c-8f02-88ea84980cfc",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [
-                {
-                    "data": {
-                        "application/vnd.jupyter.widget-view+json": {
-                            "model_id": "e24e5095ae78458e804d5f1212372f9a",
-                            "version_major": 2,
-                            "version_minor": 0
-                        },
-                        "text/plain": [
-                            "Output()"
-                        ]
-                    },
-                    "metadata": {},
-                    "output_type": "display_data"
-                },
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Launching cluster 0...\n"
-                    ]
-                },
-                {
-                    "data": {
-                        "text/html": [
-                            "\n"
-                        ],
-                        "text/plain": []
-                    },
-                    "metadata": {},
-                    "output_type": "display_data"
-                },
-                {
-                    "data": {
-                        "application/vnd.jupyter.widget-view+json": {
-                            "model_id": "240e689def1549c1b5dfd87284192e96",
-                            "version_major": 2,
-                            "version_minor": 0
-                        },
-                        "text/plain": [
-                            "Output()"
-                        ]
-                    },
-                    "metadata": {},
-                    "output_type": "display_data"
-                },
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Launching cluster 1...\n"
-                    ]
-                },
-                {
-                    "data": {
-                        "text/html": [
-                            "\n"
-                        ],
-                        "text/plain": []
-                    },
-                    "metadata": {},
-                    "output_type": "display_data"
-                }
-            ],
-            "source": [
-                "clusters = []\n",
-                "for i in range(n_clusters):\n",
-                "    print(f\"Launching cluster {i}...\")\n",
-                "    clusters.append(\n",
-                "        KubeCluster(\n",
-                "            name=f\"rapids-dask{i}\",\n",
-                "            image=rapids_image,\n",
-                "            worker_command=\"dask-cuda-worker\",\n",
-                "            n_workers=2,\n",
-                "            resources={\"limits\": {\"nvidia.com/gpu\": \"1\"}},\n",
-                "            env={\"EXTRA_PIP_PACKAGES\": \"optuna\"},\n",
-                "        )\n",
-                "    )"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "f37fa67f-fa90-432c-bed3-8f2a8a095795",
-            "metadata": {},
-            "source": [
-                "## Set up Hyperparameter Optimization Task with NYC Taxi data\n",
-                "\n",
-                "Anaconda has graciously made some of the NYC Taxi dataset available in a public Google Cloud Storage bucket. We'll use our Dask clusters to process it and train a model that predicts the fare amount."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 34,
-            "id": "c84929a5-f13b-4a61-9ed6-aa8060129e17",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [],
-            "source": [
-                "col_dtype = {\n",
-                "    \"VendorID\": \"int32\",\n",
-                "    \"tpep_pickup_datetime\": \"datetime64[ms]\",\n",
-                "    \"tpep_dropoff_datetime\": \"datetime64[ms]\",\n",
-                "    \"passenger_count\": \"int32\",\n",
-                "    \"trip_distance\": \"float32\",\n",
-                "    \"pickup_longitude\": \"float32\",\n",
-                "    \"pickup_latitude\": \"float32\",\n",
-                "    \"RatecodeID\": \"int32\",\n",
-                "    \"store_and_fwd_flag\": \"int32\",\n",
-                "    \"dropoff_longitude\": \"float32\",\n",
-                "    \"dropoff_latitude\": \"float32\",\n",
-                "    \"payment_type\": \"int32\",\n",
-                "    \"fare_amount\": \"float32\",\n",
-                "    \"extra\": \"float32\",\n",
-                "    \"mta_tax\": \"float32\",\n",
-                "    \"tip_amount\": \"float32\",\n",
-                "    \"total_amount\": \"float32\",\n",
-                "    \"tolls_amount\": \"float32\",\n",
-                "    \"improvement_surcharge\": \"float32\",\n",
-                "}\n",
-                "\n",
-                "\n",
-                "must_haves = {\n",
-                "    \"pickup_datetime\": \"datetime64[ms]\",\n",
-                "    \"dropoff_datetime\": \"datetime64[ms]\",\n",
-                "    \"passenger_count\": \"int32\",\n",
-                "    \"trip_distance\": \"float32\",\n",
-                "    \"pickup_longitude\": \"float32\",\n",
-                "    \"pickup_latitude\": \"float32\",\n",
-                "    \"rate_code\": \"int32\",\n",
-                "    \"dropoff_longitude\": \"float32\",\n",
-                "    \"dropoff_latitude\": \"float32\",\n",
-                "    \"fare_amount\": \"float32\",\n",
-                "}\n",
-                "\n",
-                "\n",
-                "def compute_haversine_distance(df):\n",
-                "    pickup = cuspatial.GeoSeries.from_points_xy(\n",
-                "        df[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns()\n",
-                "    )\n",
-                "    dropoff = cuspatial.GeoSeries.from_points_xy(\n",
-                "        df[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns()\n",
-                "    )\n",
-                "    df[\"haversine_distance\"] = cuspatial.haversine_distance(pickup, dropoff)\n",
-                "    df[\"haversine_distance\"] = df[\"haversine_distance\"].astype(\"float32\")\n",
-                "    return df\n",
-                "\n",
-                "\n",
-                "def clean(ddf, must_haves):\n",
-                "    # strip extra whitespace from the column names and lowercase them\n",
-                "    tmp = {col: col.strip().lower() for col in list(ddf.columns)}\n",
-                "    ddf = ddf.rename(columns=tmp)\n",
-                "\n",
-                "    ddf = ddf.rename(\n",
-                "        columns={\n",
-                "            \"tpep_pickup_datetime\": \"pickup_datetime\",\n",
-                "            \"tpep_dropoff_datetime\": \"dropoff_datetime\",\n",
-                "            \"ratecodeid\": \"rate_code\",\n",
-                "        }\n",
-                "    )\n",
-                "\n",
-                "    ddf[\"pickup_datetime\"] = ddf[\"pickup_datetime\"].astype(\"datetime64[ms]\")\n",
-                "    ddf[\"dropoff_datetime\"] = ddf[\"dropoff_datetime\"].astype(\"datetime64[ms]\")\n",
-                "\n",
-                "    for col in ddf.columns:\n",
-                "        if col not in must_haves:\n",
-                "            ddf = ddf.drop(columns=col)\n",
-                "            continue\n",
-                "        if ddf[col].dtype == \"object\":\n",
-                "            # Fixing error: could not convert arg to str\n",
-                "            ddf = ddf.drop(columns=col)\n",
-                "        else:\n",
-                "            # downcast from 64bit to 32bit types\n",
-                "            # Tesla T4 GPUs are faster on 32-bit ops\n",
-                "            if \"int\" in str(ddf[col].dtype):\n",
-                "                ddf[col] = ddf[col].astype(\"int32\")\n",
-                "            if \"float\" in str(ddf[col].dtype):\n",
-                "                ddf[col] = ddf[col].astype(\"float32\")\n",
-                "            ddf[col] = ddf[col].fillna(-1)\n",
-                "\n",
-                "    return ddf\n",
-                "\n",
-                "\n",
-                "def prepare_data(client):\n",
-                "    taxi_df = dask_cudf.read_csv(\n",
-                "        \"https://storage.googleapis.com/anaconda-public-data/nyc-taxi/csv/2016/yellow_tripdata_2016-02.csv\",\n",
-                "        dtype=col_dtype,\n",
-                "    )\n",
-                "    taxi_df = taxi_df.map_partitions(clean, must_haves, meta=must_haves)\n",
-                "\n",
-                "    ## add features\n",
-                "    taxi_df[\"hour\"] = taxi_df[\"pickup_datetime\"].dt.hour.astype(\"int32\")\n",
-                "    taxi_df[\"year\"] = taxi_df[\"pickup_datetime\"].dt.year.astype(\"int32\")\n",
-                "    taxi_df[\"month\"] = taxi_df[\"pickup_datetime\"].dt.month.astype(\"int32\")\n",
-                "    taxi_df[\"day\"] = taxi_df[\"pickup_datetime\"].dt.day.astype(\"int32\")\n",
-                "    taxi_df[\"day_of_week\"] = taxi_df[\"pickup_datetime\"].dt.weekday.astype(\"int32\")\n",
-                "    taxi_df[\"is_weekend\"] = (taxi_df[\"day_of_week\"] >= 5).astype(\"int32\")\n",
-                "\n",
-                "    # calculate the time difference between dropoff and pickup.\n",
-                "    taxi_df[\"diff\"] = taxi_df[\"dropoff_datetime\"].astype(\"int32\") - taxi_df[\n",
-                "        \"pickup_datetime\"\n",
-                "    ].astype(\"int32\")\n",
-                "    taxi_df[\"diff\"] = (taxi_df[\"diff\"] / 1000).astype(\"int32\")\n",
-                "\n",
-                "    taxi_df[\"pickup_latitude_r\"] = taxi_df[\"pickup_latitude\"] // 0.01 * 0.01\n",
-                "    taxi_df[\"pickup_longitude_r\"] = taxi_df[\"pickup_longitude\"] // 0.01 * 0.01\n",
-                "    taxi_df[\"dropoff_latitude_r\"] = taxi_df[\"dropoff_latitude\"] // 0.01 * 0.01\n",
-                "    taxi_df[\"dropoff_longitude_r\"] = taxi_df[\"dropoff_longitude\"] // 0.01 * 0.01\n",
-                "\n",
-                "    taxi_df = taxi_df.drop(\"pickup_datetime\", axis=1)\n",
-                "    taxi_df = taxi_df.drop(\"dropoff_datetime\", axis=1)\n",
-                "\n",
-                "    taxi_df = taxi_df.map_partitions(compute_haversine_distance)\n",
-                "\n",
-                "    X = (\n",
-                "        taxi_df.drop([\"fare_amount\"], axis=1)\n",
-                "        .astype(\"float32\")\n",
-                "        .to_dask_array(lengths=True)\n",
-                "    )\n",
-                "    y = taxi_df[\"fare_amount\"].astype(\"float32\").to_dask_array(lengths=True)\n",
-                "\n",
-                "    X._meta = cp.asarray(X._meta)\n",
-                "    y._meta = cp.asarray(y._meta)\n",
-                "\n",
-                "    X, y = dask_utils.persist_across_workers(client, [X, y])\n",
-                "    return X, y\n",
-                "\n",
-                "\n",
-                "def train_model(params):\n",
-                "    cluster = get_cluster(threading.get_ident())\n",
-                "\n",
-                "    default_params = {\n",
-                "        \"objective\": \"reg:squarederror\",\n",
-                "        \"eval_metric\": \"rmse\",\n",
-                "        \"verbosity\": 0,\n",
-                "        \"tree_method\": \"hist\",\n",
-                "        \"device\": \"cuda\",\n",
-                "    }\n",
-                "    params = dict(default_params, **params)\n",
-                "\n",
-                "    with Client(cluster) as client:\n",
-                "        X, y = prepare_data(client)\n",
-                "        wait([X, y])\n",
-                "\n",
-                "        scores = []\n",
-                "        kfold = KFold(n_splits=5, shuffle=False)\n",
-                "        for train_index, test_index in kfold.split(X, y):\n",
-                "            dtrain = dxgb.DaskQuantileDMatrix(client, X[train_index, :], y[train_index])\n",
-                "            dtest = dxgb.DaskQuantileDMatrix(client, X[test_index, :], y[test_index])\n",
-                "            model = dxgb.train(\n",
-                "                client,\n",
-                "                params,\n",
-                "                dtrain,\n",
-                "                num_boost_round=10,\n",
-                "                verbose_eval=False,\n",
-                "            )\n",
-                "            y_test_pred = dxgb.predict(client, model, dtest).to_backend(\"cupy\")\n",
-                "            rmse_score = mean_squared_error(y[test_index], y_test_pred, squared=False)\n",
-                "            scores.append(rmse_score)\n",
-                "        return sum(scores) / len(scores)\n",
-                "\n",
-                "\n",
-                "def objective(trial):\n",
-                "    params = {\n",
-                "        \"n_estimators\": trial.suggest_int(\"n_estimators\", 2, 4),\n",
-                "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.5, 0.7),\n",
-                "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.5, 1),\n",
-                "        \"colsample_bynode\": trial.suggest_float(\"colsample_bynode\", 0.5, 1),\n",
-                "        \"colsample_bylevel\": trial.suggest_float(\"colsample_bylevel\", 0.5, 1),\n",
-                "        \"reg_lambda\": trial.suggest_float(\"reg_lambda\", 0, 1),\n",
-                "        \"max_depth\": trial.suggest_int(\"max_depth\", 1, 6),\n",
-                "        \"max_leaves\": trial.suggest_int(\"max_leaves\", 0, 2),\n",
-                "        \"max_cat_to_onehot\": trial.suggest_int(\"max_cat_to_onehot\", 1, 10),\n",
-                "    }\n",
-                "    return train_model(params)"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "0c401aa1-2aeb-43d9-955b-4dfd7b495fe9",
-            "metadata": {},
-            "source": [
-                "To kick off multiple training jobs in parallel, we will launch multiple threads, so that each thread controls a Dask cluster.\n",
-                "One important utility function is `get_cluster`, which returns the Dask cluster that's mapped to a given thread."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 35,
-            "id": "97cdeb8a-330e-4d96-92d4-d48c93828d9d",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [],
-            "source": [
-                "# Map each thread's integer ID to a sequential number (0, 1, 2 ...)\n",
-                "thread_id_map: dict[int, KubeCluster] = {}\n",
-                "thread_id_map_lock = threading.Lock()\n",
-                "\n",
-                "\n",
-                "def get_cluster(thread_id: int) -> KubeCluster:\n",
-                "    with thread_id_map_lock:\n",
-                "        try:\n",
-                "            return clusters[thread_id_map[thread_id]]\n",
-                "        except KeyError:\n",
-                "            seq_id = len(thread_id_map)\n",
-                "            thread_id_map[thread_id] = seq_id\n",
-                "            return clusters[seq_id]"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "2e7c923b-f4ea-4f38-b3a5-92dfcd47dfff",
-            "metadata": {},
-            "source": [
-                "Now we are ready to start hyperparameter optimization."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 36,
-            "id": "c557d769-0be6-4319-b7f5-8ad52b824961",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [
-                {
-                    "name": "stderr",
-                    "output_type": "stream",
-                    "text": [
-                        "[I 2024-05-09 07:53:00,718] A new study created in memory with name: no-name-da830427-bce3-4e42-98e6-c98c0c3da0d7\n"
-                    ]
-                }
-            ],
-            "source": [
-                "n_trials = (\n",
-                "    10  # set to a low number so that the demo finishes quickly. Feel free to adjust\n",
-                ")\n",
-                "study = optuna.create_study(direction=\"minimize\")"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 37,
-            "id": "94ece2d0-b3f7-44c8-9b4e-a2f60fd623b9",
-            "metadata": {
-                "tags": []
-            },
-            "outputs": [
-                {
-                    "name": "stderr",
-                    "output_type": "stream",
-                    "text": [
-                        "[I 2024-05-09 07:54:10,229] Trial 1 finished with value: 59.449462890625 and parameters: {'n_estimators': 4, 'learning_rate': 0.6399993857892183, 'colsample_bytree': 0.7020623988319513, 'colsample_bynode': 0.777468318546648, 'colsample_bylevel': 0.7890749134903386, 'reg_lambda': 0.4464953694744921, 'max_depth': 3, 'max_leaves': 0, 'max_cat_to_onehot': 9}. Best is trial 1 with value: 59.449462890625.\n",
-                        "[I 2024-05-09 07:54:19,507] Trial 0 finished with value: 57.77985763549805 and parameters: {'n_estimators': 4, 'learning_rate': 0.674087333032356, 'colsample_bytree': 0.557642421113256, 'colsample_bynode': 0.9719449711676733, 'colsample_bylevel': 0.6984302171973646, 'reg_lambda': 0.7201514298169174, 'max_depth': 4, 'max_leaves': 1, 'max_cat_to_onehot': 4}. Best is trial 0 with value: 57.77985763549805.\n",
-                        "[I 2024-05-09 07:54:59,524] Trial 2 finished with value: 57.77985763549805 and parameters: {'n_estimators': 2, 'learning_rate': 0.6894880267544121, 'colsample_bytree': 0.8171662437182604, 'colsample_bynode': 0.549527686217645, 'colsample_bylevel': 0.890212178266078, 'reg_lambda': 0.5847298606135033, 'max_depth': 2, 'max_leaves': 1, 'max_cat_to_onehot': 5}. Best is trial 0 with value: 57.77985763549805.\n",
-                        "[I 2024-05-09 07:55:22,013] Trial 3 finished with value: 55.01234817504883 and parameters: {'n_estimators': 4, 'learning_rate': 0.6597614733926671, 'colsample_bytree': 0.8437061126308156, 'colsample_bynode': 0.621479934699203, 'colsample_bylevel': 0.8330951489228277, 'reg_lambda': 0.7830102753448884, 'max_depth': 2, 'max_leaves': 2, 'max_cat_to_onehot': 2}. Best is trial 3 with value: 55.01234817504883.\n",
-                        "[I 2024-05-09 07:56:00,678] Trial 4 finished with value: 57.77985763549805 and parameters: {'n_estimators': 4, 'learning_rate': 0.5994587326401378, 'colsample_bytree': 0.9799078215504886, 'colsample_bynode': 0.9766955839079614, 'colsample_bylevel': 0.5088864363378924, 'reg_lambda': 0.18103184809548734, 'max_depth': 3, 'max_leaves': 1, 'max_cat_to_onehot': 4}. Best is trial 3 with value: 55.01234817504883.\n",
-                        "[I 2024-05-09 07:56:11,773] Trial 5 finished with value: 54.936126708984375 and parameters: {'n_estimators': 2, 'learning_rate': 0.5208827661289628, 'colsample_bytree': 0.866258912492528, 'colsample_bynode': 0.6368815844513638, 'colsample_bylevel': 0.9539603435186208, 'reg_lambda': 0.21390618865079458, 'max_depth': 4, 'max_leaves': 2, 'max_cat_to_onehot': 4}. Best is trial 5 with value: 54.936126708984375.\n",
-                        "[I 2024-05-09 07:56:48,737] Trial 6 finished with value: 57.77985763549805 and parameters: {'n_estimators': 2, 'learning_rate': 0.6137888371528442, 'colsample_bytree': 0.9621063205689744, 'colsample_bynode': 0.5306812468481084, 'colsample_bylevel': 0.8527827651989199, 'reg_lambda': 0.3315799968401767, 'max_depth': 6, 'max_leaves': 1, 'max_cat_to_onehot': 9}. Best is trial 5 with value: 54.936126708984375.\n",
-                        "[I 2024-05-09 07:56:59,261] Trial 7 finished with value: 55.204200744628906 and parameters: {'n_estimators': 3, 'learning_rate': 0.6831416027240611, 'colsample_bytree': 0.5311840770388268, 'colsample_bynode': 0.9572535535110238, 'colsample_bylevel': 0.6846894032354778, 'reg_lambda': 0.6091211134408249, 'max_depth': 3, 'max_leaves': 2, 'max_cat_to_onehot': 5}. Best is trial 5 with value: 54.936126708984375.\n",
-                        "[I 2024-05-09 07:57:37,674] Trial 8 finished with value: 54.93584442138672 and parameters: {'n_estimators': 4, 'learning_rate': 0.620742285616388, 'colsample_bytree': 0.7969398985157778, 'colsample_bynode': 0.9049707375663323, 'colsample_bylevel': 0.7209693969245297, 'reg_lambda': 0.6158847054585023, 'max_depth': 1, 'max_leaves': 0, 'max_cat_to_onehot': 10}. Best is trial 8 with value: 54.93584442138672.\n",
-                        "[I 2024-05-09 07:57:50,310] Trial 9 finished with value: 57.76123809814453 and parameters: {'n_estimators': 3, 'learning_rate': 0.5475197727057007, 'colsample_bytree': 0.5381502848057452, 'colsample_bynode': 0.8514705732161596, 'colsample_bylevel': 0.9139277684007088, 'reg_lambda': 0.5117732009332318, 'max_depth': 4, 'max_leaves': 0, 'max_cat_to_onehot': 5}. Best is trial 8 with value: 54.93584442138672.\n"
-                    ]
-                }
-            ],
-            "source": [
-                "# With the n_jobs parameter, Optuna will launch [n_clusters] threads internally\n",
-                "# Each thread will deploy a training job to a Dask cluster\n",
-                "study.optimize(objective, n_trials=n_trials, n_jobs=n_clusters)"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "ac5b3cba-87ba-4470-a166-b6a0815f85e4",
-            "metadata": {},
-            "outputs": [],
-            "source": []
-        }
-    ],
-    "metadata": {
-        "kernelspec": {
-            "display_name": "Python 3 (ipykernel)",
-            "language": "python",
-            "name": "python3"
-        },
-        "language_info": {
-            "codemirror_mode": {
-                "name": "ipython",
-                "version": 3
-            },
-            "file_extension": ".py",
-            "mimetype": "text/x-python",
-            "name": "python",
-            "nbconvert_exporter": "python",
-            "pygments_lexer": "ipython3",
-            "version": "3.11.9"
-        }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c1db247a-15ab-41b1-a124-152484a29f29",
+   "metadata": {
+    "tags": [
+     "library/xgboost",
+     "library/optuna",
+     "library/dask",
+     "library/dask-kubernetes",
+     "library/scikit-learn",
+     "workflow/hpo",
+     "platforms/kubeflow",
+     "dataset/nyc-taxi",
+     "data-storage/gcs",
+     "data-format/csv",
+     "platforms/kubernetes"
+    ]
+   },
+   "source": [
+    "# Scaling up Hyperparameter Optimization with Multi-GPU Workload on Kubernetes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f7f02171-ed7b-48b4-9d55-32bb1149a3cf",
+   "metadata": {},
+   "source": [
+    "Choosing an optimal set of hyperparameters is a daunting task, especially for algorithms like XGBoost that have many hyperparameters to tune. In this notebook, we will speed up hyperparameter optimization by running multiple training jobs in parallel on a Kubernetes cluster. We handle larger data sets by splitting the data into multiple GPU devices."
+   ]
+  },
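+  {
+   "cell_type": "markdown",
+   "id": "1a2b3c4d-0f1e-4a5b-9c6d-7e8f90a1b2c3",
+   "metadata": {},
+   "source": [
+    "The next cell is only a compressed sketch of the pattern this notebook builds up to: each worker thread owns one `KubeCluster`, and Optuna's `n_jobs` starts that many threads so that trials run on separate Dask clusters in parallel. The cluster names, sizes, and the toy objective below are illustrative placeholders rather than the values used later, so feel free to skip ahead to the full implementation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f8e7d6c-5b4a-4c3d-8e2f-1a0b9c8d7e6f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Compressed sketch only: placeholder cluster names and a toy objective.\n",
+    "# The full implementation with real data appears later in this notebook.\n",
+    "import threading\n",
+    "\n",
+    "import optuna\n",
+    "from dask.distributed import Client\n",
+    "from dask_kubernetes.operator import KubeCluster\n",
+    "\n",
+    "# Assumption: two small illustrative clusters; size them to your capacity.\n",
+    "clusters = [KubeCluster(name=f\"sketch-dask{i}\", n_workers=1) for i in range(2)]\n",
+    "\n",
+    "thread_id_map = {}\n",
+    "lock = threading.Lock()\n",
+    "\n",
+    "\n",
+    "def get_cluster():\n",
+    "    # Give each Optuna worker thread its own Dask cluster.\n",
+    "    with lock:\n",
+    "        idx = thread_id_map.setdefault(threading.get_ident(), len(thread_id_map))\n",
+    "    return clusters[idx]\n",
+    "\n",
+    "\n",
+    "def objective(trial):\n",
+    "    x = trial.suggest_float(\"x\", -1.0, 1.0)\n",
+    "    with Client(get_cluster()) as client:\n",
+    "        # A real objective would train and score a model on this cluster.\n",
+    "        return client.submit(lambda v: (v - 0.3) ** 2, x).result()\n",
+    "\n",
+    "\n",
+    "study = optuna.create_study(direction=\"minimize\")\n",
+    "study.optimize(objective, n_trials=4, n_jobs=len(clusters))"
+   ]
+  },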
+  {
+   "cell_type": "markdown",
+   "id": "a718e21f-5543-4f44-8a68-6ad8e78cb433",
+   "metadata": {},
+   "source": [
+    "## Prerequisites\n",
+    "Please follow instructions in [Dask Operator: Installation](../../tools/kubernetes/dask-operator) to install the Dask operator on top of a GPU-enabled Kubernetes cluster. (For the purpose of this example, you may ignore other sections of the linked document.\n",
+    "\n",
+    "### Optional: Kubeflow\n",
+    "Kubeflow gives you a nice notebook environment to run this notebook within the k8s cluster. Install Kubeflow by following instructions in [Installing Kubeflow](https://www.kubeflow.org/docs/started/installing-kubeflow/). You may choose any method; we tested this example after installing Kubeflow from manifests."
+   ]
+  },
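+  {
+   "cell_type": "markdown",
+   "id": "2b3c4d5e-1a2b-4c3d-8e4f-5a6b7c8d9e0f",
+   "metadata": {},
+   "source": [
+    "### Optional: verify the operator installation\n",
+    "This quick check is not part of the original instructions and assumes `kubectl` is available inside the notebook environment. The Dask operator registers a `DaskCluster` custom resource definition, so listing it is a simple way to confirm that the operator is installed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3c4d5e6f-2b3c-4d4e-9f5a-6b7c8d9e0f1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check (assumes kubectl is reachable from this pod).\n",
+    "# If the CRD is missing, revisit the Dask Operator installation guide above.\n",
+    "!kubectl get crd daskclusters.kubernetes.dask.org"
+   ]
+  },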
+  {
+   "cell_type": "markdown",
+   "id": "7b7f7bb3-5d53-4b8f-8472-bb974c8a597d",
+   "metadata": {},
+   "source": [
+    "## Install extra Python modules\n",
+    "We'll need a few extra Python modules."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "27b79db5-bbcd-422c-80a7-af873eb47711",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting dask_kubernetes\n",
+      "  Downloading dask_kubernetes-2024.5.0-py3-none-any.whl.metadata (4.2 kB)\n",
+      "Collecting optuna\n",
+      "  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)\n",
+      "Requirement already satisfied: dask>=2022.08.1 in /opt/conda/lib/python3.11/site-packages (from dask_kubernetes) (2024.1.1)\n",
+      "Requirement already satisfied: distributed>=2022.08.1 in /opt/conda/lib/python3.11/site-packages (from dask_kubernetes) (2024.1.1)\n",
+      "Collecting kopf>=1.35.3 (from dask_kubernetes)\n",
+      "  Downloading kopf-1.37.2-py3-none-any.whl.metadata (9.7 kB)\n",
+      "Collecting kr8s==0.14.* (from dask_kubernetes)\n",
+      "  Downloading kr8s-0.14.4-py3-none-any.whl.metadata (6.7 kB)\n",
+      "Collecting kubernetes-asyncio>=12.0.1 (from dask_kubernetes)\n",
+      "  Downloading kubernetes_asyncio-29.0.0-py3-none-any.whl.metadata (1.3 kB)\n",
+      "Collecting kubernetes>=12.0.1 (from dask_kubernetes)\n",
+      "  Downloading kubernetes-29.0.0-py2.py3-none-any.whl.metadata (1.5 kB)\n",
+      "Collecting pykube-ng>=22.9.0 (from dask_kubernetes)\n",
+      "  Downloading pykube_ng-23.6.0-py3-none-any.whl.metadata (8.0 kB)\n",
+      "Requirement already satisfied: rich>=12.5.1 in /opt/conda/lib/python3.11/site-packages (from dask_kubernetes) (13.7.1)\n",
+      "Requirement already satisfied: anyio>=3.7.0 in /opt/conda/lib/python3.11/site-packages (from kr8s==0.14.*->dask_kubernetes) (4.3.0)\n",
+      "Collecting asyncache>=0.3.1 (from kr8s==0.14.*->dask_kubernetes)\n",
+      "  Downloading asyncache-0.3.1-py3-none-any.whl.metadata (2.0 kB)\n",
+      "Collecting cryptography>=35 (from kr8s==0.14.*->dask_kubernetes)\n",
+      "  Downloading cryptography-42.0.7-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (5.3 kB)\n",
+      "Requirement already satisfied: exceptiongroup>=1.2.0 in /opt/conda/lib/python3.11/site-packages (from kr8s==0.14.*->dask_kubernetes) (1.2.0)\n",
+      "Collecting httpx-ws>=0.5.1 (from kr8s==0.14.*->dask_kubernetes)\n",
+      "  Downloading httpx_ws-0.6.0-py3-none-any.whl.metadata (7.8 kB)\n",
+      "Requirement already satisfied: httpx>=0.24.1 in /opt/conda/lib/python3.11/site-packages (from kr8s==0.14.*->dask_kubernetes) (0.27.0)\n",
+      "Collecting python-box>=7.0.1 (from kr8s==0.14.*->dask_kubernetes)\n",
+      "  Downloading python_box-7.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.8 kB)\n",
+      "Collecting python-jsonpath>=0.7.1 (from kr8s==0.14.*->dask_kubernetes)\n",
+      "  Downloading python_jsonpath-1.1.1-py3-none-any.whl.metadata (5.3 kB)\n",
+      "Requirement already satisfied: pyyaml>=6.0 in /opt/conda/lib/python3.11/site-packages (from kr8s==0.14.*->dask_kubernetes) (6.0.1)\n",
+      "Collecting alembic>=1.5.0 (from optuna)\n",
+      "  Downloading alembic-1.13.1-py3-none-any.whl.metadata (7.4 kB)\n",
+      "Collecting colorlog (from optuna)\n",
+      "  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)\n",
+      "Requirement already satisfied: numpy in /opt/conda/lib/python3.11/site-packages (from optuna) (1.26.4)\n",
+      "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.11/site-packages (from optuna) (24.0)\n",
+      "Collecting sqlalchemy>=1.3.0 (from optuna)\n",
+      "  Downloading SQLAlchemy-2.0.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n",
+      "Requirement already satisfied: tqdm in /opt/conda/lib/python3.11/site-packages (from optuna) (4.66.2)\n",
+      "Collecting Mako (from alembic>=1.5.0->optuna)\n",
+      "  Downloading Mako-1.3.3-py3-none-any.whl.metadata (2.9 kB)\n",
+      "Requirement already satisfied: typing-extensions>=4 in /opt/conda/lib/python3.11/site-packages (from alembic>=1.5.0->optuna) (4.11.0)\n",
+      "Requirement already satisfied: click>=8.1 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (8.1.7)\n",
+      "Requirement already satisfied: cloudpickle>=1.5.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (3.0.0)\n",
+      "Requirement already satisfied: fsspec>=2021.09.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (2024.3.1)\n",
+      "Requirement already satisfied: partd>=1.2.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (1.4.1)\n",
+      "Requirement already satisfied: toolz>=0.10.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (0.12.1)\n",
+      "Requirement already satisfied: importlib-metadata>=4.13.0 in /opt/conda/lib/python3.11/site-packages (from dask>=2022.08.1->dask_kubernetes) (7.1.0)\n",
+      "Requirement already satisfied: jinja2>=2.10.3 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (3.1.3)\n",
+      "Requirement already satisfied: locket>=1.0.0 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (1.0.0)\n",
+      "Requirement already satisfied: msgpack>=1.0.0 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (1.0.7)\n",
+      "Requirement already satisfied: psutil>=5.7.2 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (5.9.8)\n",
+      "Requirement already satisfied: sortedcontainers>=2.0.5 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (2.4.0)\n",
+      "Requirement already satisfied: tblib>=1.6.0 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (3.0.0)\n",
+      "Requirement already satisfied: tornado>=6.0.4 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (6.4)\n",
+      "Requirement already satisfied: urllib3>=1.24.3 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (1.26.18)\n",
+      "Requirement already satisfied: zict>=3.0.0 in /opt/conda/lib/python3.11/site-packages (from distributed>=2022.08.1->dask_kubernetes) (3.0.0)\n",
+      "Requirement already satisfied: python-json-logger in /opt/conda/lib/python3.11/site-packages (from kopf>=1.35.3->dask_kubernetes) (2.0.7)\n",
+      "Collecting iso8601 (from kopf>=1.35.3->dask_kubernetes)\n",
+      "  Downloading iso8601-2.1.0-py3-none-any.whl.metadata (3.7 kB)\n",
+      "Requirement already satisfied: aiohttp in /opt/conda/lib/python3.11/site-packages (from kopf>=1.35.3->dask_kubernetes) (3.9.5)\n",
+      "Requirement already satisfied: certifi>=14.05.14 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (2024.2.2)\n",
+      "Requirement already satisfied: six>=1.9.0 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (1.16.0)\n",
+      "Requirement already satisfied: python-dateutil>=2.5.3 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (2.9.0)\n",
+      "Collecting google-auth>=1.0.1 (from kubernetes>=12.0.1->dask_kubernetes)\n",
+      "  Downloading google_auth-2.29.0-py2.py3-none-any.whl.metadata (4.7 kB)\n",
+      "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (1.8.0)\n",
+      "Requirement already satisfied: requests in /opt/conda/lib/python3.11/site-packages (from kubernetes>=12.0.1->dask_kubernetes) (2.31.0)\n",
+      "Collecting requests-oauthlib (from kubernetes>=12.0.1->dask_kubernetes)\n",
+      "  Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl.metadata (11 kB)\n",
+      "Collecting oauthlib>=3.2.2 (from kubernetes>=12.0.1->dask_kubernetes)\n",
+      "  Downloading oauthlib-3.2.2-py3-none-any.whl.metadata (7.5 kB)\n",
+      "Requirement already satisfied: setuptools>=21.0.0 in /opt/conda/lib/python3.11/site-packages (from kubernetes-asyncio>=12.0.1->dask_kubernetes) (69.5.1)\n",
+      "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/lib/python3.11/site-packages (from rich>=12.5.1->dask_kubernetes) (3.0.0)\n",
+      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.11/site-packages (from rich>=12.5.1->dask_kubernetes) (2.17.2)\n",
+      "Collecting greenlet!=0.4.17 (from sqlalchemy>=1.3.0->optuna)\n",
+      "  Downloading greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n",
+      "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (1.3.1)\n",
+      "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (23.2.0)\n",
+      "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (1.4.1)\n",
+      "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (6.0.5)\n",
+      "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp->kopf>=1.35.3->dask_kubernetes) (1.9.4)\n",
+      "Requirement already satisfied: idna>=2.8 in /opt/conda/lib/python3.11/site-packages (from anyio>=3.7.0->kr8s==0.14.*->dask_kubernetes) (3.7)\n",
+      "Requirement already satisfied: sniffio>=1.1 in /opt/conda/lib/python3.11/site-packages (from anyio>=3.7.0->kr8s==0.14.*->dask_kubernetes) (1.3.1)\n",
+      "Requirement already satisfied: cachetools<6.0.0,>=5.2.0 in /opt/conda/lib/python3.11/site-packages (from asyncache>=0.3.1->kr8s==0.14.*->dask_kubernetes) (5.3.3)\n",
+      "Requirement already satisfied: cffi>=1.12 in /opt/conda/lib/python3.11/site-packages (from cryptography>=35->kr8s==0.14.*->dask_kubernetes) (1.16.0)\n",
+      "Collecting pyasn1-modules>=0.2.1 (from google-auth>=1.0.1->kubernetes>=12.0.1->dask_kubernetes)\n",
+      "  Downloading pyasn1_modules-0.4.0-py3-none-any.whl.metadata (3.4 kB)\n",
+      "Collecting rsa<5,>=3.1.4 (from google-auth>=1.0.1->kubernetes>=12.0.1->dask_kubernetes)\n",
+      "  Downloading rsa-4.9-py3-none-any.whl.metadata (4.2 kB)\n",
+      "Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.11/site-packages (from httpx>=0.24.1->kr8s==0.14.*->dask_kubernetes) (1.0.5)\n",
+      "Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.11/site-packages (from httpcore==1.*->httpx>=0.24.1->kr8s==0.14.*->dask_kubernetes) (0.14.0)\n",
+      "Collecting wsproto (from httpx-ws>=0.5.1->kr8s==0.14.*->dask_kubernetes)\n",
+      "  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)\n",
+      "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.11/site-packages (from importlib-metadata>=4.13.0->dask>=2022.08.1->dask_kubernetes) (3.17.0)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.11/site-packages (from jinja2>=2.10.3->distributed>=2022.08.1->dask_kubernetes) (2.1.5)\n",
+      "Requirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.11/site-packages (from markdown-it-py>=2.2.0->rich>=12.5.1->dask_kubernetes) (0.1.2)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests->kubernetes>=12.0.1->dask_kubernetes) (3.3.2)\n",
+      "Requirement already satisfied: pycparser in /opt/conda/lib/python3.11/site-packages (from cffi>=1.12->cryptography>=35->kr8s==0.14.*->dask_kubernetes) (2.22)\n",
+      "Collecting pyasn1<0.7.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=12.0.1->dask_kubernetes)\n",
+      "  Downloading pyasn1-0.6.0-py2.py3-none-any.whl.metadata (8.3 kB)\n",
+      "Downloading dask_kubernetes-2024.5.0-py3-none-any.whl (157 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.2/157.2 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n",
+      "\u001b[?25hDownloading kr8s-0.14.4-py3-none-any.whl (60 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.7/60.7 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading optuna-3.6.1-py3-none-any.whl (380 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m380.1/380.1 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading alembic-1.13.1-py3-none-any.whl (233 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m233.4/233.4 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading kopf-1.37.2-py3-none-any.whl (207 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.8/207.8 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading kubernetes-29.0.0-py2.py3-none-any.whl (1.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading kubernetes_asyncio-29.0.0-py3-none-any.whl (2.0 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m83.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading pykube_ng-23.6.0-py3-none-any.whl (26 kB)\n",
+      "Downloading SQLAlchemy-2.0.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.2/3.2 MB\u001b[0m \u001b[31m122.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)\n",
+      "Downloading asyncache-0.3.1-py3-none-any.whl (3.7 kB)\n",
+      "Downloading cryptography-42.0.7-cp39-abi3-manylinux_2_28_x86_64.whl (3.8 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m125.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading google_auth-2.29.0-py2.py3-none-any.whl (189 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m189.2/189.2 kB\u001b[0m \u001b[31m29.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (620 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m620.0/620.0 kB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading httpx_ws-0.6.0-py3-none-any.whl (13 kB)\n",
+      "Downloading oauthlib-3.2.2-py3-none-any.whl (151 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m151.7/151.7 kB\u001b[0m \u001b[31m24.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading python_box-7.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.3 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.3/4.3 MB\u001b[0m \u001b[31m131.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading python_jsonpath-1.1.1-py3-none-any.whl (51 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.5/51.5 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading iso8601-2.1.0-py3-none-any.whl (7.5 kB)\n",
+      "Downloading Mako-1.3.3-py3-none-any.whl (78 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.8/78.8 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading requests_oauthlib-2.0.0-py2.py3-none-any.whl (24 kB)\n",
+      "Downloading pyasn1_modules-0.4.0-py3-none-any.whl (181 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m181.2/181.2 kB\u001b[0m \u001b[31m27.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading rsa-4.9-py3-none-any.whl (34 kB)\n",
+      "Downloading wsproto-1.2.0-py3-none-any.whl (24 kB)\n",
+      "Downloading pyasn1-0.6.0-py2.py3-none-any.whl (85 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.3/85.3 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hInstalling collected packages: wsproto, python-jsonpath, python-box, pyasn1, oauthlib, Mako, iso8601, greenlet, colorlog, asyncache, sqlalchemy, rsa, requests-oauthlib, pykube-ng, pyasn1-modules, cryptography, kubernetes-asyncio, kopf, httpx-ws, google-auth, alembic, optuna, kubernetes, kr8s, dask_kubernetes\n",
+      "Successfully installed Mako-1.3.3 alembic-1.13.1 asyncache-0.3.1 colorlog-6.8.2 cryptography-42.0.7 dask_kubernetes-2024.5.0 google-auth-2.29.0 greenlet-3.0.3 httpx-ws-0.6.0 iso8601-2.1.0 kopf-1.37.2 kr8s-0.14.4 kubernetes-29.0.0 kubernetes-asyncio-29.0.0 oauthlib-3.2.2 optuna-3.6.1 pyasn1-0.6.0 pyasn1-modules-0.4.0 pykube-ng-23.6.0 python-box-7.1.1 python-jsonpath-1.1.1 requests-oauthlib-2.0.0 rsa-4.9 sqlalchemy-2.0.30 wsproto-1.2.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install dask_kubernetes optuna"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "acc8f524-dc9b-41d7-8faa-3aea23ee1983",
+   "metadata": {},
+   "source": [
+    "## Import Python modules"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "0c8a1ffb-0b03-4d4a-9ab1-0561bf5533d9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import threading\n",
+    "import warnings\n",
+    "\n",
+    "import cupy as cp\n",
+    "import cuspatial\n",
+    "import dask_cudf\n",
+    "import optuna\n",
+    "from cuml.dask.common import utils as dask_utils\n",
+    "from dask.distributed import Client, wait\n",
+    "from dask_kubernetes.operator import KubeCluster\n",
+    "from dask_ml.metrics import mean_squared_error\n",
+    "from dask_ml.model_selection import KFold\n",
+    "from xgboost import dask as dxgb"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b2d61e0b-229b-40c0-889d-b8242e574fc8",
+   "metadata": {},
+   "source": [
+    "## Set up multiple Dask clusters\n",
+    "\n",
+    "To run multi-GPU training jobs in parallel, we will create multiple Dask clusters each controlling its share of GPUs. It's best to think of each Dask cluster as a portion of the compute resource of the Kubernetes cluster.\n",
+    "\n",
+    "Fill in the following variables:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "d1c22c3c-51b2-4526-b1fa-ac012f616e13",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "n_clusters=2\n",
+      "n_worker_per_dask_cluster=2\n",
+      "n_node_per_dask_cluster=3\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Number of nodes in the Kubernetes cluster.\n",
+    "# Each node is assumed to have a single NVIDIA GPU attached\n",
+    "n_nodes = 7\n",
+    "\n",
+    "# Number of worker nodes to be assigned to each Dask cluster\n",
+    "n_worker_per_dask_cluster = 2\n",
+    "\n",
+    "# Number of nodes to be assigned to each Dask cluster\n",
+    "# 1 is added since the Dask cluster's scheduler process needs to be mapped to its own node\n",
+    "n_node_per_dask_cluster = n_worker_per_dask_cluster + 1\n",
+    "\n",
+    "# Number of Dask clusters to be created\n",
+    "# Subtract 1 to account for the notebook pod (it requires its own node)\n",
+    "n_clusters = (n_nodes - 1) // n_node_per_dask_cluster\n",
+    "\n",
+    "print(f\"{n_clusters=}\")\n",
+    "if n_clusters == 0:\n",
+    "    raise ValueError(\"No cluster can be created. Reduce `n_worker_per_dask_cluster` or create more compute nodes\")\n",
+    "print(f\"{n_worker_per_dask_cluster=}\")\n",
+    "print(f\"{n_node_per_dask_cluster=}\")\n",
+    "\n",
+    "n_node_active = n_clusters * n_node_per_dask_cluster + 1\n",
+    "if n_node_active != n_nodes:\n",
+    "    n_idle = n_nodes - n_node_active\n",
+    "    warnings.warn(f\"{n_idle} node(s) will not be used\", stacklevel=2)"
+   ]
+  },
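To make the sizing formula above concrete, here is a small sketch with a hypothetical 10-node pool (the numbers are illustrative only, not part of the notebook): one node is reserved for the notebook pod and each Dask cluster needs `n_worker_per_dask_cluster + 1` nodes, so 10 nodes with 2 workers per cluster yields 3 clusters and no idle nodes.

```python
# Hypothetical sizing example: 10 single-GPU nodes, 2 Dask workers per cluster.
n_nodes = 10
n_worker_per_dask_cluster = 2
n_node_per_dask_cluster = n_worker_per_dask_cluster + 1        # 2 workers + 1 scheduler
n_clusters = (n_nodes - 1) // n_node_per_dask_cluster          # 1 node reserved for the notebook pod -> 3
n_idle = (n_nodes - 1) - n_clusters * n_node_per_dask_cluster  # -> 0 idle nodes
print(n_clusters, n_idle)
```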
+  {
+   "cell_type": "markdown",
+   "id": "c0eee823-162f-47e9-be4c-41447b2d7ee9",
+   "metadata": {},
+   "source": [
+    "Once we've determined the number of Dask clusters and their size, we are now ready to launch them:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "8d0b632a-b73d-4351-bb5d-8a1f4ab1a2c4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Choose the same RAPIDS image you used for launching the notebook session\n",
+    "rapids_image = \"{{ rapids_notebook_container }}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "62aa9e52-c5b6-487c-8f02-88ea84980cfc",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e24e5095ae78458e804d5f1212372f9a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
-    "nbformat": 4,
-    "nbformat_minor": 5
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Launching cluster 0...\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "240e689def1549c1b5dfd87284192e96",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Launching cluster 1...\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "clusters = []\n",
+    "for i in range(n_clusters):\n",
+    "    print(f\"Launching cluster {i}...\")\n",
+    "    clusters.append(\n",
+    "        KubeCluster(\n",
+    "            name=f\"rapids-dask{i}\",\n",
+    "            image=rapids_image,\n",
+    "            worker_command=\"dask-cuda-worker\",\n",
+    "            n_workers=2,\n",
+    "            resources={\"limits\": {\"nvidia.com/gpu\": \"1\"}},\n",
+    "            env={\"EXTRA_PIP_PACKAGES\": \"optuna\"},\n",
+    "        )\n",
+    "    )"
+   ]
+  },
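Before submitting any work, it can help to confirm that each cluster's workers have registered with its scheduler. This is a minimal sketch, assuming the `clusters` list created above and the standard `dask.distributed.Client` API:

```python
from dask.distributed import Client

# Connect briefly to each Dask cluster and report how many workers have registered.
for cluster in clusters:
    with Client(cluster) as client:
        n_workers = len(client.scheduler_info()["workers"])
        print(f"{cluster.name}: {n_workers} worker(s) ready")
```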
+  {
+   "cell_type": "markdown",
+   "id": "f37fa67f-fa90-432c-bed3-8f2a8a095795",
+   "metadata": {},
+   "source": [
+    "## Set up Hyperparameter Optimization Task with NYC Taxi data\n",
+    "\n",
+    "Anaconda has graciously made some of the NYC Taxi dataset available in a public Google Cloud Storage bucket. We'll use our Cluster of GPUs to process it and train a model that predicts the fare amount. We'll use our Dask clusters to process it and train a model that predicts the fare amount."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "c84929a5-f13b-4a61-9ed6-aa8060129e17",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "col_dtype = {\n",
+    "    \"VendorID\": \"int32\",\n",
+    "    \"tpep_pickup_datetime\": \"datetime64[ms]\",\n",
+    "    \"tpep_dropoff_datetime\": \"datetime64[ms]\",\n",
+    "    \"passenger_count\": \"int32\",\n",
+    "    \"trip_distance\": \"float32\",\n",
+    "    \"pickup_longitude\": \"float32\",\n",
+    "    \"pickup_latitude\": \"float32\",\n",
+    "    \"RatecodeID\": \"int32\",\n",
+    "    \"store_and_fwd_flag\": \"int32\",\n",
+    "    \"dropoff_longitude\": \"float32\",\n",
+    "    \"dropoff_latitude\": \"float32\",\n",
+    "    \"payment_type\": \"int32\",\n",
+    "    \"fare_amount\": \"float32\",\n",
+    "    \"extra\": \"float32\",\n",
+    "    \"mta_tax\": \"float32\",\n",
+    "    \"tip_amount\": \"float32\",\n",
+    "    \"total_amount\": \"float32\",\n",
+    "    \"tolls_amount\": \"float32\",\n",
+    "    \"improvement_surcharge\": \"float32\",\n",
+    "}\n",
+    "\n",
+    "\n",
+    "must_haves = {\n",
+    "    \"pickup_datetime\": \"datetime64[ms]\",\n",
+    "    \"dropoff_datetime\": \"datetime64[ms]\",\n",
+    "    \"passenger_count\": \"int32\",\n",
+    "    \"trip_distance\": \"float32\",\n",
+    "    \"pickup_longitude\": \"float32\",\n",
+    "    \"pickup_latitude\": \"float32\",\n",
+    "    \"rate_code\": \"int32\",\n",
+    "    \"dropoff_longitude\": \"float32\",\n",
+    "    \"dropoff_latitude\": \"float32\",\n",
+    "    \"fare_amount\": \"float32\",\n",
+    "}\n",
+    "\n",
+    "\n",
+    "def compute_haversine_distance(df):\n",
+    "    pickup = cuspatial.GeoSeries.from_points_xy(df[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns())\n",
+    "    dropoff = cuspatial.GeoSeries.from_points_xy(df[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns())\n",
+    "    df[\"haversine_distance\"] = cuspatial.haversine_distance(pickup, dropoff)\n",
+    "    df[\"haversine_distance\"] = df[\"haversine_distance\"].astype(\"float32\")\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def clean(ddf, must_haves):\n",
+    "    # replace the extraneous spaces in column names and lower the font type\n",
+    "    tmp = {col: col.strip().lower() for col in list(ddf.columns)}\n",
+    "    ddf = ddf.rename(columns=tmp)\n",
+    "\n",
+    "    ddf = ddf.rename(\n",
+    "        columns={\n",
+    "            \"tpep_pickup_datetime\": \"pickup_datetime\",\n",
+    "            \"tpep_dropoff_datetime\": \"dropoff_datetime\",\n",
+    "            \"ratecodeid\": \"rate_code\",\n",
+    "        }\n",
+    "    )\n",
+    "\n",
+    "    ddf[\"pickup_datetime\"] = ddf[\"pickup_datetime\"].astype(\"datetime64[ms]\")\n",
+    "    ddf[\"dropoff_datetime\"] = ddf[\"dropoff_datetime\"].astype(\"datetime64[ms]\")\n",
+    "\n",
+    "    for col in ddf.columns:\n",
+    "        if col not in must_haves:\n",
+    "            ddf = ddf.drop(columns=col)\n",
+    "            continue\n",
+    "        if ddf[col].dtype == \"object\":\n",
+    "            # Fixing error: could not convert arg to str\n",
+    "            ddf = ddf.drop(columns=col)\n",
+    "        else:\n",
+    "            # downcast from 64bit to 32bit types\n",
+    "            # Tesla T4 are faster on 32bit ops\n",
+    "            if \"int\" in str(ddf[col].dtype):\n",
+    "                ddf[col] = ddf[col].astype(\"int32\")\n",
+    "            if \"float\" in str(ddf[col].dtype):\n",
+    "                ddf[col] = ddf[col].astype(\"float32\")\n",
+    "            ddf[col] = ddf[col].fillna(-1)\n",
+    "\n",
+    "    return ddf\n",
+    "\n",
+    "\n",
+    "def prepare_data(client):\n",
+    "    taxi_df = dask_cudf.read_csv(\n",
+    "        \"https://storage.googleapis.com/anaconda-public-data/nyc-taxi/csv/2016/yellow_tripdata_2016-02.csv\",\n",
+    "        dtype=col_dtype,\n",
+    "    )\n",
+    "    taxi_df = taxi_df.map_partitions(clean, must_haves, meta=must_haves)\n",
+    "\n",
+    "    ## add features\n",
+    "    taxi_df[\"hour\"] = taxi_df[\"pickup_datetime\"].dt.hour.astype(\"int32\")\n",
+    "    taxi_df[\"year\"] = taxi_df[\"pickup_datetime\"].dt.year.astype(\"int32\")\n",
+    "    taxi_df[\"month\"] = taxi_df[\"pickup_datetime\"].dt.month.astype(\"int32\")\n",
+    "    taxi_df[\"day\"] = taxi_df[\"pickup_datetime\"].dt.day.astype(\"int32\")\n",
+    "    taxi_df[\"day_of_week\"] = taxi_df[\"pickup_datetime\"].dt.weekday.astype(\"int32\")\n",
+    "    taxi_df[\"is_weekend\"] = (taxi_df[\"day_of_week\"] >= 5).astype(\"int32\")\n",
+    "\n",
+    "    # calculate the time difference between dropoff and pickup.\n",
+    "    taxi_df[\"diff\"] = taxi_df[\"dropoff_datetime\"].astype(\"int32\") - taxi_df[\"pickup_datetime\"].astype(\"int32\")\n",
+    "    taxi_df[\"diff\"] = (taxi_df[\"diff\"] / 1000).astype(\"int32\")\n",
+    "\n",
+    "    taxi_df[\"pickup_latitude_r\"] = taxi_df[\"pickup_latitude\"] // 0.01 * 0.01\n",
+    "    taxi_df[\"pickup_longitude_r\"] = taxi_df[\"pickup_longitude\"] // 0.01 * 0.01\n",
+    "    taxi_df[\"dropoff_latitude_r\"] = taxi_df[\"dropoff_latitude\"] // 0.01 * 0.01\n",
+    "    taxi_df[\"dropoff_longitude_r\"] = taxi_df[\"dropoff_longitude\"] // 0.01 * 0.01\n",
+    "\n",
+    "    taxi_df = taxi_df.drop(\"pickup_datetime\", axis=1)\n",
+    "    taxi_df = taxi_df.drop(\"dropoff_datetime\", axis=1)\n",
+    "\n",
+    "    taxi_df = taxi_df.map_partitions(compute_haversine_distance)\n",
+    "\n",
+    "    X = taxi_df.drop([\"fare_amount\"], axis=1).astype(\"float32\").to_dask_array(lengths=True)\n",
+    "    y = taxi_df[\"fare_amount\"].astype(\"float32\").to_dask_array(lengths=True)\n",
+    "\n",
+    "    X._meta = cp.asarray(X._meta)\n",
+    "    y._meta = cp.asarray(y._meta)\n",
+    "\n",
+    "    X, y = dask_utils.persist_across_workers(client, [X, y])\n",
+    "    return X, y\n",
+    "\n",
+    "\n",
+    "def train_model(params):\n",
+    "    cluster = get_cluster(threading.get_ident())\n",
+    "\n",
+    "    default_params = {\n",
+    "        \"objective\": \"reg:squarederror\",\n",
+    "        \"eval_metric\": \"rmse\",\n",
+    "        \"verbosity\": 0,\n",
+    "        \"tree_method\": \"hist\",\n",
+    "        \"device\": \"cuda\",\n",
+    "    }\n",
+    "    params = dict(default_params, **params)\n",
+    "\n",
+    "    with Client(cluster) as client:\n",
+    "        X, y = prepare_data(client)\n",
+    "        wait([X, y])\n",
+    "\n",
+    "        scores = []\n",
+    "        kfold = KFold(n_splits=5, shuffle=False)\n",
+    "        for train_index, test_index in kfold.split(X, y):\n",
+    "            dtrain = dxgb.DaskQuantileDMatrix(client, X[train_index, :], y[train_index])\n",
+    "            dtest = dxgb.DaskQuantileDMatrix(client, X[test_index, :], y[test_index])\n",
+    "            model = dxgb.train(\n",
+    "                client,\n",
+    "                params,\n",
+    "                dtrain,\n",
+    "                num_boost_round=10,\n",
+    "                verbose_eval=False,\n",
+    "            )\n",
+    "            y_test_pred = dxgb.predict(client, model, dtest).to_backend(\"cupy\")\n",
+    "            rmse_score = mean_squared_error(y[test_index], y_test_pred, squared=False)\n",
+    "            scores.append(rmse_score)\n",
+    "        return sum(scores) / len(scores)\n",
+    "\n",
+    "\n",
+    "def objective(trial):\n",
+    "    params = {\n",
+    "        \"n_estimators\": trial.suggest_int(\"n_estimators\", 2, 4),\n",
+    "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.5, 0.7),\n",
+    "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.5, 1),\n",
+    "        \"colsample_bynode\": trial.suggest_float(\"colsample_bynode\", 0.5, 1),\n",
+    "        \"colsample_bylevel\": trial.suggest_float(\"colsample_bylevel\", 0.5, 1),\n",
+    "        \"reg_lambda\": trial.suggest_float(\"reg_lambda\", 0, 1),\n",
+    "        \"max_depth\": trial.suggest_int(\"max_depth\", 1, 6),\n",
+    "        \"max_leaves\": trial.suggest_int(\"max_leaves\", 0, 2),\n",
+    "        \"max_cat_to_onehot\": trial.suggest_int(\"max_cat_to_onehot\", 1, 10),\n",
+    "    }\n",
+    "    return train_model(params)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0c401aa1-2aeb-43d9-955b-4dfd7b495fe9",
+   "metadata": {},
+   "source": [
+    "To kick off multiple training jobs in parallel, we will launch multiple threads, so that each thread controls a Dask cluster.\n",
+    "One important utility function is `get_cluster`, which returns the Dask cluster that's mapped to a given thread."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "97cdeb8a-330e-4d96-92d4-d48c93828d9d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Map each thread's integer ID to a sequential number (0, 1, 2 ...)\n",
+    "thread_id_map: dict[int, KubeCluster] = {}\n",
+    "thread_id_map_lock = threading.Lock()\n",
+    "\n",
+    "\n",
+    "def get_cluster(thread_id: int) -> KubeCluster:\n",
+    "    with thread_id_map_lock:\n",
+    "        try:\n",
+    "            return clusters[thread_id_map[thread_id]]\n",
+    "        except KeyError:\n",
+    "            seq_id = len(thread_id_map)\n",
+    "            thread_id_map[thread_id] = seq_id\n",
+    "            return clusters[seq_id]"
+   ]
+  },
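As a quick illustration of the mapping (a sketch, not part of the notebook, assuming `clusters` holds at least two clusters and `get_cluster` is defined as above), two distinct threads each resolve to their own Dask cluster:

```python
import threading

def report():
    # Each thread looks up its own Dask cluster; distinct threads print distinct cluster names.
    cluster = get_cluster(threading.get_ident())
    print(f"thread {threading.get_ident()} -> {cluster.name}")

threads = [threading.Thread(target=report) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```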
+  {
+   "cell_type": "markdown",
+   "id": "2e7c923b-f4ea-4f38-b3a5-92dfcd47dfff",
+   "metadata": {},
+   "source": [
+    "Now we are ready to start hyperparameter optimization."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "c557d769-0be6-4319-b7f5-8ad52b824961",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[I 2024-05-09 07:53:00,718] A new study created in memory with name: no-name-da830427-bce3-4e42-98e6-c98c0c3da0d7\n"
+     ]
+    }
+   ],
+   "source": [
+    "n_trials = 10  # set to a low number so that the demo finishes quickly. Feel free to adjust\n",
+    "study = optuna.create_study(direction=\"minimize\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "94ece2d0-b3f7-44c8-9b4e-a2f60fd623b9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[I 2024-05-09 07:54:10,229] Trial 1 finished with value: 59.449462890625 and parameters: {'n_estimators': 4, 'learning_rate': 0.6399993857892183, 'colsample_bytree': 0.7020623988319513, 'colsample_bynode': 0.777468318546648, 'colsample_bylevel': 0.7890749134903386, 'reg_lambda': 0.4464953694744921, 'max_depth': 3, 'max_leaves': 0, 'max_cat_to_onehot': 9}. Best is trial 1 with value: 59.449462890625.\n",
+      "[I 2024-05-09 07:54:19,507] Trial 0 finished with value: 57.77985763549805 and parameters: {'n_estimators': 4, 'learning_rate': 0.674087333032356, 'colsample_bytree': 0.557642421113256, 'colsample_bynode': 0.9719449711676733, 'colsample_bylevel': 0.6984302171973646, 'reg_lambda': 0.7201514298169174, 'max_depth': 4, 'max_leaves': 1, 'max_cat_to_onehot': 4}. Best is trial 0 with value: 57.77985763549805.\n",
+      "[I 2024-05-09 07:54:59,524] Trial 2 finished with value: 57.77985763549805 and parameters: {'n_estimators': 2, 'learning_rate': 0.6894880267544121, 'colsample_bytree': 0.8171662437182604, 'colsample_bynode': 0.549527686217645, 'colsample_bylevel': 0.890212178266078, 'reg_lambda': 0.5847298606135033, 'max_depth': 2, 'max_leaves': 1, 'max_cat_to_onehot': 5}. Best is trial 0 with value: 57.77985763549805.\n",
+      "[I 2024-05-09 07:55:22,013] Trial 3 finished with value: 55.01234817504883 and parameters: {'n_estimators': 4, 'learning_rate': 0.6597614733926671, 'colsample_bytree': 0.8437061126308156, 'colsample_bynode': 0.621479934699203, 'colsample_bylevel': 0.8330951489228277, 'reg_lambda': 0.7830102753448884, 'max_depth': 2, 'max_leaves': 2, 'max_cat_to_onehot': 2}. Best is trial 3 with value: 55.01234817504883.\n",
+      "[I 2024-05-09 07:56:00,678] Trial 4 finished with value: 57.77985763549805 and parameters: {'n_estimators': 4, 'learning_rate': 0.5994587326401378, 'colsample_bytree': 0.9799078215504886, 'colsample_bynode': 0.9766955839079614, 'colsample_bylevel': 0.5088864363378924, 'reg_lambda': 0.18103184809548734, 'max_depth': 3, 'max_leaves': 1, 'max_cat_to_onehot': 4}. Best is trial 3 with value: 55.01234817504883.\n",
+      "[I 2024-05-09 07:56:11,773] Trial 5 finished with value: 54.936126708984375 and parameters: {'n_estimators': 2, 'learning_rate': 0.5208827661289628, 'colsample_bytree': 0.866258912492528, 'colsample_bynode': 0.6368815844513638, 'colsample_bylevel': 0.9539603435186208, 'reg_lambda': 0.21390618865079458, 'max_depth': 4, 'max_leaves': 2, 'max_cat_to_onehot': 4}. Best is trial 5 with value: 54.936126708984375.\n",
+      "[I 2024-05-09 07:56:48,737] Trial 6 finished with value: 57.77985763549805 and parameters: {'n_estimators': 2, 'learning_rate': 0.6137888371528442, 'colsample_bytree': 0.9621063205689744, 'colsample_bynode': 0.5306812468481084, 'colsample_bylevel': 0.8527827651989199, 'reg_lambda': 0.3315799968401767, 'max_depth': 6, 'max_leaves': 1, 'max_cat_to_onehot': 9}. Best is trial 5 with value: 54.936126708984375.\n",
+      "[I 2024-05-09 07:56:59,261] Trial 7 finished with value: 55.204200744628906 and parameters: {'n_estimators': 3, 'learning_rate': 0.6831416027240611, 'colsample_bytree': 0.5311840770388268, 'colsample_bynode': 0.9572535535110238, 'colsample_bylevel': 0.6846894032354778, 'reg_lambda': 0.6091211134408249, 'max_depth': 3, 'max_leaves': 2, 'max_cat_to_onehot': 5}. Best is trial 5 with value: 54.936126708984375.\n",
+      "[I 2024-05-09 07:57:37,674] Trial 8 finished with value: 54.93584442138672 and parameters: {'n_estimators': 4, 'learning_rate': 0.620742285616388, 'colsample_bytree': 0.7969398985157778, 'colsample_bynode': 0.9049707375663323, 'colsample_bylevel': 0.7209693969245297, 'reg_lambda': 0.6158847054585023, 'max_depth': 1, 'max_leaves': 0, 'max_cat_to_onehot': 10}. Best is trial 8 with value: 54.93584442138672.\n",
+      "[I 2024-05-09 07:57:50,310] Trial 9 finished with value: 57.76123809814453 and parameters: {'n_estimators': 3, 'learning_rate': 0.5475197727057007, 'colsample_bytree': 0.5381502848057452, 'colsample_bynode': 0.8514705732161596, 'colsample_bylevel': 0.9139277684007088, 'reg_lambda': 0.5117732009332318, 'max_depth': 4, 'max_leaves': 0, 'max_cat_to_onehot': 5}. Best is trial 8 with value: 54.93584442138672.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# With n_jobs parameter, Optuna will launch [n_clusters] threads internally\n",
+    "# Each thread will deploy a training job to a Dask cluster\n",
+    "study.optimize(objective, n_trials=n_trials, n_jobs=n_clusters)"
+   ]
+  },
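When the trials finish, the usual Optuna accessors summarize the search, and the Dask clusters can be torn down to release their Kubernetes resources. A short sketch, assuming the `study` and `clusters` objects from the cells above:

```python
# Inspect the best trial found by the search.
print(f"Best RMSE: {study.best_value}")
print(f"Best hyperparameters: {study.best_params}")

# Shut down each Dask cluster and free its pods.
for cluster in clusters:
    cluster.close()
```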
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac5b3cba-87ba-4470-a166-b6a0815f85e4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
 }
diff --git a/source/examples/xgboost-randomforest-gpu-hpo-dask/notebook.ipynb b/source/examples/xgboost-randomforest-gpu-hpo-dask/notebook.ipynb
index 5726ed4e..c321b250 100644
--- a/source/examples/xgboost-randomforest-gpu-hpo-dask/notebook.ipynb
+++ b/source/examples/xgboost-randomforest-gpu-hpo-dask/notebook.ipynb
@@ -410,9 +410,7 @@
     "        clf = dcv.GridSearchCV(model, gridsearch_params, cv=N_FOLDS, scoring=scorer)\n",
     "    elif mode == \"gpu-random\":\n",
     "        print(\"gpu-random selected\")\n",
-    "        clf = dcv.RandomizedSearchCV(\n",
-    "            model, gridsearch_params, cv=N_FOLDS, scoring=scorer, n_iter=n_iter\n",
-    "        )\n",
+    "        clf = dcv.RandomizedSearchCV(model, gridsearch_params, cv=N_FOLDS, scoring=scorer, n_iter=n_iter)\n",
     "\n",
     "    else:\n",
     "        print(\"Unknown Option, please choose one of [gpu-grid, gpu-random]\")\n",
@@ -569,9 +567,7 @@
     "mode = \"gpu-grid\"\n",
     "\n",
     "with timed(\"XGB-\" + mode):\n",
-    "    res, results = do_HPO(\n",
-    "        model_gpu_xgb, params_xgb, cuml_accuracy_scorer, X_train, y_cpu, mode=mode\n",
-    "    )\n",
+    "    res, results = do_HPO(model_gpu_xgb, params_xgb, cuml_accuracy_scorer, X_train, y_cpu, mode=mode)\n",
     "num_params = len(results.cv_results_[\"mean_test_score\"])\n",
     "print(f\"Searched over {num_params} parameters\")"
    ]
diff --git a/source/examples/xgboost-rf-gpu-cpu-benchmark/hpo.py b/source/examples/xgboost-rf-gpu-cpu-benchmark/hpo.py
index 37ccf356..06fbd6e1 100644
--- a/source/examples/xgboost-rf-gpu-cpu-benchmark/hpo.py
+++ b/source/examples/xgboost-rf-gpu-cpu-benchmark/hpo.py
@@ -70,9 +70,7 @@ def train_xgboost(trial, *, target, reseed_rng, threads_per_worker=None):
     params = {
         "max_depth": trial.suggest_int("max_depth", 4, 8),
         "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1, log=True),
-        "min_child_weight": trial.suggest_float(
-            "min_child_weight", 0.1, 10.0, log=True
-        ),
+        "min_child_weight": trial.suggest_float("min_child_weight", 0.1, 10.0, log=True),
         "reg_alpha": trial.suggest_float("reg_alpha", 0.0001, 100, log=True),
         "reg_lambda": trial.suggest_float("reg_lambda", 0.0001, 100, log=True),
         "verbosity": 0,
@@ -135,16 +133,12 @@ def train_randomforest(trial, *, target, reseed_rng, threads_per_worker=None):
 
             params["n_streams"] = 4
             params["n_bins"] = 256
-            params["split_criterion"] = trial.suggest_categorical(
-                "split_criterion", ["gini", "entropy"]
-            )
+            params["split_criterion"] = trial.suggest_categorical("split_criterion", ["gini", "entropy"])
             trained_model = RF_gpu(**params)
             accuracy_score_func = accuracy_score_gpu
         else:
             params["n_jobs"] = threads_per_worker
-            params["criterion"] = trial.suggest_categorical(
-                "criterion", ["gini", "entropy"]
-            )
+            params["criterion"] = trial.suggest_categorical("criterion", ["gini", "entropy"])
             trained_model = RF_cpu(**params)
             accuracy_score_func = accuracy_score_cpu
 
@@ -228,16 +222,12 @@ def main(args):
                 )
                 for _ in range(*iter_range)
             ]
-            print(
-                f"Testing hyperparameter combinations {iter_range[0]}..{iter_range[1]}"
-            )
+            print(f"Testing hyperparameter combinations {iter_range[0]}..{iter_range[1]}")
             _ = wait(futures)
             for fut in futures:
                 _ = fut.result()  # Ensure that the training job was successful
             tnow = time.perf_counter()
-            print(
-                f"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}"
-            )
+            print(f"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}")
     tend = time.perf_counter()
     print(f"Time elapsed: {tend - tstart} sec")
     cluster.close()
@@ -245,9 +235,7 @@ def main(args):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--model-type", type=str, required=True, choices=["XGBoost", "RandomForest"]
-    )
+    parser.add_argument("--model-type", type=str, required=True, choices=["XGBoost", "RandomForest"])
     parser.add_argument("--target", required=True, choices=["gpu", "cpu"])
     parser.add_argument(
         "--threads_per_worker",

From b4bce66bcdb729da8284a43905cec8d3cb7015cd Mon Sep 17 00:00:00 2001
From: Melody Wang 
Date: Fri, 11 Oct 2024 08:44:36 -0400
Subject: [PATCH 22/27] fix linting issues

---
 extensions/rapids_notebook_files.py           |   8 +-
 extensions/rapids_related_examples.py         |  18 +-
 extensions/rapids_version_templating.py       |  12 +-
 package-lock.json                             |   2 +-
 package.json                                  |   2 +-
 source/conf.py                                |  20 +-
 .../rapids-1brc-single-node/notebook.ipynb    |   4 +-
 .../notebook.ipynb                            |  40 +++-
 .../rapids-azureml-hpo/notebook.ipynb         |   8 +-
 .../rapids-azureml-hpo/rapids_csp_azure.py    |  48 +++-
 .../rapids-azureml-hpo/train_rapids.py        |  28 ++-
 .../examples/rapids-ec2-mnmg/notebook.ipynb   |  16 +-
 .../examples/rapids-optuna-hpo/notebook.ipynb |  12 +-
 .../rapids-sagemaker-higgs/notebook.ipynb     |   9 +-
 .../rapids-sagemaker-higgs/rapids-higgs.py    |   4 +-
 .../rapids-sagemaker-hpo/HPOConfig.py         |  12 +-
 .../rapids-sagemaker-hpo/MLWorkflow.py        |   4 +-
 .../rapids-sagemaker-hpo/helper_functions.py  |  29 ++-
 .../rapids-sagemaker-hpo/notebook.ipynb       |  12 +-
 source/examples/rapids-sagemaker-hpo/serve.py |  25 +-
 source/examples/rapids-sagemaker-hpo/train.py |   8 +-
 .../workflows/MLWorkflowMultiCPU.py           |  12 +-
 .../workflows/MLWorkflowMultiGPU.py           |  16 +-
 .../workflows/MLWorkflowSingleCPU.py          |   8 +-
 .../workflows/MLWorkflowSingleGPU.py          |  25 +-
 .../notebook.ipynb                            | 217 +++++++++++++-----
 .../notebook.ipynb                            |  20 +-
 .../xgboost-dask-databricks/notebook.ipynb    |  20 +-
 .../notebook.ipynb                            |  18 +-
 .../notebook.ipynb                            |  18 +-
 .../notebook.ipynb                            |  26 ++-
 .../notebook.ipynb                            |   8 +-
 .../xgboost-rf-gpu-cpu-benchmark/hpo.py       |  24 +-
 source/guides/azure/infiniband.md             |   1 +
 34 files changed, 552 insertions(+), 182 deletions(-)

diff --git a/extensions/rapids_notebook_files.py b/extensions/rapids_notebook_files.py
index 8b6b027f..66d68ef8 100644
--- a/extensions/rapids_notebook_files.py
+++ b/extensions/rapids_notebook_files.py
@@ -16,7 +16,9 @@ def walk_files(app, dir, outdir):
     related_notebook_files = {}
     for page in dir.glob("*"):
         if page.is_dir():
-            related_notebook_files[page.name] = walk_files(app, page, outdir / page.name)
+            related_notebook_files[page.name] = walk_files(
+                app, page, outdir / page.name
+            )
         else:
             with contextlib.suppress(OSError):
                 os.remove(str(outdir / page.name))
@@ -57,7 +59,9 @@ def find_notebook_related_files(app, pagename, templatename, context, doctree):
         path_to_output_parent = output_root / rel_page_parent
 
         # Copy all related files to output and apply templating
-        related_notebook_files = walk_files(app, path_to_page_parent, path_to_output_parent)
+        related_notebook_files = walk_files(
+            app, path_to_page_parent, path_to_output_parent
+        )
 
         # Make archive of related files
         if related_notebook_files and len(related_notebook_files) > 1:
diff --git a/extensions/rapids_related_examples.py b/extensions/rapids_related_examples.py
index 94312715..ef52bf3e 100644
--- a/extensions/rapids_related_examples.py
+++ b/extensions/rapids_related_examples.py
@@ -22,7 +22,9 @@ def read_notebook_tags(path: str) -> list[str]:
         return []
 
 
-def generate_notebook_grid_myst(notebooks: list[str], env: BuildEnvironment) -> list[str]:
+def generate_notebook_grid_myst(
+    notebooks: list[str], env: BuildEnvironment
+) -> list[str]:
     """Generate sphinx-design grid of notebooks in MyST markdown.
 
     Take a list of notebook documents and render out some MyST markdown displaying those
@@ -73,7 +75,11 @@ def get_title_for_notebook(path: str) -> str:
                 if i == len(cell_source) - 1:  # no next_token
                     continue
                 next_token = cell_source[i + 1]
-                if token.type == "heading_open" and token.tag == "h1" and next_token.type == "inline":
+                if (
+                    token.type == "heading_open"
+                    and token.tag == "h1"
+                    and next_token.type == "inline"
+                ):
                     return next_token.content
     raise ValueError("No top-level heading found")
 
@@ -140,7 +146,9 @@ def add_notebook_tag_map_to_context(app, pagename, templatename, context, doctre
         except KeyError:
             tag_tree[root] = [suffix]
     context["notebook_tag_tree"] = tag_tree
-    context["notebook_tags"] = [tag for tag, pages in app.env.notebook_tag_map.items() if pagename in pages]
+    context["notebook_tags"] = [
+        tag for tag, pages in app.env.notebook_tag_map.items() if pagename in pages
+    ]
 
 
 class NotebookGalleryTocTree(TocTree):
@@ -154,7 +162,9 @@ def run(self) -> list[nodes.Node]:
         output += toctree
 
         # Generate the card grid for all items in the toctree
-        notebooks = [notebook for _, notebook in toctree[0].children[0].attributes["entries"]]
+        notebooks = [
+            notebook for _, notebook in toctree[0].children[0].attributes["entries"]
+        ]
         grid_markdown = generate_notebook_grid_myst(notebooks=notebooks, env=self.env)
         for node in parse_markdown(markdown=grid_markdown, state=self.state):
             gallery += node
diff --git a/extensions/rapids_version_templating.py b/extensions/rapids_version_templating.py
index d8b12333..c2c71817 100644
--- a/extensions/rapids_version_templating.py
+++ b/extensions/rapids_version_templating.py
@@ -49,7 +49,9 @@ def visit_reference(self, node: nodes.reference) -> None:
         uri_str = re.sub(r"~~~(.*?)~~~", r"{{ \1 }}", uri_str)
 
         # fill in appropriate values based on app context
-        node.attributes["refuri"] = re.sub(r"(? None:
         Replace template strings in generic text.
         This roughly corresponds to HTML ``

``, ``

``, and similar elements.
         """
-        new_node = nodes.Text(re.sub(r"(? str:
@@ -67,7 +71,9 @@ def template_func(self, match: re.Match) -> str:
         Replace template strings like ``{{ rapids_version }}`` with real
         values like ``24.10``.
         """
-        return self.app.builder.templates.render_string(source=match.group(), context=self.app.config.rapids_version)
+        return self.app.builder.templates.render_string(
+            source=match.group(), context=self.app.config.rapids_version
+        )
 
 
 def version_template(
diff --git a/package-lock.json b/package-lock.json
index b9036882..0d3089e1 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -5,7 +5,7 @@
   "packages": {
     "": {
       "devDependencies": {
-        "prettier": "3.3.3"
+        "prettier": "^3.3.3"
       }
     },
     "node_modules/prettier": {
diff --git a/package.json b/package.json
index a32393d7..c2436a9f 100644
--- a/package.json
+++ b/package.json
@@ -1,5 +1,5 @@
 {
   "devDependencies": {
-    "prettier": "3.3.3"
+    "prettier": "^3.3.3"
   }
 }
diff --git a/source/conf.py b/source/conf.py
index 02ce7ec4..3094edd7 100644
--- a/source/conf.py
+++ b/source/conf.py
@@ -43,12 +43,18 @@
     },
 }
 rapids_version = (
-    versions["stable"] if os.environ.get("DEPLOYMENT_DOCS_BUILD_STABLE", "false") == "true" else versions["nightly"]
+    versions["stable"]
+    if os.environ.get("DEPLOYMENT_DOCS_BUILD_STABLE", "false") == "true"
+    else versions["nightly"]
 )
 rapids_version["rapids_conda_channels_list"] = [
-    channel for channel in rapids_version["rapids_conda_channels"].split(" ") if channel != "-c"
+    channel
+    for channel in rapids_version["rapids_conda_channels"].split(" ")
+    if channel != "-c"
 ]
-rapids_version["rapids_conda_packages_list"] = rapids_version["rapids_conda_packages"].split(" ")
+rapids_version["rapids_conda_packages_list"] = rapids_version[
+    "rapids_conda_packages"
+].split(" ")
 
 # -- General configuration ---------------------------------------------------
 
@@ -88,7 +94,9 @@
 # -- Options for notebooks -------------------------------------------------
 
 nb_execution_mode = "off"
-rapids_deployment_notebooks_base_url = "https://github.com/rapidsai/deployment/blob/main/source/"
+rapids_deployment_notebooks_base_url = (
+    "https://github.com/rapidsai/deployment/blob/main/source/"
+)
 
 # -- Options for HTML output -------------------------------------------------
 
@@ -138,6 +146,8 @@
 def setup(app):
     app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
     app.add_css_file("css/custom.css")
-    app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer")
+    app.add_js_file(
+        "https://docs.rapids.ai/assets/js/custom.js", loading_method="defer"
+    )
     app.add_js_file("js/nav.js", loading_method="defer")
     app.add_js_file("js/notebook-gallery.js", loading_method="defer")
diff --git a/source/examples/rapids-1brc-single-node/notebook.ipynb b/source/examples/rapids-1brc-single-node/notebook.ipynb
index e1cde0c0..aee011e5 100755
--- a/source/examples/rapids-1brc-single-node/notebook.ipynb
+++ b/source/examples/rapids-1brc-single-node/notebook.ipynb
@@ -200,7 +200,9 @@
    "source": [
     "n = 1_000_000_000  # Number of rows of data to generate\n",
     "\n",
-    "lookup_df = cudf.read_csv(\"lookup.csv\")  # Load our lookup table of stations and their mean temperatures\n",
+    "lookup_df = cudf.read_csv(\n",
+    "    \"lookup.csv\"\n",
+    ")  # Load our lookup table of stations and their mean temperatures\n",
     "std = 10.0  # We assume temperatures are normally distributed with a standard deviation of 10\n",
     "chunksize = 2e8  # Set the number of rows to generate in one go (reduce this if you run into GPU RAM limits)\n",
     "filename = Path(\"measurements.txt\")  # Choose where to write to\n",
diff --git a/source/examples/rapids-autoscaling-multi-tenant-kubernetes/notebook.ipynb b/source/examples/rapids-autoscaling-multi-tenant-kubernetes/notebook.ipynb
index 751037cc..886a359d 100644
--- a/source/examples/rapids-autoscaling-multi-tenant-kubernetes/notebook.ipynb
+++ b/source/examples/rapids-autoscaling-multi-tenant-kubernetes/notebook.ipynb
@@ -995,8 +995,12 @@
     "\n",
     "\n",
     "def map_haversine(part):\n",
-    "    pickup = cuspatial.GeoSeries.from_points_xy(part[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns())\n",
-    "    dropoff = cuspatial.GeoSeries.from_points_xy(part[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns())\n",
+    "    pickup = cuspatial.GeoSeries.from_points_xy(\n",
+    "        part[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns()\n",
+    "    )\n",
+    "    dropoff = cuspatial.GeoSeries.from_points_xy(\n",
+    "        part[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns()\n",
+    "    )\n",
     "    return cuspatial.haversine_distance(pickup, dropoff)\n",
     "\n",
     "\n",
@@ -1502,7 +1506,9 @@
     "from random import randrange\n",
     "\n",
     "\n",
-    "def generate_workload(stages=3, min_width=1, max_width=3, variation=1, input_workload=None):\n",
+    "def generate_workload(\n",
+    "    stages=3, min_width=1, max_width=3, variation=1, input_workload=None\n",
+    "):\n",
     "    graph = [input_workload] if input_workload is not None else [run_haversine()]\n",
     "    last_width = min_width\n",
     "    for _ in range(stages):\n",
@@ -1640,25 +1646,35 @@
    ],
    "source": [
     "%%time\n",
-    "start_time = (datetime.datetime.now() - datetime.timedelta(minutes=15)).strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+    "start_time = (datetime.datetime.now() - datetime.timedelta(minutes=15)).strftime(\n",
+    "    \"%Y-%m-%dT%H:%M:%SZ\"\n",
+    ")\n",
     "try:\n",
     "    # Start with a couple of concurrent workloads\n",
     "    workload = generate_workload(stages=10, max_width=2)\n",
     "    # Then increase demand as more users appear\n",
-    "    workload = generate_workload(stages=5, max_width=5, min_width=3, variation=5, input_workload=workload)\n",
+    "    workload = generate_workload(\n",
+    "        stages=5, max_width=5, min_width=3, variation=5, input_workload=workload\n",
+    "    )\n",
     "    # Now reduce the workload for a longer period of time, this could be over a lunchbreak or something\n",
     "    workload = generate_workload(stages=30, max_width=2, input_workload=workload)\n",
     "    # Everyone is back from lunch and it hitting the cluster hard\n",
-    "    workload = generate_workload(stages=10, max_width=10, min_width=3, variation=5, input_workload=workload)\n",
+    "    workload = generate_workload(\n",
+    "        stages=10, max_width=10, min_width=3, variation=5, input_workload=workload\n",
+    "    )\n",
     "    # The after lunch rush is easing\n",
-    "    workload = generate_workload(stages=5, max_width=5, min_width=3, variation=5, input_workload=workload)\n",
+    "    workload = generate_workload(\n",
+    "        stages=5, max_width=5, min_width=3, variation=5, input_workload=workload\n",
+    "    )\n",
     "    # As we get towards the end of the day demand slows off again\n",
     "    workload = generate_workload(stages=10, max_width=2, input_workload=workload)\n",
     "    workload.compute()\n",
     "finally:\n",
     "    client.close()\n",
     "    cluster.close()\n",
-    "    end_time = (datetime.datetime.now() + datetime.timedelta(minutes=15)).strftime(\"%Y-%m-%dT%H:%M:%SZ\")"
+    "    end_time = (datetime.datetime.now() + datetime.timedelta(minutes=15)).strftime(\n",
+    "        \"%Y-%m-%dT%H:%M:%SZ\"\n",
+    "    )"
    ]
   },
   {
@@ -2021,10 +2037,14 @@
     "    end_time,\n",
     "    \"1s\",\n",
     ")\n",
-    "running_pods = running_pods[running_pods.columns.drop(list(running_pods.filter(regex=\"prepull\")))]\n",
+    "running_pods = running_pods[\n",
+    "    running_pods.columns.drop(list(running_pods.filter(regex=\"prepull\")))\n",
+    "]\n",
     "nodes = p.query_range(\"count(kube_node_info)\", start_time, end_time, \"1s\")\n",
     "nodes.columns = [\"Available GPUs\"]\n",
-    "nodes[\"Available GPUs\"] = nodes[\"Available GPUs\"] * 2  # We know our nodes each had 2 GPUs\n",
+    "nodes[\"Available GPUs\"] = (\n",
+    "    nodes[\"Available GPUs\"] * 2\n",
+    ")  # We know our nodes each had 2 GPUs\n",
     "nodes[\"Utilized GPUs\"] = running_pods.sum(axis=1)"
    ]
   },
diff --git a/source/examples/rapids-azureml-hpo/notebook.ipynb b/source/examples/rapids-azureml-hpo/notebook.ipynb
index 02667938..14575363 100644
--- a/source/examples/rapids-azureml-hpo/notebook.ipynb
+++ b/source/examples/rapids-azureml-hpo/notebook.ipynb
@@ -218,7 +218,9 @@
     "    )\n",
     "    ml_client.compute.begin_create_or_update(gpu_target).result()\n",
     "\n",
-    "    print(f\"AMLCompute with name {gpu_target.name} is created, the compute size is {gpu_target.size}\")"
+    "    print(\n",
+    "        f\"AMLCompute with name {gpu_target.name} is created, the compute size is {gpu_target.size}\"\n",
+    "    )"
    ]
   },
   {
@@ -485,7 +487,9 @@
     "\n",
     "\n",
     "# Define the limits for this sweep\n",
-    "sweep_job.set_limits(max_total_trials=10, max_concurrent_trials=2, timeout=18000, trial_timeout=3600)\n",
+    "sweep_job.set_limits(\n",
+    "    max_total_trials=10, max_concurrent_trials=2, timeout=18000, trial_timeout=3600\n",
+    ")\n",
     "\n",
     "\n",
     "# Specify your experiment details\n",
diff --git a/source/examples/rapids-azureml-hpo/rapids_csp_azure.py b/source/examples/rapids-azureml-hpo/rapids_csp_azure.py
index 683e120b..ea7724ea 100644
--- a/source/examples/rapids-azureml-hpo/rapids_csp_azure.py
+++ b/source/examples/rapids-azureml-hpo/rapids_csp_azure.py
@@ -132,7 +132,9 @@ def load_hyperparams(self, model_name="XGBoost"):
             self.log_to_file(str(error))
             return
 
-    def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBinary"):
+    def load_data(
+        self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBinary"
+    ):
         """
         Loading the data into the object from the filename and based on the columns that we are
         interested in. Also, generates y_label from 'ArrDelay' column to convert this into a binary
@@ -183,7 +185,9 @@ def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBi
 
                     elif "multi" in self.compute_type:
                         self.log_to_file("\n\tReading using dask dataframe")
-                        dataset = dask.dataframe.read_parquet(target_filename, columns=col_labels)
+                        dataset = dask.dataframe.read_parquet(
+                            target_filename, columns=col_labels
+                        )
 
             elif "GPU" in self.compute_type:
                 # GPU Reading Option
@@ -201,7 +205,9 @@ def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBi
 
                     elif "multi" in self.compute_type:
                         self.log_to_file("\n\tReading using dask_cudf")
-                        dataset = dask_cudf.read_parquet(target_filename, columns=col_labels)
+                        dataset = dask_cudf.read_parquet(
+                            target_filename, columns=col_labels
+                        )
 
         # cast all columns to float32
         for col in dataset.columns:
@@ -216,10 +222,14 @@ def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBi
         dataset = dataset.fillna(0.0)  # Filling the null values. Needed for dask-cudf
 
         self.log_to_file(f"\n\tIngestion completed in {ingestion_timer.duration}")
-        self.log_to_file(f"\n\tDataset descriptors: {dataset.shape}\n\t{dataset.dtypes}")
+        self.log_to_file(
+            f"\n\tDataset descriptors: {dataset.shape}\n\t{dataset.dtypes}"
+        )
         return dataset, col_labels, y_label, ingestion_timer.duration
 
-    def split_data(self, dataset, y_label, train_size=0.8, random_state=0, shuffle=True):
+    def split_data(
+        self, dataset, y_label, train_size=0.8, random_state=0, shuffle=True
+    ):
         """
         Splitting data into train and test split, has appropriate imports for different compute modes.
         CPU compute - Uses sklearn, we manually filter y_label column in the split call
@@ -311,9 +321,13 @@ def train_model(self, X_train, y_train, model_params):
 
         try:
             if self.model_type == "XGBoost":
-                trained_model, training_time = self.fit_xgboost(X_train, y_train, model_params)
+                trained_model, training_time = self.fit_xgboost(
+                    X_train, y_train, model_params
+                )
             elif self.model_type == "RandomForest":
-                trained_model, training_time = self.fit_random_forest(X_train, y_train, model_params)
+                trained_model, training_time = self.fit_random_forest(
+                    X_train, y_train, model_params
+                )
         except Exception as error:
             self.log_to_file("\n\n!error during model training: " + str(error))
         self.log_to_file(f"\n\tFinished training in {training_time:.4f} s")
@@ -340,7 +354,9 @@ def fit_xgboost(self, X_train, y_train, model_params):
                 )
             elif "multi" in self.compute_type:
                 self.log_to_file("\n\tTraining multi-GPU XGBoost")
-                train_DMatrix = xgboost.dask.DaskDMatrix(self.client, data=X_train, label=y_train)
+                train_DMatrix = xgboost.dask.DaskDMatrix(
+                    self.client, data=X_train, label=y_train
+                )
                 trained_model = xgboost.dask.train(
                     self.client,
                     dtrain=train_DMatrix,
@@ -425,8 +441,12 @@ def evaluate_test_perf(self, trained_model, X_test, y_test, threshold=0.5):
             try:
                 if self.model_type == "XGBoost":
                     if "multi" in self.compute_type:
-                        test_DMatrix = xgboost.dask.DaskDMatrix(self.client, data=X_test, label=y_test)
-                        xgb_pred = xgboost.dask.predict(self.client, trained_model, test_DMatrix).compute()
+                        test_DMatrix = xgboost.dask.DaskDMatrix(
+                            self.client, data=X_test, label=y_test
+                        )
+                        xgb_pred = xgboost.dask.predict(
+                            self.client, trained_model, test_DMatrix
+                        ).compute()
                         xgb_pred = (xgb_pred > threshold) * 1.0
                         test_accuracy = accuracy_score(y_test.compute(), xgb_pred)
                     elif "single" in self.compute_type:
@@ -439,9 +459,13 @@ def evaluate_test_perf(self, trained_model, X_test, y_test, threshold=0.5):
                     if "multi" in self.compute_type:
                         cuml_pred = trained_model.predict(X_test).compute()
                         self.log_to_file("\n\tPrediction complete")
-                        test_accuracy = accuracy_score(y_test.compute(), cuml_pred, convert_dtype=True)
+                        test_accuracy = accuracy_score(
+                            y_test.compute(), cuml_pred, convert_dtype=True
+                        )
                     elif "single" in self.compute_type:
-                        test_accuracy = trained_model.score(X_test, y_test.astype("int32"))
+                        test_accuracy = trained_model.score(
+                            X_test, y_test.astype("int32")
+                        )
 
             except Exception as error:
                 self.log_to_file("\n\n!error during inference: " + str(error))
diff --git a/source/examples/rapids-azureml-hpo/train_rapids.py b/source/examples/rapids-azureml-hpo/train_rapids.py
index a170e6f5..63ce4f5f 100644
--- a/source/examples/rapids-azureml-hpo/train_rapids.py
+++ b/source/examples/rapids-azureml-hpo/train_rapids.py
@@ -28,8 +28,12 @@ def main():
     parser = argparse.ArgumentParser()
 
     parser.add_argument("--data_dir", type=str, help="location of data")
-    parser.add_argument("--n_estimators", type=int, default=100, help="Number of trees in RF")
-    parser.add_argument("--max_depth", type=int, default=16, help="Max depth of each tree")
+    parser.add_argument(
+        "--n_estimators", type=int, default=100, help="Number of trees in RF"
+    )
+    parser.add_argument(
+        "--max_depth", type=int, default=16, help="Max depth of each tree"
+    )
     parser.add_argument(
         "--n_bins",
         type=int,
@@ -48,7 +52,9 @@ def main():
         default="single-GPU",
         help="set to multi-GPU for algorithms via dask",
     )
-    parser.add_argument("--cv_folds", type=int, default=5, help="Number of CV fold splits")
+    parser.add_argument(
+        "--cv_folds", type=int, default=5, help="Number of CV fold splits"
+    )
 
     args = parser.parse_args()
     data_dir = args.data_dir
@@ -128,14 +134,20 @@ def main():
         print(f"\n CV fold { i_train_fold } of { cv_folds }\n")
 
         # split data
-        X_train, X_test, y_train, y_test, _ = azure_ml.split_data(X, y, random_state=i_train_fold)
+        X_train, X_test, y_train, y_test, _ = azure_ml.split_data(
+            X, y, random_state=i_train_fold
+        )
         # train model
-        trained_model, training_time = azure_ml.train_model(X_train, y_train, model_params)
+        trained_model, training_time = azure_ml.train_model(
+            X_train, y_train, model_params
+        )
 
         train_time_per_fold.append(round(training_time, 4))
 
         # evaluate perf
-        test_accuracy, infer_time = azure_ml.evaluate_test_perf(trained_model, X_test, y_test)
+        test_accuracy, infer_time = azure_ml.evaluate_test_perf(
+            trained_model, X_test, y_test
+        )
         accuracy_per_fold.append(round(test_accuracy, 4))
         infer_time_per_fold.append(round(infer_time, 4))
 
@@ -143,7 +155,9 @@ def main():
         if test_accuracy > global_best_test_accuracy:
             global_best_test_accuracy = test_accuracy
 
-    mlflow.log_metric("Total training inference time", np.float(training_time + infer_time))
+    mlflow.log_metric(
+        "Total training inference time", np.float(training_time + infer_time)
+    )
     mlflow.log_metric("Accuracy", np.float(global_best_test_accuracy))
     print("\n Accuracy             :", global_best_test_accuracy)
     print("\n accuracy per fold    :", accuracy_per_fold)
diff --git a/source/examples/rapids-ec2-mnmg/notebook.ipynb b/source/examples/rapids-ec2-mnmg/notebook.ipynb
index d0f08884..79ca421a 100644
--- a/source/examples/rapids-ec2-mnmg/notebook.ipynb
+++ b/source/examples/rapids-ec2-mnmg/notebook.ipynb
@@ -284,7 +284,9 @@
     "taxi_df[\"is_weekend\"] = (taxi_df[\"day_of_week\"] >= 5).astype(\"int32\")\n",
     "\n",
     "# calculate the time difference between dropoff and pickup.\n",
-    "taxi_df[\"diff\"] = taxi_df[\"dropoff_datetime\"].astype(\"int32\") - taxi_df[\"pickup_datetime\"].astype(\"int32\")\n",
+    "taxi_df[\"diff\"] = taxi_df[\"dropoff_datetime\"].astype(\"int32\") - taxi_df[\n",
+    "    \"pickup_datetime\"\n",
+    "].astype(\"int32\")\n",
     "taxi_df[\"diff\"] = (taxi_df[\"diff\"] / 1000).astype(\"int32\")\n",
     "\n",
     "taxi_df[\"pickup_latitude_r\"] = taxi_df[\"pickup_latitude\"] // 0.01 * 0.01\n",
@@ -299,8 +301,12 @@
     "def haversine_dist(df):\n",
     "    import cuspatial\n",
     "\n",
-    "    pickup = cuspatial.GeoSeries.from_points_xy(df[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns())\n",
-    "    dropoff = cuspatial.GeoSeries.from_points_xy(df[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns())\n",
+    "    pickup = cuspatial.GeoSeries.from_points_xy(\n",
+    "        df[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns()\n",
+    "    )\n",
+    "    dropoff = cuspatial.GeoSeries.from_points_xy(\n",
+    "        df[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns()\n",
+    "    )\n",
     "    df[\"h_distance\"] = cuspatial.haversine_distance(pickup, dropoff)\n",
     "    df[\"h_distance\"] = df[\"h_distance\"].astype(\"float32\")\n",
     "    return df\n",
@@ -325,7 +331,9 @@
    "outputs": [],
    "source": [
     "# Split into training and validation sets\n",
-    "X, y = taxi_df.drop([\"fare_amount\"], axis=1).astype(\"float32\"), taxi_df[\"fare_amount\"].astype(\"float32\")\n",
+    "X, y = taxi_df.drop([\"fare_amount\"], axis=1).astype(\"float32\"), taxi_df[\n",
+    "    \"fare_amount\"\n",
+    "].astype(\"float32\")\n",
     "X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True)"
    ]
   },
diff --git a/source/examples/rapids-optuna-hpo/notebook.ipynb b/source/examples/rapids-optuna-hpo/notebook.ipynb
index 678c85ca..127d08ce 100644
--- a/source/examples/rapids-optuna-hpo/notebook.ipynb
+++ b/source/examples/rapids-optuna-hpo/notebook.ipynb
@@ -175,7 +175,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def train_and_eval(X_param, y_param, penalty=\"l2\", C=1.0, l1_ratio=None, fit_intercept=True):\n",
+    "def train_and_eval(\n",
+    "    X_param, y_param, penalty=\"l2\", C=1.0, l1_ratio=None, fit_intercept=True\n",
+    "):\n",
     "    \"\"\"\n",
     "    Splits the given data into train and test split to train and evaluate the model\n",
     "    for the params parameters.\n",
@@ -192,7 +194,9 @@
     "    Returns\n",
     "    score: log loss of the fitted model\n",
     "    \"\"\"\n",
-    "    X_train, X_valid, y_train, y_valid = train_test_split(X_param, y_param, random_state=42)\n",
+    "    X_train, X_valid, y_train, y_valid = train_test_split(\n",
+    "        X_param, y_param, random_state=42\n",
+    "    )\n",
     "    classifier = LogisticRegression(\n",
     "        penalty=penalty,\n",
     "        C=C,\n",
@@ -259,7 +263,9 @@
     "    penalty = trial.suggest_categorical(\"penalty\", [\"none\", \"l1\", \"l2\"])\n",
     "    fit_intercept = trial.suggest_categorical(\"fit_intercept\", [True, False])\n",
     "\n",
-    "    score = train_and_eval(X_param, y_param, penalty=penalty, C=C, fit_intercept=fit_intercept)\n",
+    "    score = train_and_eval(\n",
+    "        X_param, y_param, penalty=penalty, C=C, fit_intercept=fit_intercept\n",
+    "    )\n",
     "    return score"
    ]
   },
diff --git a/source/examples/rapids-sagemaker-higgs/notebook.ipynb b/source/examples/rapids-sagemaker-higgs/notebook.ipynb
index ad648d37..3282c3b5 100644
--- a/source/examples/rapids-sagemaker-higgs/notebook.ipynb
+++ b/source/examples/rapids-sagemaker-higgs/notebook.ipynb
@@ -402,7 +402,9 @@
    },
    "outputs": [],
    "source": [
-    "ECR_container_fullname = f\"{account}.dkr.ecr.{region}.amazonaws.com/{estimator_info['ecr_image']}\""
+    "ECR_container_fullname = (\n",
+    "    f\"{account}.dkr.ecr.{region}.amazonaws.com/{estimator_info['ecr_image']}\"\n",
+    ")"
    ]
   },
   {
@@ -455,7 +457,10 @@
     }
    ],
    "source": [
-    "print(f\"source      : {estimator_info['ecr_image']}\\n\" f\"destination : {ECR_container_fullname}\")"
+    "print(\n",
+    "    f\"source      : {estimator_info['ecr_image']}\\n\"\n",
+    "    f\"destination : {ECR_container_fullname}\"\n",
+    ")"
    ]
   },
   {
diff --git a/source/examples/rapids-sagemaker-higgs/rapids-higgs.py b/source/examples/rapids-sagemaker-higgs/rapids-higgs.py
index cea9649b..0093e574 100644
--- a/source/examples/rapids-sagemaker-higgs/rapids-higgs.py
+++ b/source/examples/rapids-sagemaker-higgs/rapids-higgs.py
@@ -13,7 +13,9 @@ def main(args):
     data_dir = args.data_dir
 
     col_names = ["label"] + [f"col-{i}" for i in range(2, 30)]  # Assign column names
-    dtypes_ls = ["int32"] + ["float32" for _ in range(2, 30)]  # Assign dtypes to each column
+    dtypes_ls = ["int32"] + [
+        "float32" for _ in range(2, 30)
+    ]  # Assign dtypes to each column
 
     data = cudf.read_csv(data_dir + "HIGGS.csv", names=col_names, dtype=dtypes_ls)
     X_train, X_test, y_train, y_test = train_test_split(data, "label", train_size=0.70)
diff --git a/source/examples/rapids-sagemaker-hpo/HPOConfig.py b/source/examples/rapids-sagemaker-hpo/HPOConfig.py
index e1a2be30..f8fe94b9 100644
--- a/source/examples/rapids-sagemaker-hpo/HPOConfig.py
+++ b/source/examples/rapids-sagemaker-hpo/HPOConfig.py
@@ -61,7 +61,9 @@ def __init__(
         ) = self.detect_data_inputs(directory_structure)
 
         self.model_store_directory = directory_structure["model_store"]
-        self.output_artifacts_directory = directory_structure["output_artifacts"]  # noqa
+        self.output_artifacts_directory = directory_structure[
+            "output_artifacts"
+        ]  # noqa
 
     def parse_configuration(self):
         """Parse the ENV variables [ set in the dockerfile ]
@@ -126,7 +128,9 @@ def parse_configuration(self):
 
     def parse_hyper_parameter_inputs(self, input_args):
         """Parse hyperparmeters provided by the HPO orchestrator"""
-        hpo_log.info("parsing model hyperparameters from command line arguments...log")  # noqa
+        hpo_log.info(
+            "parsing model hyperparameters from command line arguments...log"
+        )  # noqa
         parser = argparse.ArgumentParser()
 
         if "XGBoost" in self.model_type:
@@ -215,7 +219,9 @@ def detect_data_inputs(self, directory_structure):
                single-GPU cudf read_parquet needs a list of files
                multi-CPU/GPU can accept either a list or a directory
         """
-        parquet_files = glob.glob(os.path.join(directory_structure["train_data"], "*.parquet"))
+        parquet_files = glob.glob(
+            os.path.join(directory_structure["train_data"], "*.parquet")
+        )
         csv_files = glob.glob(os.path.join(directory_structure["train_data"], "*.csv"))
 
         if len(csv_files):
diff --git a/source/examples/rapids-sagemaker-hpo/MLWorkflow.py b/source/examples/rapids-sagemaker-hpo/MLWorkflow.py
index 31f8f065..ee3e1431 100644
--- a/source/examples/rapids-sagemaker-hpo/MLWorkflow.py
+++ b/source/examples/rapids-sagemaker-hpo/MLWorkflow.py
@@ -89,7 +89,9 @@ def timed_execution_wrapper(*args, **kwargs):
         start_time = time.perf_counter()
         result = target_function(*args, **kwargs)
         exec_time = time.perf_counter() - start_time
-        hpo_log.info(f" --- {target_function.__name__}" f" completed in {exec_time:.5f} s")
+        hpo_log.info(
+            f" --- {target_function.__name__}" f" completed in {exec_time:.5f} s"
+        )
         return result
 
     return timed_execution_wrapper
diff --git a/source/examples/rapids-sagemaker-hpo/helper_functions.py b/source/examples/rapids-sagemaker-hpo/helper_functions.py
index 3b8bd1b2..27a7a6cd 100644
--- a/source/examples/rapids-sagemaker-hpo/helper_functions.py
+++ b/source/examples/rapids-sagemaker-hpo/helper_functions.py
@@ -51,7 +51,10 @@ def recommend_instance_type(code_choice, dataset_directory):
         detail_str = "4x GPUs [ V100 ], 64GB GPU memory,  244GB CPU memory"
         recommended_instance_type = "ml.p3.8xlarge"
 
-    print(f"recommended instance type : {recommended_instance_type} \n" f"instance details          : {detail_str}")
+    print(
+        f"recommended instance type : {recommended_instance_type} \n"
+        f"instance details          : {detail_str}"
+    )
 
     return recommended_instance_type
 
@@ -61,7 +64,8 @@ def validate_dockerfile(rapids_base_container, dockerfile_name="Dockerfile"):
     with open(dockerfile_name) as dockerfile_handle:
         if rapids_base_container not in dockerfile_handle.read():
             raise Exception(
-                "Dockerfile base layer [i.e. FROM statment] does" " not match the variable rapids_base_container"
+                "Dockerfile base layer [i.e. FROM statement] does"
+                " not match the variable rapids_base_container"
             )
 
 
@@ -102,11 +106,17 @@ def summarize_hpo_results(tuning_job_name):
     hpo_results = (
         boto3.Session()
         .client("sagemaker")
-        .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=tuning_job_name)
+        .describe_hyper_parameter_tuning_job(
+            HyperParameterTuningJobName=tuning_job_name
+        )
     )
 
     best_job = hpo_results["BestTrainingJob"]["TrainingJobName"]
-    best_score = hpo_results["BestTrainingJob"]["FinalHyperParameterTuningJobObjectiveMetric"]["Value"]  # noqa
+    best_score = hpo_results["BestTrainingJob"][
+        "FinalHyperParameterTuningJobObjectiveMetric"
+    ][
+        "Value"
+    ]  # noqa
     best_params = hpo_results["BestTrainingJob"]["TunedHyperParameters"]
     print(f"best score: {best_score}")
     print(f"best params: {best_params}")
@@ -182,7 +192,11 @@ def new_job_name_from_config(
 
         random_str = "".join(random.choices(uuid.uuid4().hex, k=trim_limit))
 
-        job_name = f"{data_choice_str}-{code_choice_str}" f"-{algorithm_choice_str}-{cv_folds}cv" f"-{random_str}"
+        job_name = (
+            f"{data_choice_str}-{code_choice_str}"
+            f"-{algorithm_choice_str}-{cv_folds}cv"
+            f"-{random_str}"
+        )
 
         job_name = job_name[:trim_limit]
 
@@ -203,4 +217,7 @@ def validate_region(region):
         region = region[0]
 
     if region not in ["us-east-1", "us-west-2"]:
-        raise Exception("Unsupported region based on demo data location," " please switch to us-east-1 or us-west-2")
+        raise Exception(
+            "Unsupported region based on demo data location,"
+            " please switch to us-east-1 or us-west-2"
+        )
diff --git a/source/examples/rapids-sagemaker-hpo/notebook.ipynb b/source/examples/rapids-sagemaker-hpo/notebook.ipynb
index 47c2a1fe..9ab5d7b0 100644
--- a/source/examples/rapids-sagemaker-hpo/notebook.ipynb
+++ b/source/examples/rapids-sagemaker-hpo/notebook.ipynb
@@ -778,7 +778,9 @@
    },
    "outputs": [],
    "source": [
-    "ecr_fullname = f\"{account[0]}.dkr.ecr.{region[0]}.amazonaws.com/{image_base}:{image_tag}\""
+    "ecr_fullname = (\n",
+    "    f\"{account[0]}.dkr.ecr.{region[0]}.amazonaws.com/{image_base}:{image_tag}\"\n",
+    ")"
    ]
   },
   {
@@ -1989,7 +1991,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "endpoint_model = sagemaker.model.Model(image_uri=ecr_fullname, role=execution_role, model_data=s3_path_to_best_model)"
+    "endpoint_model = sagemaker.model.Model(\n",
+    "    image_uri=ecr_fullname, role=execution_role, model_data=s3_path_to_best_model\n",
+    ")"
    ]
   },
   {
@@ -2045,7 +2049,9 @@
     "DEMO_SERVING_FLAG = True\n",
     "\n",
     "if DEMO_SERVING_FLAG:\n",
-    "    endpoint_model.deploy(initial_instance_count=1, instance_type=\"ml.g4dn.2xlarge\")  #'ml.p3.2xlarge'"
+    "    endpoint_model.deploy(\n",
+    "        initial_instance_count=1, instance_type=\"ml.g4dn.2xlarge\"\n",
+    "    )  #'ml.p3.2xlarge'"
    ]
   },
   {
diff --git a/source/examples/rapids-sagemaker-hpo/serve.py b/source/examples/rapids-sagemaker-hpo/serve.py
index b8a01437..380fe867 100644
--- a/source/examples/rapids-sagemaker-hpo/serve.py
+++ b/source/examples/rapids-sagemaker-hpo/serve.py
@@ -123,7 +123,8 @@ def predict():
 
         except Exception:
             return Response(
-                response="Unable to parse input data" "[ should be json/string encoded list of arrays ]",
+                response="Unable to parse input data "
+                "[ should be json/string encoded list of arrays ]",
                 status=415,
                 mimetype="text/csv",
             )
@@ -134,7 +135,9 @@ def predict():
         try:
             start_time = time.perf_counter()
             if model_type == "XGBoost":
-                app.logger.info("running inference using XGBoost model :" f"{model_filename}")
+                app.logger.info(
+                    "running inference using XGBoost model :" f"{model_filename}"
+                )
 
                 if GPU_INFERENCE_FLAG:
                     predictions = reloaded_model.predict(query_data)
@@ -145,18 +148,28 @@ def predict():
                 predictions = (predictions > xgboost_threshold) * 1.0
 
             elif model_type == "RandomForest":
-                app.logger.info("running inference using RandomForest model :" f"{model_filename}")
+                app.logger.info(
+                    "running inference using RandomForest model :" f"{model_filename}"
+                )
 
                 if "gpu" in model_filename and not GPU_INFERENCE_FLAG:
-                    raise Exception("attempting to run CPU inference " "on a GPU trained RandomForest model")
+                    raise Exception(
+                        "attempting to run CPU inference "
+                        "on a GPU trained RandomForest model"
+                    )
 
                 predictions = reloaded_model.predict(query_data.astype("float32"))
 
             elif model_type == "KMeans":
-                app.logger.info("running inference using KMeans model :" f"{model_filename}")
+                app.logger.info(
+                    "running inference using KMeans model :" f"{model_filename}"
+                )
 
                 if "gpu" in model_filename and not GPU_INFERENCE_FLAG:
-                    raise Exception("attempting to run CPU inference " "on a GPU trained KMeans model")
+                    raise Exception(
+                        "attempting to run CPU inference "
+                        "on a GPU trained KMeans model"
+                    )
 
                 predictions = reloaded_model.predict(query_data.astype("float32"))
 
diff --git a/source/examples/rapids-sagemaker-hpo/train.py b/source/examples/rapids-sagemaker-hpo/train.py
index 4239e79a..7b25053a 100644
--- a/source/examples/rapids-sagemaker-hpo/train.py
+++ b/source/examples/rapids-sagemaker-hpo/train.py
@@ -35,7 +35,9 @@ def train():
         dataset = ml_workflow.handle_missing_data(dataset)
 
         # split into train and test set
-        X_train, X_test, y_train, y_test = ml_workflow.split_dataset(dataset, random_state=i_fold)
+        X_train, X_test, y_train, y_test = ml_workflow.split_dataset(
+            dataset, random_state=i_fold
+        )
 
         # train model
         trained_model = ml_workflow.fit(X_train, y_train)
@@ -59,7 +61,9 @@ def train():
 def configure_logging():
     hpo_log = logging.getLogger("hpo_log")
     log_handler = logging.StreamHandler()
-    log_handler.setFormatter(logging.Formatter("%(asctime)-15s %(levelname)8s %(name)s %(message)s"))
+    log_handler.setFormatter(
+        logging.Formatter("%(asctime)-15s %(levelname)8s %(name)s %(message)s")
+    )
     hpo_log.addHandler(log_handler)
     hpo_log.setLevel(logging.DEBUG)
     hpo_log.propagate = False
diff --git a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiCPU.py b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiCPU.py
index 25388834..f9ca0ed6 100644
--- a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiCPU.py
+++ b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiCPU.py
@@ -64,7 +64,9 @@ def cluster_initialize(self):
         dask.config.set(
             {
                 "temporary_directory": self.hpo_config.output_artifacts_directory,
-                "logging": {"loggers": {"distributed.nanny": {"level": "CRITICAL"}}},  # noqa
+                "logging": {
+                    "loggers": {"distributed.nanny": {"level": "CRITICAL"}}
+                },  # noqa
             }
         )
 
@@ -80,7 +82,9 @@ def ingest_data(self):
         if "Parquet" in self.hpo_config.input_file_type:
             hpo_log.info("> parquet data ingestion")
 
-            dataset = dask.dataframe.read_parquet(self.hpo_config.target_files, columns=self.hpo_config.dataset_columns)
+            dataset = dask.dataframe.read_parquet(
+                self.hpo_config.target_files, columns=self.hpo_config.dataset_columns
+            )
 
         elif "CSV" in self.hpo_config.input_file_type:
             hpo_log.info("> csv data ingestion")
@@ -208,7 +212,9 @@ def save_best_model(self, score, trained_model, filename="saved_model"):
         if score > self.best_score:
             self.best_score = score
             hpo_log.info("> saving high-scoring model")
-            output_filename = os.path.join(self.hpo_config.model_store_directory, filename)
+            output_filename = os.path.join(
+                self.hpo_config.model_store_directory, filename
+            )
             if "XGBoost" in self.hpo_config.model_type:
                 trained_model.save_model(f"{output_filename}_mcpu_xgb")
             elif "RandomForest" in self.hpo_config.model_type:
diff --git a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiGPU.py b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiGPU.py
index f0840f52..15ec66ef 100644
--- a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiGPU.py
+++ b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowMultiGPU.py
@@ -70,7 +70,9 @@ def cluster_initialize(self):
         dask.config.set(
             {
                 "temporary_directory": self.hpo_config.output_artifacts_directory,
-                "logging": {"loggers": {"distributed.nanny": {"level": "CRITICAL"}}},  # noqa
+                "logging": {
+                    "loggers": {"distributed.nanny": {"level": "CRITICAL"}}
+                },  # noqa
             }
         )
 
@@ -86,7 +88,9 @@ def ingest_data(self):
         if "Parquet" in self.hpo_config.input_file_type:
             hpo_log.info("> parquet data ingestion")
 
-            dataset = dask_cudf.read_parquet(self.hpo_config.target_files, columns=self.hpo_config.dataset_columns)
+            dataset = dask_cudf.read_parquet(
+                self.hpo_config.target_files, columns=self.hpo_config.dataset_columns
+            )
 
         elif "CSV" in self.hpo_config.input_file_type:
             hpo_log.info("> csv data ingestion")
@@ -185,7 +189,9 @@ def predict(self, trained_model, X_test, threshold=0.5):
         hpo_log.info("> predict with trained model ")
         if "XGBoost" in self.hpo_config.model_type:
             dtest = xgboost.dask.DaskDMatrix(self.client, X_test)
-            predictions = xgboost.dask.predict(self.client, trained_model, dtest).compute()
+            predictions = xgboost.dask.predict(
+                self.client, trained_model, dtest
+            ).compute()
 
             predictions = (predictions > threshold) * 1.0
 
@@ -217,7 +223,9 @@ def save_best_model(self, score, trained_model, filename="saved_model"):
         if score > self.best_score:
             self.best_score = score
             hpo_log.info("> saving high-scoring model")
-            output_filename = os.path.join(self.hpo_config.model_store_directory, filename)
+            output_filename = os.path.join(
+                self.hpo_config.model_store_directory, filename
+            )
 
             if "XGBoost" in self.hpo_config.model_type:
                 trained_model.save_model(f"{output_filename}_mgpu_xgb")
diff --git a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleCPU.py b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleCPU.py
index 6345ec7b..47fe8768 100644
--- a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleCPU.py
+++ b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleCPU.py
@@ -166,7 +166,9 @@ def predict(self, trained_model, X_test, threshold=0.5):
     def score(self, y_test, predictions):
         """Score predictions vs ground truth labels on test data"""
         dataset_dtype = self.hpo_config.dataset_dtype
-        score = accuracy_score(y_test.astype(dataset_dtype), predictions.astype(dataset_dtype))
+        score = accuracy_score(
+            y_test.astype(dataset_dtype), predictions.astype(dataset_dtype)
+        )
 
         hpo_log.info(f"\t score = {score}")
         self.cv_fold_scores.append(score)
@@ -178,7 +180,9 @@ def save_best_model(self, score, trained_model, filename="saved_model"):
         if score > self.best_score:
             self.best_score = score
             hpo_log.info("> saving high-scoring model")
-            output_filename = os.path.join(self.hpo_config.model_store_directory, filename)
+            output_filename = os.path.join(
+                self.hpo_config.model_store_directory, filename
+            )
             if "XGBoost" in self.hpo_config.model_type:
                 trained_model.save_model(f"{output_filename}_scpu_xgb")
             elif "RandomForest" in self.hpo_config.model_type:
diff --git a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleGPU.py b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleGPU.py
index a0895086..d9cc6674 100644
--- a/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleGPU.py
+++ b/source/examples/rapids-sagemaker-hpo/workflows/MLWorkflowSingleGPU.py
@@ -53,7 +53,9 @@ def ingest_data(self):
             return self.dataset_cache
 
         if "Parquet" in self.hpo_config.input_file_type:
-            dataset = cudf.read_parquet(self.hpo_config.target_files, columns=self.hpo_config.dataset_columns)  # noqa
+            dataset = cudf.read_parquet(
+                self.hpo_config.target_files, columns=self.hpo_config.dataset_columns
+            )  # noqa
 
         elif "CSV" in self.hpo_config.input_file_type:
             if isinstance(self.hpo_config.target_files, list):
@@ -62,9 +64,14 @@ def ingest_data(self):
                 filepath = self.hpo_config.target_files
 
             hpo_log.info(self.hpo_config.dataset_columns)
-            dataset = cudf.read_csv(filepath, names=self.hpo_config.dataset_columns, header=0)
+            dataset = cudf.read_csv(
+                filepath, names=self.hpo_config.dataset_columns, header=0
+            )
 
-        hpo_log.info(f"ingested {self.hpo_config.input_file_type} dataset;" f" shape = {dataset.shape}")
+        hpo_log.info(
+            f"ingested {self.hpo_config.input_file_type} dataset;"
+            f" shape = {dataset.shape}"
+        )
 
         self.dataset_cache = dataset
         return dataset
@@ -86,7 +93,9 @@ def split_dataset(self, dataset, random_state):
         hpo_log.info("> train-test split")
         label_column = self.hpo_config.label_column
 
-        X_train, X_test, y_train, y_test = train_test_split(dataset, label_column, random_state=random_state)
+        X_train, X_test, y_train, y_test = train_test_split(
+            dataset, label_column, random_state=random_state
+        )
 
         return (
             X_train.astype(self.hpo_config.dataset_dtype),
@@ -148,7 +157,9 @@ def predict(self, trained_model, X_test, threshold=0.5):
     def score(self, y_test, predictions):
         """Score predictions vs ground truth labels on test data"""
         dataset_dtype = self.hpo_config.dataset_dtype
-        score = accuracy_score(y_test.astype(dataset_dtype), predictions.astype(dataset_dtype))
+        score = accuracy_score(
+            y_test.astype(dataset_dtype), predictions.astype(dataset_dtype)
+        )
 
         hpo_log.info(f"score = {round(score,5)}")
         self.cv_fold_scores.append(score)
@@ -160,7 +171,9 @@ def save_best_model(self, score, trained_model, filename="saved_model"):
         if score > self.best_score:
             self.best_score = score
             hpo_log.info("saving high-scoring model")
-            output_filename = os.path.join(self.hpo_config.model_store_directory, filename)
+            output_filename = os.path.join(
+                self.hpo_config.model_store_directory, filename
+            )
             if "XGBoost" in self.hpo_config.model_type:
                 trained_model.save_model(f"{output_filename}_sgpu_xgb")
             elif "RandomForest" in self.hpo_config.model_type:
diff --git a/source/examples/time-series-forecasting-with-hpo/notebook.ipynb b/source/examples/time-series-forecasting-with-hpo/notebook.ipynb
index 89e9dbfd..a85dd241 100644
--- a/source/examples/time-series-forecasting-with-hpo/notebook.ipynb
+++ b/source/examples/time-series-forecasting-with-hpo/notebook.ipynb
@@ -364,7 +364,9 @@
    "source": [
     "train_df = cudf.read_csv(raw_data_dir / \"sales_train_evaluation.csv\")\n",
     "prices_df = cudf.read_csv(raw_data_dir / \"sell_prices.csv\")\n",
-    "calendar_df = cudf.read_csv(raw_data_dir / \"calendar.csv\").rename(columns={\"d\": \"day_id\"})"
+    "calendar_df = cudf.read_csv(raw_data_dir / \"calendar.csv\").rename(\n",
+    "    columns={\"d\": \"day_id\"}\n",
+    ")"
    ]
   },
   {
@@ -1402,7 +1404,9 @@
    ],
    "source": [
     "index_columns = [\"id\", \"item_id\", \"dept_id\", \"cat_id\", \"store_id\", \"state_id\"]\n",
-    "grid_df = cudf.melt(train_df, id_vars=index_columns, var_name=\"day_id\", value_name=TARGET)\n",
+    "grid_df = cudf.melt(\n",
+    "    train_df, id_vars=index_columns, var_name=\"day_id\", value_name=TARGET\n",
+    ")\n",
     "grid_df"
    ]
   },
@@ -1623,11 +1627,15 @@
     "    temp_df[\"day_id\"] = \"d_\" + str(END_TRAIN + i)\n",
     "    temp_df[TARGET] = np.nan  # Sales amount at time (n + i) is unknown\n",
     "    add_grid = cudf.concat([add_grid, temp_df])\n",
-    "add_grid[\"day_id\"] = add_grid[\"day_id\"].astype(\"category\")  # The day_id column is categorical, after cudf.melt\n",
+    "add_grid[\"day_id\"] = add_grid[\"day_id\"].astype(\n",
+    "    \"category\"\n",
+    ")  # The day_id column is categorical, after cudf.melt\n",
     "\n",
     "grid_df = cudf.concat([grid_df, add_grid])\n",
     "grid_df = grid_df.reset_index(drop=True)\n",
-    "grid_df[\"sales\"] = grid_df[\"sales\"].astype(np.float32)  # Use float32 type for sales column, to conserve memory\n",
+    "grid_df[\"sales\"] = grid_df[\"sales\"].astype(\n",
+    "    np.float32\n",
+    ")  # Use float32 type for sales column, to conserve memory\n",
     "grid_df"
    ]
   },
@@ -2074,7 +2082,9 @@
     }
    ],
    "source": [
-    "release_df = prices_df.groupby([\"store_id\", \"item_id\"])[\"wm_yr_wk\"].agg(\"min\").reset_index()\n",
+    "release_df = (\n",
+    "    prices_df.groupby([\"store_id\", \"item_id\"])[\"wm_yr_wk\"].agg(\"min\").reset_index()\n",
+    ")\n",
     "release_df.columns = [\"store_id\", \"item_id\", \"release_week\"]\n",
     "release_df"
    ]
@@ -3105,7 +3115,9 @@
    ],
    "source": [
     "grid_df = grid_df[grid_df[\"wm_yr_wk\"] >= grid_df[\"release_week\"]].reset_index(drop=True)\n",
-    "grid_df[\"wm_yr_wk\"] = grid_df[\"wm_yr_wk\"].astype(np.int32)  # Convert wm_yr_wk column to int32, to conserve memory\n",
+    "grid_df[\"wm_yr_wk\"] = grid_df[\"wm_yr_wk\"].astype(\n",
+    "    np.int32\n",
+    ")  # Convert wm_yr_wk column to int32, to conserve memory\n",
     "grid_df"
    ]
   },
@@ -3418,13 +3430,21 @@
    "outputs": [],
    "source": [
     "# Highest price over all weeks\n",
-    "prices_df[\"price_max\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"max\")\n",
+    "prices_df[\"price_max\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
+    "    \"sell_price\"\n",
+    "].transform(\"max\")\n",
     "# Lowest price over all weeks\n",
-    "prices_df[\"price_min\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"min\")\n",
+    "prices_df[\"price_min\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
+    "    \"sell_price\"\n",
+    "].transform(\"min\")\n",
     "# Standard deviation of the price\n",
-    "prices_df[\"price_std\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"std\")\n",
+    "prices_df[\"price_std\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
+    "    \"sell_price\"\n",
+    "].transform(\"std\")\n",
     "# Mean (average) price over all weeks\n",
-    "prices_df[\"price_mean\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"mean\")"
+    "prices_df[\"price_mean\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
+    "    \"sell_price\"\n",
+    "].transform(\"mean\")"
    ]
   },
   {
@@ -3464,7 +3484,9 @@
    },
    "outputs": [],
    "source": [
-    "prices_df[\"price_nunique\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].transform(\"nunique\")"
+    "prices_df[\"price_nunique\"] = prices_df.groupby([\"store_id\", \"item_id\"])[\n",
+    "    \"sell_price\"\n",
+    "].transform(\"nunique\")"
    ]
   },
   {
@@ -3484,7 +3506,9 @@
    },
    "outputs": [],
    "source": [
-    "prices_df[\"item_nunique\"] = prices_df.groupby([\"store_id\", \"sell_price\"])[\"item_id\"].transform(\"nunique\")"
+    "prices_df[\"item_nunique\"] = prices_df.groupby([\"store_id\", \"sell_price\"])[\n",
+    "    \"item_id\"\n",
+    "].transform(\"nunique\")"
    ]
   },
   {
@@ -3746,7 +3770,9 @@
    "outputs": [],
    "source": [
     "# Add \"month\" and \"year\" columns to prices_df\n",
-    "week_to_month_map = calendar_df[[\"wm_yr_wk\", \"month\", \"year\"]].drop_duplicates(subset=[\"wm_yr_wk\"])\n",
+    "week_to_month_map = calendar_df[[\"wm_yr_wk\", \"month\", \"year\"]].drop_duplicates(\n",
+    "    subset=[\"wm_yr_wk\"]\n",
+    ")\n",
     "prices_df = prices_df.merge(week_to_month_map, on=[\"wm_yr_wk\"], how=\"left\")\n",
     "\n",
     "# Sort by wm_yr_wk. The rows will also be sorted in ascending months and years.\n",
@@ -3763,17 +3789,17 @@
    "outputs": [],
    "source": [
     "# Compare with the average price in the previous week\n",
-    "prices_df[\"price_momentum\"] = prices_df[\"sell_price\"] / prices_df.groupby([\"store_id\", \"item_id\"])[\"sell_price\"].shift(\n",
-    "    1\n",
-    ")\n",
+    "prices_df[\"price_momentum\"] = prices_df[\"sell_price\"] / prices_df.groupby(\n",
+    "    [\"store_id\", \"item_id\"]\n",
+    ")[\"sell_price\"].shift(1)\n",
     "# Compare with the average price in the previous month\n",
-    "prices_df[\"price_momentum_m\"] = prices_df[\"sell_price\"] / prices_df.groupby([\"store_id\", \"item_id\", \"month\"])[\n",
-    "    \"sell_price\"\n",
-    "].transform(\"mean\")\n",
+    "prices_df[\"price_momentum_m\"] = prices_df[\"sell_price\"] / prices_df.groupby(\n",
+    "    [\"store_id\", \"item_id\", \"month\"]\n",
+    ")[\"sell_price\"].transform(\"mean\")\n",
     "# Compare with the average price in the previous year\n",
-    "prices_df[\"price_momentum_y\"] = prices_df[\"sell_price\"] / prices_df.groupby([\"store_id\", \"item_id\", \"year\"])[\n",
-    "    \"sell_price\"\n",
-    "].transform(\"mean\")"
+    "prices_df[\"price_momentum_y\"] = prices_df[\"sell_price\"] / prices_df.groupby(\n",
+    "    [\"store_id\", \"item_id\", \"year\"]\n",
+    ")[\"sell_price\"].transform(\"mean\")"
    ]
   },
   {
@@ -4127,8 +4153,12 @@
     "# After merging price_df, keep columns id and day_id from grid_df and drop all other columns from grid_df\n",
     "original_columns = list(grid_df)\n",
     "grid_df_with_price = grid_df.copy()\n",
-    "grid_df_with_price = grid_df_with_price.merge(prices_df, on=[\"store_id\", \"item_id\", \"wm_yr_wk\"], how=\"left\")\n",
-    "columns_to_keep = [\"id\", \"day_id\"] + [col for col in list(grid_df_with_price) if col not in original_columns]\n",
+    "grid_df_with_price = grid_df_with_price.merge(\n",
+    "    prices_df, on=[\"store_id\", \"item_id\", \"wm_yr_wk\"], how=\"left\"\n",
+    ")\n",
+    "columns_to_keep = [\"id\", \"day_id\"] + [\n",
+    "    col for col in list(grid_df_with_price) if col not in original_columns\n",
+    "]\n",
     "grid_df_with_price = grid_df_with_price[[\"id\", \"day_id\"] + columns_to_keep]\n",
     "grid_df_with_price"
    ]
@@ -4395,7 +4425,9 @@
     "    \"snap_TX\",\n",
     "    \"snap_WI\",\n",
     "]\n",
-    "grid_df_with_calendar = grid_df_id_only.merge(calendar_df[icols], on=[\"day_id\"], how=\"left\")\n",
+    "grid_df_with_calendar = grid_df_id_only.merge(\n",
+    "    calendar_df[icols], on=[\"day_id\"], how=\"left\"\n",
+    ")\n",
     "grid_df_with_calendar"
    ]
   },
@@ -4745,14 +4777,22 @@
     "import cupy as cp\n",
     "\n",
     "grid_df_with_calendar[\"tm_d\"] = grid_df_with_calendar[\"date\"].dt.day.astype(np.int8)\n",
-    "grid_df_with_calendar[\"tm_w\"] = grid_df_with_calendar[\"date\"].dt.isocalendar().week.astype(np.int8)\n",
+    "grid_df_with_calendar[\"tm_w\"] = (\n",
+    "    grid_df_with_calendar[\"date\"].dt.isocalendar().week.astype(np.int8)\n",
+    ")\n",
     "grid_df_with_calendar[\"tm_m\"] = grid_df_with_calendar[\"date\"].dt.month.astype(np.int8)\n",
     "grid_df_with_calendar[\"tm_y\"] = grid_df_with_calendar[\"date\"].dt.year\n",
-    "grid_df_with_calendar[\"tm_y\"] = (grid_df_with_calendar[\"tm_y\"] - grid_df_with_calendar[\"tm_y\"].min()).astype(np.int8)\n",
-    "grid_df_with_calendar[\"tm_wm\"] = cp.ceil(grid_df_with_calendar[\"tm_d\"].to_cupy() / 7).astype(\n",
+    "grid_df_with_calendar[\"tm_y\"] = (\n",
+    "    grid_df_with_calendar[\"tm_y\"] - grid_df_with_calendar[\"tm_y\"].min()\n",
+    ").astype(np.int8)\n",
+    "grid_df_with_calendar[\"tm_wm\"] = cp.ceil(\n",
+    "    grid_df_with_calendar[\"tm_d\"].to_cupy() / 7\n",
+    ").astype(\n",
     "    np.int8\n",
     ")  # which week in tje month?\n",
-    "grid_df_with_calendar[\"tm_dw\"] = grid_df_with_calendar[\"date\"].dt.dayofweek.astype(np.int8)  # which day in the week?\n",
+    "grid_df_with_calendar[\"tm_dw\"] = grid_df_with_calendar[\"date\"].dt.dayofweek.astype(\n",
+    "    np.int8\n",
+    ")  # which day in the week?\n",
     "grid_df_with_calendar[\"tm_w_end\"] = (grid_df_with_calendar[\"tm_dw\"] >= 5).astype(\n",
     "    np.int8\n",
     ")  # whether today is in the weekend\n",
@@ -4812,7 +4852,10 @@
     "grid_df_lags = grid_df_lags.sort_values([\"id\", \"day_id\"])\n",
     "\n",
     "grid_df_lags = grid_df_lags.assign(\n",
-    "    **{f\"sales_lag_{ld}\": grid_df_lags.groupby([\"id\"])[\"sales\"].shift(ld) for ld in LAG_DAYS}\n",
+    "    **{\n",
+    "        f\"sales_lag_{ld}\": grid_df_lags.groupby([\"id\"])[\"sales\"].shift(ld)\n",
+    "        for ld in LAG_DAYS\n",
+    "    }\n",
     ")"
    ]
   },
@@ -5206,10 +5249,18 @@
     "for i in [7, 14, 30, 60, 180]:\n",
     "    print(f\"    Window size: {i}\")\n",
     "    grid_df_lags[f\"rolling_mean_{i}\"] = (\n",
-    "        grid_df_lags.groupby([\"id\"])[\"sales\"].shift(SHIFT_DAY).rolling(i).mean().astype(np.float32)\n",
+    "        grid_df_lags.groupby([\"id\"])[\"sales\"]\n",
+    "        .shift(SHIFT_DAY)\n",
+    "        .rolling(i)\n",
+    "        .mean()\n",
+    "        .astype(np.float32)\n",
     "    )\n",
     "    grid_df_lags[f\"rolling_std_{i}\"] = (\n",
-    "        grid_df_lags.groupby([\"id\"])[\"sales\"].shift(SHIFT_DAY).rolling(i).std().astype(np.float32)\n",
+    "        grid_df_lags.groupby([\"id\"])[\"sales\"]\n",
+    "        .shift(SHIFT_DAY)\n",
+    "        .rolling(i)\n",
+    "        .std()\n",
+    "        .astype(np.float32)\n",
     "    )"
    ]
   },
@@ -5726,7 +5777,9 @@
     "icols = [[\"store_id\", \"dept_id\"], [\"item_id\", \"state_id\"]]\n",
     "new_columns = []\n",
     "\n",
-    "grid_df_target_enc = grid_df[[\"id\", \"day_id\", \"item_id\", \"state_id\", \"store_id\", \"dept_id\", \"sales\"]].copy()\n",
+    "grid_df_target_enc = grid_df[\n",
+    "    [\"id\", \"day_id\", \"item_id\", \"state_id\", \"store_id\", \"dept_id\", \"sales\"]\n",
+    "].copy()\n",
     "grid_df_target_enc[\"sales\"].fillna(value=0, inplace=True)\n",
     "\n",
     "for col in icols:\n",
@@ -6100,7 +6153,9 @@
     "    if dept is None:\n",
     "        grid1 = grid_df[grid_df[\"store_id\"] == store]\n",
     "    else:\n",
-    "        grid1 = grid_df[(grid_df[\"store_id\"] == store) & (grid_df[\"dept_id\"] == dept)].drop(columns=[\"dept_id\"])\n",
+    "        grid1 = grid_df[\n",
+    "            (grid_df[\"store_id\"] == store) & (grid_df[\"dept_id\"] == dept)\n",
+    "        ].drop(columns=[\"dept_id\"])\n",
     "    grid1 = grid1.drop(columns=[\"release_week\", \"wm_yr_wk\", \"store_id\", \"state_id\"])\n",
     "\n",
     "    grid2 = grid_df_with_price[[\"id\", \"day_id\"] + grid2_colnm]\n",
@@ -6121,7 +6176,13 @@
     "    gc.collect()\n",
     "\n",
     "    grid_combined = grid_combined.drop(columns=[\"id\"])\n",
-    "    grid_combined[\"day_id\"] = grid_combined[\"day_id\"].to_pandas().astype(\"str\").apply(lambda x: x[2:]).astype(np.int16)\n",
+    "    grid_combined[\"day_id\"] = (\n",
+    "        grid_combined[\"day_id\"]\n",
+    "        .to_pandas()\n",
+    "        .astype(\"str\")\n",
+    "        .apply(lambda x: x[2:])\n",
+    "        .astype(np.int16)\n",
+    "    )\n",
     "\n",
     "    return grid_combined"
    ]
@@ -6226,7 +6287,9 @@
     "for store in STORES:\n",
     "    print(f\"Processing store {store}...\")\n",
     "    segment_df = prepare_data(store=store)\n",
-    "    segment_df.to_pandas().to_pickle(segmented_data_dir / f\"combined_df_store_{store}.pkl\")\n",
+    "    segment_df.to_pandas().to_pickle(\n",
+    "        segmented_data_dir / f\"combined_df_store_{store}.pkl\"\n",
+    "    )\n",
     "    del segment_df\n",
     "    gc.collect()\n",
     "\n",
@@ -6234,7 +6297,9 @@
     "    for dept in DEPTS:\n",
     "        print(f\"Processing (store {store}, department {dept})...\")\n",
     "        segment_df = prepare_data(store=store, dept=dept)\n",
-    "        segment_df.to_pandas().to_pickle(segmented_data_dir / f\"combined_df_store_{store}_dept_{dept}.pkl\")\n",
+    "        segment_df.to_pandas().to_pickle(\n",
+    "            segmented_data_dir / f\"combined_df_store_{store}_dept_{dept}.pkl\"\n",
+    "        )\n",
     "        del segment_df\n",
     "        gc.collect()"
    ]
@@ -6964,7 +7029,11 @@
     "    df_valid = df[(df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])]\n",
     "\n",
     "    # Compute denominator: 1/(n-1) * sum( (y(t) - y(t-1))**2 )\n",
-    "    diff = df_train.sort_values([\"item_id\", \"day_id\"]).groupby([\"item_id\"])[[\"sales\"]].diff(1)\n",
+    "    diff = (\n",
+    "        df_train.sort_values([\"item_id\", \"day_id\"])\n",
+    "        .groupby([\"item_id\"])[[\"sales\"]]\n",
+    "        .diff(1)\n",
+    "    )\n",
     "    x = (\n",
     "        df_train[[\"item_id\", \"day_id\"]]\n",
     "        .join(diff, how=\"left\")\n",
@@ -7039,7 +7108,9 @@
     "        \"alpha\": trial.suggest_float(\"alpha\", 1e-8, 100.0, log=True),\n",
     "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.2, 1.0),\n",
     "        \"max_depth\": trial.suggest_int(\"max_depth\", 2, 6, step=1),\n",
-    "        \"min_child_weight\": trial.suggest_float(\"min_child_weight\", 1e-8, 100, log=True),\n",
+    "        \"min_child_weight\": trial.suggest_float(\n",
+    "            \"min_child_weight\", 1e-8, 100, log=True\n",
+    "        ),\n",
     "        \"gamma\": trial.suggest_float(\"gamma\", 1e-8, 1.0, log=True),\n",
     "        \"tweedie_variance_power\": trial.suggest_float(\"tweedie_variance_power\", 1, 2),\n",
     "    }\n",
@@ -7050,19 +7121,29 @@
     "        with fs.open(f\"{bucket_name}/combined_df_store_{store}.pkl\", \"rb\") as f:\n",
     "            df = cudf.DataFrame(pd.read_pickle(f))\n",
     "        for train_mask, valid_mask in cv_folds:\n",
-    "            df_train = df[(df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])]\n",
-    "            df_valid = df[(df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])]\n",
+    "            df_train = df[\n",
+    "                (df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])\n",
+    "            ]\n",
+    "            df_valid = df[\n",
+    "                (df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])\n",
+    "            ]\n",
     "\n",
     "            X_train, y_train = (\n",
-    "                df_train.drop(columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"]),\n",
+    "                df_train.drop(\n",
+    "                    columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"]\n",
+    "                ),\n",
     "                df_train[\"sales\"],\n",
     "            )\n",
-    "            X_valid = df_valid.drop(columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"])\n",
+    "            X_valid = df_valid.drop(\n",
+    "                columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"]\n",
+    "            )\n",
     "\n",
     "            clf = xgb.XGBRegressor(**params)\n",
     "            clf.fit(X_train, y_train)\n",
     "            pred_sales = clf.predict(X_valid)\n",
-    "            scores[store_id].append(wrmsse(product_weights, df, pred_sales, train_mask, valid_mask))\n",
+    "            scores[store_id].append(\n",
+    "                wrmsse(product_weights, df, pred_sales, train_mask, valid_mask)\n",
+    "            )\n",
     "            del df_train, df_valid, X_train, y_train, clf\n",
     "            gc.collect()\n",
     "        del df\n",
@@ -7157,7 +7238,9 @@
     "    for fut in partition[\"futures\"]:\n",
     "        _ = fut.result()  # Ensure that the training job was successful\n",
     "    tnow = time.perf_counter()\n",
-    "    print(f\"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}\")\n",
+    "    print(\n",
+    "        f\"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}\"\n",
+    "    )\n",
     "tend = time.perf_counter()\n",
     "print(f\"Total time elapsed = {tend - tstart}\")"
    ]
@@ -7398,7 +7481,9 @@
     "    df_test = df[(df[\"day_id\"] >= holdout[0]) & (df[\"day_id\"] < holdout[1])]\n",
     "    X_test = df_test.drop(columns=[\"item_id\", \"dept_id\", \"cat_id\", \"day_id\", \"sales\"])\n",
     "    pred_sales = model[store].predict(X_test)\n",
-    "    test_wrmsse += wrmsse(product_weights, df, pred_sales, train_mask=[0, 1914], valid_mask=holdout)\n",
+    "    test_wrmsse += wrmsse(\n",
+    "        product_weights, df, pred_sales, train_mask=[0, 1914], valid_mask=holdout\n",
+    "    )\n",
     "print(f\"WRMSSE metric on the held-out test set: {test_wrmsse}\")"
    ]
   },
@@ -7453,7 +7538,9 @@
     "        \"alpha\": trial.suggest_float(\"alpha\", 1e-8, 100.0, log=True),\n",
     "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.2, 1.0),\n",
     "        \"max_depth\": trial.suggest_int(\"max_depth\", 2, 6, step=1),\n",
-    "        \"min_child_weight\": trial.suggest_float(\"min_child_weight\", 1e-8, 100, log=True),\n",
+    "        \"min_child_weight\": trial.suggest_float(\n",
+    "            \"min_child_weight\", 1e-8, 100, log=True\n",
+    "        ),\n",
     "        \"gamma\": trial.suggest_float(\"gamma\", 1e-8, 1.0, log=True),\n",
     "        \"tweedie_variance_power\": trial.suggest_float(\"tweedie_variance_power\", 1, 2),\n",
     "    }\n",
@@ -7462,17 +7549,25 @@
     "    for store_id, store in enumerate(STORES):\n",
     "        for dept_id, dept in enumerate(DEPTS):\n",
     "            print(f\"Processing store {store}, department {dept}...\")\n",
-    "            with fs.open(f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\") as f:\n",
+    "            with fs.open(\n",
+    "                f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\"\n",
+    "            ) as f:\n",
     "                df = cudf.DataFrame(pd.read_pickle(f))\n",
     "            for train_mask, valid_mask in cv_folds:\n",
-    "                df_train = df[(df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])]\n",
-    "                df_valid = df[(df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])]\n",
+    "                df_train = df[\n",
+    "                    (df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])\n",
+    "                ]\n",
+    "                df_valid = df[\n",
+    "                    (df[\"day_id\"] >= valid_mask[0]) & (df[\"day_id\"] < valid_mask[1])\n",
+    "                ]\n",
     "\n",
     "                X_train, y_train = (\n",
     "                    df_train.drop(columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"]),\n",
     "                    df_train[\"sales\"],\n",
     "                )\n",
-    "                X_valid = df_valid.drop(columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"])\n",
+    "                X_valid = df_valid.drop(\n",
+    "                    columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"]\n",
+    "                )\n",
     "\n",
     "                clf = xgb.XGBRegressor(**params)\n",
     "                clf.fit(X_train, y_train)\n",
@@ -7566,7 +7661,9 @@
     "    for fut in partition[\"futures\"]:\n",
     "        _ = fut.result()  # Ensure that the training job was successful\n",
     "    tnow = time.perf_counter()\n",
-    "    print(f\"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}\")\n",
+    "    print(\n",
+    "        f\"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}\"\n",
+    "    )\n",
     "tend = time.perf_counter()\n",
     "print(f\"Total time elapsed = {tend - tstart}\")"
    ]
@@ -7652,10 +7749,14 @@
     "    for _, store in enumerate(STORES):\n",
     "        for _, dept in enumerate(DEPTS):\n",
     "            print(f\"Processing store {store}, department {dept}...\")\n",
-    "            with fs.open(f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\") as f:\n",
+    "            with fs.open(\n",
+    "                f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\"\n",
+    "            ) as f:\n",
     "                df = cudf.DataFrame(pd.read_pickle(f))\n",
     "            for train_mask, _ in cv_folds:\n",
-    "                df_train = df[(df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])]\n",
+    "                df_train = df[\n",
+    "                    (df[\"day_id\"] >= train_mask[0]) & (df[\"day_id\"] < train_mask[1])\n",
+    "                ]\n",
     "                X_train, y_train = (\n",
     "                    df_train.drop(columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"]),\n",
     "                    df_train[\"sales\"],\n",
@@ -7838,12 +7939,16 @@
     "    df_test[\"pred2\"] = [np.nan] * len(df_test)\n",
     "    df_test[\"pred2\"] = df_test[\"pred2\"].astype(\"float32\")\n",
     "    for dept in DEPTS:\n",
-    "        with fs.open(f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\") as f:\n",
+    "        with fs.open(\n",
+    "            f\"{bucket_name}/combined_df_store_{store}_dept_{dept}.pkl\", \"rb\"\n",
+    "        ) as f:\n",
     "            df2 = cudf.DataFrame(pd.read_pickle(f))\n",
     "        df2_test = df2[(df2[\"day_id\"] >= holdout[0]) & (df2[\"day_id\"] < holdout[1])]\n",
     "        X_test = df2_test.drop(columns=[\"item_id\", \"cat_id\", \"day_id\", \"sales\"])\n",
     "        assert np.sum(df_test[\"dept_id\"] == dept) == len(X_test)\n",
-    "        df_test[\"pred2\"][df_test[\"dept_id\"] == dept] = model_alt[(store, dept)].predict(X_test)\n",
+    "        df_test[\"pred2\"][df_test[\"dept_id\"] == dept] = model_alt[(store, dept)].predict(\n",
+    "            X_test\n",
+    "        )\n",
     "\n",
     "    # Average prediction\n",
     "    df_test[\"avg_pred\"] = (df_test[\"pred1\"] + df_test[\"pred2\"]) / 2.0\n",
diff --git a/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb b/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb
index 6bb57c30..73cf685e 100644
--- a/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb
+++ b/source/examples/xgboost-azure-mnmg-daskcloudprovider/notebook.ipynb
@@ -1380,7 +1380,9 @@
     "\n",
     "pp = pprint.PrettyPrinter()\n",
     "\n",
-    "pp.pprint(client.scheduler_info())  # will show some information of the GPUs of the workers"
+    "pp.pprint(\n",
+    "    client.scheduler_info()\n",
+    ")  # will show some information of the GPUs of the workers"
    ]
   },
   {
@@ -1701,7 +1703,9 @@
     "    taxi_data = taxi_data[fields]\n",
     "    taxi_data = taxi_data.reset_index()\n",
     "\n",
-    "    return persist_train_infer_split(client, taxi_data, response_dtype, response_id, infer_frac, random_state)"
+    "    return persist_train_infer_split(\n",
+    "        client, taxi_data, response_dtype, response_id, infer_frac, random_state\n",
+    "    )"
    ]
   },
   {
@@ -2162,7 +2166,9 @@
    "source": [
     "data_train = xgb.dask.DaskDMatrix(client, X_train, y_train)\n",
     "tic = timer()\n",
-    "xgboost_output = xgb.dask.train(client, params, data_train, num_boost_round=params[\"num_boost_rounds\"])\n",
+    "xgboost_output = xgb.dask.train(\n",
+    "    client, params, data_train, num_boost_round=params[\"num_boost_rounds\"]\n",
+    ")\n",
     "xgb_gpu_model = xgboost_output[\"booster\"]\n",
     "toc = timer()\n",
     "print(f\"Wall clock time taken for this cell : {toc-tic} s\")"
@@ -2442,7 +2448,9 @@
    ],
    "source": [
     "tic = timer()\n",
-    "predictions = X_infer.map_partitions(predict_model, meta=\"float\")  # this is like MPI reduce\n",
+    "predictions = X_infer.map_partitions(\n",
+    "    predict_model, meta=\"float\"\n",
+    ")  # this is like MPI reduce\n",
     "y_pred = predictions.compute()\n",
     "wait(y_pred)\n",
     "toc = timer()\n",
@@ -2464,7 +2472,9 @@
    ],
    "source": [
     "rows_csv = X_infer.iloc[:, 0].shape[0].compute()\n",
-    "print(f\"It took {toc-tic} seconds to predict on {rows_csv} rows using FIL distributedly on each worker\")"
+    "print(\n",
+    "    f\"It took {toc-tic} seconds to predict on {rows_csv} rows using FIL distributedly on each worker\"\n",
+    ")"
    ]
   },
   {
diff --git a/source/examples/xgboost-dask-databricks/notebook.ipynb b/source/examples/xgboost-dask-databricks/notebook.ipynb
index 8a707187..a7e63b4a 100644
--- a/source/examples/xgboost-dask-databricks/notebook.ipynb
+++ b/source/examples/xgboost-dask-databricks/notebook.ipynb
@@ -480,7 +480,9 @@
     "# Check if the file already exists\n",
     "if not os.path.exists(file_path):\n",
     "    # If not, download dataset to the directory\n",
-    "    data_url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz\"\n",
+    "    data_url = (\n",
+    "        \"https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz\"\n",
+    "    )\n",
     "    download_command = f\"curl {data_url} --output {file_path}\"\n",
     "    subprocess.run(download_command, shell=True)\n",
     "\n",
@@ -1252,8 +1254,12 @@
     "    y = ddf[\"label\"]\n",
     "    X = ddf[ddf.columns.difference([\"label\"])]\n",
     "\n",
-    "    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33, random_state=42)\n",
-    "    X_train, X_valid, y_train, y_valid = client.persist([X_train, X_valid, y_train, y_valid])\n",
+    "    X_train, X_valid, y_train, y_valid = train_test_split(\n",
+    "        X, y, test_size=0.33, random_state=42\n",
+    "    )\n",
+    "    X_train, X_valid, y_train, y_valid = client.persist(\n",
+    "        [X_train, X_valid, y_train, y_valid]\n",
+    "    )\n",
     "    wait([X_train, X_valid, y_train, y_valid])\n",
     "\n",
     "    return X_train, X_valid, y_train, y_valid"
@@ -1684,7 +1690,9 @@
     "    # Use early stopping with custom objective and metric.\n",
     "    early_stopping_rounds = 5\n",
     "    # Specify the metric we want to use for early stopping.\n",
-    "    es = xgb.callback.EarlyStopping(rounds=early_stopping_rounds, save_best=True, metric_name=\"CustomErr\")\n",
+    "    es = xgb.callback.EarlyStopping(\n",
+    "        rounds=early_stopping_rounds, save_best=True, metric_name=\"CustomErr\"\n",
+    "    )\n",
     "\n",
     "    Xy = dxgb.DaskDeviceQuantileDMatrix(client, X, y)\n",
     "    Xy_valid = dxgb.DaskDMatrix(client, X_valid, y_valid)\n",
@@ -1734,7 +1742,9 @@
     }
    ],
    "source": [
-    "booster_custom = fit_model_customized_objective(client, X=X_train, y=y_train, X_valid=X_valid, y_valid=y_valid)\n",
+    "booster_custom = fit_model_customized_objective(\n",
+    "    client, X=X_train, y=y_train, X_valid=X_valid, y_valid=y_valid\n",
+    ")\n",
     "booster_custom"
    ]
   },
diff --git a/source/examples/xgboost-gpu-hpo-job-parallel-k8s/notebook.ipynb b/source/examples/xgboost-gpu-hpo-job-parallel-k8s/notebook.ipynb
index 2b900ce3..944b106f 100644
--- a/source/examples/xgboost-gpu-hpo-job-parallel-k8s/notebook.ipynb
+++ b/source/examples/xgboost-gpu-hpo-job-parallel-k8s/notebook.ipynb
@@ -315,7 +315,10 @@
     "    futures.append(\n",
     "        {\n",
     "            \"range\": iter_range,\n",
-    "            \"futures\": [client.submit(study.optimize, objective, n_trials=1, pure=False) for _ in range(*iter_range)],\n",
+    "            \"futures\": [\n",
+    "                client.submit(study.optimize, objective, n_trials=1, pure=False)\n",
+    "                for _ in range(*iter_range)\n",
+    "            ],\n",
     "        }\n",
     "    )\n",
     "for partition in futures:\n",
@@ -409,7 +412,9 @@
     "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.2, 1.0),\n",
     "        \"max_depth\": trial.suggest_int(\"max_depth\", 2, 10, step=1),\n",
     "        # minimum child weight, larger the term more conservative the tree.\n",
-    "        \"min_child_weight\": trial.suggest_float(\"min_child_weight\", 1e-8, 100, log=True),\n",
+    "        \"min_child_weight\": trial.suggest_float(\n",
+    "            \"min_child_weight\", 1e-8, 100, log=True\n",
+    "        ),\n",
     "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-8, 1.0, log=True),\n",
     "        # defines how selective algorithm is.\n",
     "        \"gamma\": trial.suggest_float(\"gamma\", 1e-8, 1.0, log=True),\n",
@@ -469,14 +474,19 @@
     "# Optimize in parallel on your Dask cluster\n",
     "backend_storage = optuna.storages.InMemoryStorage()\n",
     "dask_storage = optuna.integration.DaskStorage(storage=backend_storage, client=client)\n",
-    "study = optuna.create_study(direction=\"maximize\", sampler=RandomSampler(seed=0), storage=dask_storage)\n",
+    "study = optuna.create_study(\n",
+    "    direction=\"maximize\", sampler=RandomSampler(seed=0), storage=dask_storage\n",
+    ")\n",
     "futures = []\n",
     "for i in range(0, n_trials, n_workers * 4):\n",
     "    iter_range = (i, min([i + n_workers * 4, n_trials]))\n",
     "    futures.append(\n",
     "        {\n",
     "            \"range\": iter_range,\n",
-    "            \"futures\": [client.submit(study.optimize, objective, n_trials=1, pure=False) for _ in range(*iter_range)],\n",
+    "            \"futures\": [\n",
+    "                client.submit(study.optimize, objective, n_trials=1, pure=False)\n",
+    "                for _ in range(*iter_range)\n",
+    "            ],\n",
     "        }\n",
     "    )\n",
     "for partition in futures:\n",
diff --git a/source/examples/xgboost-gpu-hpo-job-parallel-ngc/notebook.ipynb b/source/examples/xgboost-gpu-hpo-job-parallel-ngc/notebook.ipynb
index 4b1ab929..051464ac 100644
--- a/source/examples/xgboost-gpu-hpo-job-parallel-ngc/notebook.ipynb
+++ b/source/examples/xgboost-gpu-hpo-job-parallel-ngc/notebook.ipynb
@@ -1567,7 +1567,10 @@
     "    futures.append(\n",
     "        {\n",
     "            \"range\": iter_range,\n",
-    "            \"futures\": [client.submit(study.optimize, objective, n_trials=1, pure=False) for _ in range(*iter_range)],\n",
+    "            \"futures\": [\n",
+    "                client.submit(study.optimize, objective, n_trials=1, pure=False)\n",
+    "                for _ in range(*iter_range)\n",
+    "            ],\n",
     "        }\n",
     "    )\n",
     "for partition in futures:\n",
@@ -1663,7 +1666,9 @@
     "        \"colsample_bytree\": trial.suggest_float(\"colsample_bytree\", 0.2, 1.0),\n",
     "        \"max_depth\": trial.suggest_int(\"max_depth\", 2, 10, step=1),\n",
     "        # minimum child weight, larger the term more conservative the tree.\n",
-    "        \"min_child_weight\": trial.suggest_float(\"min_child_weight\", 1e-8, 100, log=True),\n",
+    "        \"min_child_weight\": trial.suggest_float(\n",
+    "            \"min_child_weight\", 1e-8, 100, log=True\n",
+    "        ),\n",
     "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-8, 1.0, log=True),\n",
     "        # defines how selective algorithm is.\n",
     "        \"gamma\": trial.suggest_float(\"gamma\", 1e-8, 1.0, log=True),\n",
@@ -1725,14 +1730,19 @@
     "# Optimize in parallel on your Dask cluster\n",
     "backend_storage = optuna.storages.InMemoryStorage()\n",
     "dask_storage = optuna.integration.DaskStorage(storage=backend_storage, client=client)\n",
-    "study = optuna.create_study(direction=\"maximize\", sampler=RandomSampler(seed=0), storage=dask_storage)\n",
+    "study = optuna.create_study(\n",
+    "    direction=\"maximize\", sampler=RandomSampler(seed=0), storage=dask_storage\n",
+    ")\n",
     "futures = []\n",
     "for i in range(0, n_trials, n_workers * 4):\n",
     "    iter_range = (i, min([i + n_workers * 4, n_trials]))\n",
     "    futures.append(\n",
     "        {\n",
     "            \"range\": iter_range,\n",
-    "            \"futures\": [client.submit(study.optimize, objective, n_trials=1, pure=False) for _ in range(*iter_range)],\n",
+    "            \"futures\": [\n",
+    "                client.submit(study.optimize, objective, n_trials=1, pure=False)\n",
+    "                for _ in range(*iter_range)\n",
+    "            ],\n",
     "        }\n",
     "    )\n",
     "for partition in futures:\n",
diff --git a/source/examples/xgboost-gpu-hpo-mnmg-parallel-k8s/notebook.ipynb b/source/examples/xgboost-gpu-hpo-mnmg-parallel-k8s/notebook.ipynb
index bcaeab88..524ed498 100644
--- a/source/examples/xgboost-gpu-hpo-mnmg-parallel-k8s/notebook.ipynb
+++ b/source/examples/xgboost-gpu-hpo-mnmg-parallel-k8s/notebook.ipynb
@@ -296,7 +296,9 @@
     "\n",
     "print(f\"{n_clusters=}\")\n",
     "if n_clusters == 0:\n",
-    "    raise ValueError(\"No cluster can be created. Reduce `n_worker_per_dask_cluster` or create more compute nodes\")\n",
+    "    raise ValueError(\n",
+    "        \"No cluster can be created. Reduce `n_worker_per_dask_cluster` or create more compute nodes\"\n",
+    "    )\n",
     "print(f\"{n_worker_per_dask_cluster=}\")\n",
     "print(f\"{n_node_per_dask_cluster=}\")\n",
     "\n",
@@ -471,8 +473,12 @@
     "\n",
     "\n",
     "def compute_haversine_distance(df):\n",
-    "    pickup = cuspatial.GeoSeries.from_points_xy(df[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns())\n",
-    "    dropoff = cuspatial.GeoSeries.from_points_xy(df[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns())\n",
+    "    pickup = cuspatial.GeoSeries.from_points_xy(\n",
+    "        df[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns()\n",
+    "    )\n",
+    "    dropoff = cuspatial.GeoSeries.from_points_xy(\n",
+    "        df[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns()\n",
+    "    )\n",
     "    df[\"haversine_distance\"] = cuspatial.haversine_distance(pickup, dropoff)\n",
     "    df[\"haversine_distance\"] = df[\"haversine_distance\"].astype(\"float32\")\n",
     "    return df\n",
@@ -529,7 +535,9 @@
     "    taxi_df[\"is_weekend\"] = (taxi_df[\"day_of_week\"] >= 5).astype(\"int32\")\n",
     "\n",
     "    # calculate the time difference between dropoff and pickup.\n",
-    "    taxi_df[\"diff\"] = taxi_df[\"dropoff_datetime\"].astype(\"int32\") - taxi_df[\"pickup_datetime\"].astype(\"int32\")\n",
+    "    taxi_df[\"diff\"] = taxi_df[\"dropoff_datetime\"].astype(\"int32\") - taxi_df[\n",
+    "        \"pickup_datetime\"\n",
+    "    ].astype(\"int32\")\n",
     "    taxi_df[\"diff\"] = (taxi_df[\"diff\"] / 1000).astype(\"int32\")\n",
     "\n",
     "    taxi_df[\"pickup_latitude_r\"] = taxi_df[\"pickup_latitude\"] // 0.01 * 0.01\n",
@@ -542,7 +550,11 @@
     "\n",
     "    taxi_df = taxi_df.map_partitions(compute_haversine_distance)\n",
     "\n",
-    "    X = taxi_df.drop([\"fare_amount\"], axis=1).astype(\"float32\").to_dask_array(lengths=True)\n",
+    "    X = (\n",
+    "        taxi_df.drop([\"fare_amount\"], axis=1)\n",
+    "        .astype(\"float32\")\n",
+    "        .to_dask_array(lengths=True)\n",
+    "    )\n",
     "    y = taxi_df[\"fare_amount\"].astype(\"float32\").to_dask_array(lengths=True)\n",
     "\n",
     "    X._meta = cp.asarray(X._meta)\n",
@@ -659,7 +671,9 @@
     }
    ],
    "source": [
-    "n_trials = 10  # set to a low number so that the demo finishes quickly. Feel free to adjust\n",
+    "n_trials = (\n",
+    "    10  # set to a low number so that the demo finishes quickly. Feel free to adjust\n",
+    ")\n",
     "study = optuna.create_study(direction=\"minimize\")"
    ]
   },
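For reference, here is a minimal, self-contained sketch of the haversine feature computed in the hunk above, using the same cuspatial calls on a tiny hand-made cuDF frame. The two-row frame and its sample coordinates are invented for illustration only, and running it assumes a GPU with `cudf` and `cuspatial` installed; the real notebook applies the same function to the taxi dataset via `map_partitions`.

```python
# Sketch only: the same cuspatial calls as the notebook hunk above, on toy data.
import cudf
import cuspatial

df = cudf.DataFrame(
    {
        "pickup_longitude": [-73.99, -73.97],
        "pickup_latitude": [40.73, 40.76],
        "dropoff_longitude": [-73.95, -73.98],
        "dropoff_latitude": [40.78, 40.75],
    }
)

# Interleave (longitude, latitude) pairs and wrap them as point GeoSeries.
pickup = cuspatial.GeoSeries.from_points_xy(
    df[["pickup_longitude", "pickup_latitude"]].interleave_columns()
)
dropoff = cuspatial.GeoSeries.from_points_xy(
    df[["dropoff_longitude", "dropoff_latitude"]].interleave_columns()
)

# Great-circle distance between pickup and dropoff, stored as float32
# just as the notebook does before training.
df["haversine_distance"] = cuspatial.haversine_distance(pickup, dropoff)
df["haversine_distance"] = df["haversine_distance"].astype("float32")
print(df)
```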
diff --git a/source/examples/xgboost-randomforest-gpu-hpo-dask/notebook.ipynb b/source/examples/xgboost-randomforest-gpu-hpo-dask/notebook.ipynb
index c321b250..5726ed4e 100644
--- a/source/examples/xgboost-randomforest-gpu-hpo-dask/notebook.ipynb
+++ b/source/examples/xgboost-randomforest-gpu-hpo-dask/notebook.ipynb
@@ -410,7 +410,9 @@
     "        clf = dcv.GridSearchCV(model, gridsearch_params, cv=N_FOLDS, scoring=scorer)\n",
     "    elif mode == \"gpu-random\":\n",
     "        print(\"gpu-random selected\")\n",
-    "        clf = dcv.RandomizedSearchCV(model, gridsearch_params, cv=N_FOLDS, scoring=scorer, n_iter=n_iter)\n",
+    "        clf = dcv.RandomizedSearchCV(\n",
+    "            model, gridsearch_params, cv=N_FOLDS, scoring=scorer, n_iter=n_iter\n",
+    "        )\n",
     "\n",
     "    else:\n",
     "        print(\"Unknown Option, please choose one of [gpu-grid, gpu-random]\")\n",
@@ -567,7 +569,9 @@
     "mode = \"gpu-grid\"\n",
     "\n",
     "with timed(\"XGB-\" + mode):\n",
-    "    res, results = do_HPO(model_gpu_xgb, params_xgb, cuml_accuracy_scorer, X_train, y_cpu, mode=mode)\n",
+    "    res, results = do_HPO(\n",
+    "        model_gpu_xgb, params_xgb, cuml_accuracy_scorer, X_train, y_cpu, mode=mode\n",
+    "    )\n",
     "num_params = len(results.cv_results_[\"mean_test_score\"])\n",
     "print(f\"Searched over {num_params} parameters\")"
    ]
diff --git a/source/examples/xgboost-rf-gpu-cpu-benchmark/hpo.py b/source/examples/xgboost-rf-gpu-cpu-benchmark/hpo.py
index 06fbd6e1..37ccf356 100644
--- a/source/examples/xgboost-rf-gpu-cpu-benchmark/hpo.py
+++ b/source/examples/xgboost-rf-gpu-cpu-benchmark/hpo.py
@@ -70,7 +70,9 @@ def train_xgboost(trial, *, target, reseed_rng, threads_per_worker=None):
     params = {
         "max_depth": trial.suggest_int("max_depth", 4, 8),
         "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1, log=True),
-        "min_child_weight": trial.suggest_float("min_child_weight", 0.1, 10.0, log=True),
+        "min_child_weight": trial.suggest_float(
+            "min_child_weight", 0.1, 10.0, log=True
+        ),
         "reg_alpha": trial.suggest_float("reg_alpha", 0.0001, 100, log=True),
         "reg_lambda": trial.suggest_float("reg_lambda", 0.0001, 100, log=True),
         "verbosity": 0,
@@ -133,12 +135,16 @@ def train_randomforest(trial, *, target, reseed_rng, threads_per_worker=None):
 
             params["n_streams"] = 4
             params["n_bins"] = 256
-            params["split_criterion"] = trial.suggest_categorical("split_criterion", ["gini", "entropy"])
+            params["split_criterion"] = trial.suggest_categorical(
+                "split_criterion", ["gini", "entropy"]
+            )
             trained_model = RF_gpu(**params)
             accuracy_score_func = accuracy_score_gpu
         else:
             params["n_jobs"] = threads_per_worker
-            params["criterion"] = trial.suggest_categorical("criterion", ["gini", "entropy"])
+            params["criterion"] = trial.suggest_categorical(
+                "criterion", ["gini", "entropy"]
+            )
             trained_model = RF_cpu(**params)
             accuracy_score_func = accuracy_score_cpu
 
@@ -222,12 +228,16 @@ def main(args):
                 )
                 for _ in range(*iter_range)
             ]
-            print(f"Testing hyperparameter combinations {iter_range[0]}..{iter_range[1]}")
+            print(
+                f"Testing hyperparameter combinations {iter_range[0]}..{iter_range[1]}"
+            )
             _ = wait(futures)
             for fut in futures:
                 _ = fut.result()  # Ensure that the training job was successful
             tnow = time.perf_counter()
-            print(f"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}")
+            print(
+                f"Best cross-validation metric: {study.best_value}, Time elapsed = {tnow - tstart}"
+            )
     tend = time.perf_counter()
     print(f"Time elapsed: {tend - tstart} sec")
     cluster.close()
@@ -235,7 +245,9 @@ def main(args):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--model-type", type=str, required=True, choices=["XGBoost", "RandomForest"])
+    parser.add_argument(
+        "--model-type", type=str, required=True, choices=["XGBoost", "RandomForest"]
+    )
     parser.add_argument("--target", required=True, choices=["gpu", "cpu"])
     parser.add_argument(
         "--threads_per_worker",
diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md
index f2feff4e..8c922216 100644
--- a/source/guides/azure/infiniband.md
+++ b/source/guides/azure/infiniband.md
@@ -392,3 +392,4 @@ Wall clock                | 8.46 s +/- 1.73 s
 ```{relatedexamples}
 
 ```
+````
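Taken together, the reformatted lines in this patch all revolve around the same Optuna-on-Dask idiom: one study backed by `optuna.integration.DaskStorage`, with each trial submitted to the cluster through `client.submit(study.optimize, ..., pure=False)`. Below is a minimal sketch of that idiom; the toy objective, the `LocalCluster`, and the trial count are stand-ins for the XGBoost/cuML objectives and GPU clusters used in the actual examples.

```python
# Minimal sketch of the Optuna + Dask pattern reformatted in this patch.
import optuna
from optuna.samplers import RandomSampler
from dask.distributed import Client, LocalCluster, wait


def objective(trial):
    # Toy objective standing in for the real XGBoost/cuML training code.
    x = trial.suggest_float("x", -10, 10)
    return -((x - 2) ** 2)  # maximised at x == 2


if __name__ == "__main__":
    n_trials = 20
    cluster = LocalCluster(n_workers=2, threads_per_worker=1)
    client = Client(cluster)

    # Share one study across all workers through the Dask scheduler.
    backend_storage = optuna.storages.InMemoryStorage()
    dask_storage = optuna.integration.DaskStorage(
        storage=backend_storage, client=client
    )
    study = optuna.create_study(
        direction="maximize", sampler=RandomSampler(seed=0), storage=dask_storage
    )

    # Submit each trial as an independent task; pure=False stops Dask from
    # deduplicating calls that receive identical arguments.
    futures = [
        client.submit(study.optimize, objective, n_trials=1, pure=False)
        for _ in range(n_trials)
    ]
    wait(futures)

    print(f"Best value: {study.best_value}, best params: {study.best_params}")
    client.close()
    cluster.close()
```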

From 622f326ae9bd61ba6a827182b6c86b740420105e Mon Sep 17 00:00:00 2001
From: Melody Wang 
Date: Mon, 14 Oct 2024 10:33:05 -0400
Subject: [PATCH 23/27] removed package.json and package-lock.json and added to
 .gitignore

---
 .gitignore        |  3 +++
 package-lock.json | 28 ----------------------------
 package.json      |  5 -----
 3 files changed, 3 insertions(+), 33 deletions(-)
 delete mode 100644 package-lock.json
 delete mode 100644 package.json

diff --git a/.gitignore b/.gitignore
index e6ad9798..53db6445 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,6 @@ jupyter_execute/
 
 # exclusions
 !source/examples/rapids-1brc-single-node/lookup.csv
+
+package.json
+package-lock.json
diff --git a/package-lock.json b/package-lock.json
deleted file mode 100644
index 0d3089e1..00000000
--- a/package-lock.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-  "name": "deployment",
-  "lockfileVersion": 3,
-  "requires": true,
-  "packages": {
-    "": {
-      "devDependencies": {
-        "prettier": "^3.3.3"
-      }
-    },
-    "node_modules/prettier": {
-      "version": "3.3.3",
-      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.3.3.tgz",
-      "integrity": "sha512-i2tDNA0O5IrMO757lfrdQZCc2jPNDVntV0m/+4whiDfWaTKfMNgR7Qz0NAeGz/nRqF4m5/6CLzbP4/liHt12Ew==",
-      "dev": true,
-      "license": "MIT",
-      "bin": {
-        "prettier": "bin/prettier.cjs"
-      },
-      "engines": {
-        "node": ">=14"
-      },
-      "funding": {
-        "url": "https://github.com/prettier/prettier?sponsor=1"
-      }
-    }
-  }
-}
diff --git a/package.json b/package.json
deleted file mode 100644
index c2436a9f..00000000
--- a/package.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-  "devDependencies": {
-    "prettier": "^3.3.3"
-  }
-}

From 92650208e39b0ce9e982e3361d3b184e300be947 Mon Sep 17 00:00:00 2001
From: Melody Wang <98235366+melodywang060@users.noreply.github.com>
Date: Tue, 15 Oct 2024 06:58:31 -0400
Subject: [PATCH 24/27] Update source/guides/azure/infiniband.md

Co-authored-by: James Lamb 
---
 source/guides/azure/infiniband.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md
index 8c922216..f2feff4e 100644
--- a/source/guides/azure/infiniband.md
+++ b/source/guides/azure/infiniband.md
@@ -392,4 +392,3 @@ Wall clock                | 8.46 s +/- 1.73 s
 ```{relatedexamples}
 
 ```
-````

From 2ffcf9a62b3c66cc1fb8d0ac03adbe7ac6789515 Mon Sep 17 00:00:00 2001
From: Melody Wang <98235366+melodywang060@users.noreply.github.com>
Date: Tue, 15 Oct 2024 06:58:39 -0400
Subject: [PATCH 25/27] Update source/guides/azure/infiniband.md

Co-authored-by: James Lamb 
---
 source/guides/azure/infiniband.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md
index f2feff4e..8dd45ec5 100644
--- a/source/guides/azure/infiniband.md
+++ b/source/guides/azure/infiniband.md
@@ -257,7 +257,7 @@ Then start a new shell.
 
 Create a conda environment (see [UCX-Py](https://ucx-py.readthedocs.io/en/latest/install.html) docs)
 
-````shell
+```shell
 mamba create -n ucxpy {{ rapids_conda_channels }} {{ rapids_conda_packages }} ipython ucx-proc=*=gpu ucx ucx-py dask distributed numpy cupy pytest pynvml -y
 mamba activate ucxpy
 

From 178925f92d9d2f3b356cddf8acc4b14c2688c911 Mon Sep 17 00:00:00 2001
From: Melody Wang 
Date: Wed, 16 Oct 2024 06:47:41 -0400
Subject: [PATCH 26/27] fixed linting issues

---
 source/guides/azure/infiniband.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md
index 8dd45ec5..9ed502de 100644
--- a/source/guides/azure/infiniband.md
+++ b/source/guides/azure/infiniband.md
@@ -257,7 +257,7 @@ Then start a new shell.
 
 Create a conda environment (see [UCX-Py](https://ucx-py.readthedocs.io/en/latest/install.html) docs)
 
-```shell
+````shell
 mamba create -n ucxpy {{ rapids_conda_channels }} {{ rapids_conda_packages }} ipython ucx-proc=*=gpu ucx ucx-py dask distributed numpy cupy pytest pynvml -y
 mamba activate ucxpy
 
@@ -266,7 +266,7 @@ Clone UCX-Py repo locally
 ```shell
 git clone https://github.com/rapidsai/ucx-py.git
 cd ucx-py
-```
+````
 
 ### Run Tests
 

From df3fe1c081b66038bf31be2e17fa7df53db49c6d Mon Sep 17 00:00:00 2001
From: Jacob Tomlinson 
Date: Thu, 31 Oct 2024 09:54:18 +0000
Subject: [PATCH 27/27] Update source/guides/azure/infiniband.md

Co-authored-by: Bradley Dice 
---
 source/guides/azure/infiniband.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md
index 9ed502de..f2feff4e 100644
--- a/source/guides/azure/infiniband.md
+++ b/source/guides/azure/infiniband.md
@@ -266,7 +266,7 @@ Clone UCX-Py repo locally
 ```shell
 git clone https://github.com/rapidsai/ucx-py.git
 cd ucx-py
-````
+```
 
 ### Run Tests