
Commit dba5adf

Merge pull request #317 from SurajAralihalli/branch-23.08-main-release
Branch 23.08 main release
2 parents 0cb527c + 8e1701f commit dba5adf

File tree

32 files changed: +59 −58 lines changed


.github/workflows/auto-merge.yml (+4 −4)

@@ -18,7 +18,7 @@ name: auto-merge HEAD to BASE
 on:
   pull_request_target:
     branches:
-      - branch-23.06
+      - branch-23.08
     types: [closed]

 jobs:
@@ -29,14 +29,14 @@ jobs:
     steps:
      - uses: actions/checkout@v3
        with:
-         ref: branch-23.06 # force to fetch from latest upstream instead of PR ref
+         ref: branch-23.08 # force to fetch from latest upstream instead of PR ref

      - name: auto-merge job
        uses: ./.github/workflows/auto-merge
        env:
          OWNER: NVIDIA
          REPO_NAME: spark-rapids-examples
-         HEAD: branch-23.06
-         BASE: branch-23.08
+         HEAD: branch-23.08
+         BASE: branch-23.10
          AUTOMERGE_TOKEN: ${{ secrets.AUTOMERGE_TOKEN }} # use to merge PR
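The HEAD/BASE bump above (branch-23.08 merging forward into branch-23.10) follows the RAPIDS two-month release cadence. A small illustrative helper, not part of this repo (the workflow hard-codes HEAD and BASE), can compute the next branch name:

```python
def next_branch(branch: str) -> str:
    """Return the next branch in the two-month RAPIDS release cadence.

    Illustrative only: e.g. branch-23.08 -> branch-23.10,
    and branch-23.12 rolls over to branch-24.02.
    """
    year, month = map(int, branch.removeprefix("branch-").split("."))
    month += 2
    if month > 12:
        year, month = year + 1, month - 12
    return f"branch-{year}.{month:02d}"

next_branch("branch-23.08")  # branch-23.10
```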

dockerfile/Dockerfile (+2 −2)

@@ -1,4 +1,4 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements. See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
@@ -15,7 +15,7 @@
 # limitations under the License.
 #

-FROM nvidia/cuda:11.0-devel-ubuntu18.04
+FROM nvidia/cuda:11.8.0-devel-ubuntu18.04
 ARG spark_uid=185

 # Install java dependencies

docs/get-started/xgboost-examples/csp/databricks/databricks.md (+2 −2)

@@ -21,7 +21,7 @@ Navigate to your home directory in the UI and select **Create** > **File** from
 create an `init.sh` script with contents:
 ```bash
 #!/bin/bash
-sudo wget -O /databricks/jars/rapids-4-spark_2.12-23.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.06.0/rapids-4-spark_2.12-23.06.0.jar
+sudo wget -O /databricks/jars/rapids-4-spark_2.12-23.08.1.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.08.1/rapids-4-spark_2.12-23.08.1.jar
 ```
 1. Select the Databricks Runtime Version from one of the supported runtimes specified in the
    Prerequisites section.
@@ -68,7 +68,7 @@ create an `init.sh` script with contents:
 ```bash
 spark.rapids.sql.python.gpu.enabled true
 spark.python.daemon.module rapids.daemon_databricks
-spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.06.0.jar:/databricks/spark/python
+spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-23.08.1.jar:/databricks/spark/python
 ```
 Note that the python memory pool requires the cudf library, so you need to install cudf on
 each worker node (`pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com`) or disable the python memory pool

docs/get-started/xgboost-examples/csp/databricks/init.sh (+1 −1)

@@ -1,7 +1,7 @@
 sudo rm -f /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-gpu_2.12--ml.dmlc__xgboost4j-gpu_2.12__1.5.2.jar
 sudo rm -f /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-spark-gpu_2.12--ml.dmlc__xgboost4j-spark-gpu_2.12__1.5.2.jar

-sudo wget -O /databricks/jars/rapids-4-spark_2.12-23.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.06.0/rapids-4-spark_2.12-23.06.0.jar
+sudo wget -O /databricks/jars/rapids-4-spark_2.12-23.08.1.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.08.1/rapids-4-spark_2.12-23.08.1.jar
 sudo wget -O /databricks/jars/xgboost4j-gpu_2.12-1.7.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-gpu_2.12/1.7.1/xgboost4j-gpu_2.12-1.7.1.jar
 sudo wget -O /databricks/jars/xgboost4j-spark-gpu_2.12-1.7.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-spark-gpu_2.12/1.7.1/xgboost4j-spark-gpu_2.12-1.7.1.jar
 ls -ltr
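The jar URLs bumped throughout this commit all follow the standard Maven Central layout (group path / artifact / version / artifact-version.jar). A small hypothetical helper, shown only to illustrate the pattern, can generate them from a version string:

```python
def maven_central_url(group: str, artifact: str, version: str) -> str:
    """Build a Maven Central download URL from the standard repository layout.

    Illustrative helper, not part of this repo.
    """
    path = group.replace(".", "/")  # group ids map dots to directories
    return (f"https://repo1.maven.org/maven2/{path}/{artifact}/{version}/"
            f"{artifact}-{version}.jar")

url = maven_central_url("com.nvidia", "rapids-4-spark_2.12", "23.08.1")
```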

docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md (+1 −1)

@@ -40,7 +40,7 @@ export SPARK_DOCKER_IMAGE=<gpu spark docker image repo and name>
 export SPARK_DOCKER_TAG=<spark docker image tag>

 pushd ${SPARK_HOME}
-wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-23.06/dockerfile/Dockerfile
+wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-23.08/dockerfile/Dockerfile

 # Optionally install additional jars into ${SPARK_HOME}/jars/

docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md (+1 −1)

@@ -5,7 +5,7 @@ For simplicity export the location to these jars. All examples assume the packag
 ### Download the jars

 Download the RAPIDS Accelerator for Apache Spark plugin jar
-* [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.06.0/rapids-4-spark_2.12-23.06.0.jar)
+* [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.08.1/rapids-4-spark_2.12-23.08.1.jar)

 ### Build XGBoost Python Examples

docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md (+1 −1)

@@ -5,7 +5,7 @@ For simplicity export the location to these jars. All examples assume the packag
 ### Download the jars

 1. Download the RAPIDS Accelerator for Apache Spark plugin jar
-   * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.06.0/rapids-4-spark_2.12-23.06.0.jar)
+   * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.08.1/rapids-4-spark_2.12-23.08.1.jar)

 ### Build XGBoost Scala Examples

examples/MIG-Support/device-plugins/gpu-mig/pom.xml (+1 −1)

@@ -35,7 +35,7 @@
   </licenses>

   <properties>
-    <yarn.version>3.3.0</yarn.version>
+    <yarn.version>3.3.6</yarn.version>
     <java.version>1.8</java.version>
     <maven.compiler.version>3.8.1</maven.compiler.version>
     <maven.jar.plugin.version>3.2.0</maven.jar.plugin.version>

examples/ML+DL-Examples/Spark-cuML/pca/Dockerfile (+4 −4)

@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,9 +15,9 @@
 # limitations under the License.
 #

-ARG CUDA_VER=11.5.1
+ARG CUDA_VER=11.8.0
 FROM nvidia/cuda:${CUDA_VER}-devel-ubuntu20.04
-ARG BRANCH_VER=23.06
+ARG BRANCH_VER=23.08

 RUN apt-get update
 RUN apt-get install -y wget ninja-build git
@@ -42,7 +42,7 @@ RUN conda install -c conda-forge openjdk=8 maven=3.8.1 -y
 RUN conda install -c rapidsai-nightly -c nvidia -c conda-forge cudf=${BRANCH_VER} python=3.8 -y

 RUN wget --quiet \
-    https://github.com/Kitware/CMake/releases/download/v3.21.3/cmake-3.21.3-linux-x86_64.tar.gz \
+    https://github.com/Kitware/CMake/releases/download/v3.26.4/cmake-3.26.4-linux-x86_64.tar.gz \
     && tar -xzf cmake-3.21.3-linux-x86_64.tar.gz \
     && rm -rf cmake-3.21.3-linux-x86_64.tar.gz

examples/ML+DL-Examples/Spark-cuML/pca/README.md (+4 −3)

@@ -12,14 +12,15 @@ User can also download the release jar from Maven central:

 [rapids-4-spark-ml_2.12-22.02.0-cuda11.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark-ml_2.12/22.02.0/rapids-4-spark-ml_2.12-22.02.0-cuda11.jar)

-[rapids-4-spark_2.12-23.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.06.0/rapids-4-spark_2.12-23.06.0.jar)
+[rapids-4-spark_2.12-23.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.08.1/rapids-4-spark_2.12-23.08.1.jar)

+Note: This demo only works with the v22.02.0 release.

 ## Sample code

 User can find sample scala code in [`main.scala`](main.scala). In the sample code, we will generate random data with 2048 feature dimensions. Then we use PCA to reduce number of features to 3.

-Just copy the sample code into the spark-shell laucnhed according to [this section](https://github.com/NVIDIA/spark-rapids-ml#how-to-use) and REPL will give out the algorithm results.
+Just copy the sample code into the spark-shell launched according to [this section](https://github.com/NVIDIA/spark-rapids-ml#how-to-use) and the REPL will give out the algorithm results.

 ## Notebook

@@ -48,7 +49,7 @@ It is assumed that a Standalone Spark cluster has been set up, the `SPARK_MASTER

 ``` bash
 RAPIDS_ML_JAR=PATH_TO_rapids-4-spark-ml_2.12-22.02.0-cuda11.jar
-PLUGIN_JAR=PATH_TO_rapids-4-spark_2.12-23.06.0.jar
+PLUGIN_JAR=PATH_TO_rapids-4-spark_2.12-23.08.1.jar

 jupyter toree install \
 --spark_home=${SPARK_HOME} \

examples/ML+DL-Examples/Spark-cuML/pca/pom.xml (+2 −2)

@@ -21,7 +21,7 @@
   <groupId>com.nvidia</groupId>
   <artifactId>PCAExample</artifactId>
   <packaging>jar</packaging>
-  <version>23.06.0</version>
+  <version>23.08.0</version>

   <properties>
     <maven.compiler.source>8</maven.compiler.source>
@@ -51,7 +51,7 @@
     <dependency>
       <groupId>com.nvidia</groupId>
       <artifactId>rapids-4-spark-ml_2.12</artifactId>
-      <version>23.06.0</version>
+      <version>23.02.0</version>
     </dependency>
   </dependencies>

examples/ML+DL-Examples/Spark-cuML/pca/spark-submit.sh (+4 −3)

@@ -15,8 +15,9 @@
 # limitations under the License.
 #

-ML_JAR=/root/.m2/repository/com/nvidia/rapids-4-spark-ml_2.12/23.06.0-SNAPSHOT/rapids-4-spark-ml_2.12-23.06.0-SNAPSHOT.jar
-PLUGIN_JAR=/root/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.06.0-SNAPSHOT/rapids-4-spark_2.12-23.06.0-SNAPSHOT.jar
+ML_JAR=/root/.m2/repository/com/nvidia/rapids-4-spark-ml_2.12/23.04.0-SNAPSHOT/rapids-4-spark-ml_2.12-23.04.0-SNAPSHOT.jar
+PLUGIN_JAR=/root/.m2/repository/com/nvidia/rapids-4-spark_2.12/23.08.0-SNAPSHOT/rapids-4-spark_2.12-23.08.0-SNAPSHOT.jar
+# Note: The last rapids-4-spark-ml release version is 22.02.0; the snapshot version is 23.04.0-SNAPSHOT.

 $SPARK_HOME/bin/spark-submit \
 --master spark://127.0.0.1:7077 \
@@ -38,4 +39,4 @@ $SPARK_HOME/bin/spark-submit \
 --conf spark.network.timeout=1000s \
 --jars $ML_JAR,$PLUGIN_JAR \
 --class com.nvidia.spark.examples.pca.Main \
-/workspace/target/PCAExample-23.06.0-SNAPSHOT.jar
+/workspace/target/PCAExample-23.08.0-SNAPSHOT.jar

examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb (+1 −1)

@@ -22,7 +22,7 @@
     "import os\n",
     "# Change to your cluster ip:port and directories\n",
     "SPARK_MASTER_URL = os.getenv(\"SPARK_MASTER_URL\", \"spark:your-ip:port\")\n",
-    "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-23.06.0.jar\")\n"
+    "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-23.08.1.jar\")\n"
    ]
   },
   {

examples/UDF-Examples/RAPIDS-accelerated-UDFs/Dockerfile (+3 −3)

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
 #

 # A container that can be used to build UDF native code against libcudf
-ARG CUDA_VERSION=11.5.0
+ARG CUDA_VERSION=11.8.0
 ARG LINUX_VERSION=ubuntu18.04

 FROM nvidia/cuda:${CUDA_VERSION}-devel-${LINUX_VERSION}
@@ -58,7 +58,7 @@ CUDA_VERSION_MINOR=$(echo $CUDA_VERSION | tr -d '.' | cut -c 3); \
 # Set JDK8 as the default Java
 && update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java

-ARG CMAKE_VERSION=3.23.3
+ARG CMAKE_VERSION=3.26.4

 # Install CMake
 RUN cd /tmp \

examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md (+1 −1)

@@ -108,7 +108,7 @@ See above Prerequisites section
 First finish the steps in the "Building with Native Code Examples and run test cases" section, then do the following in the docker.

 ### Get jars from Maven Central
-[rapids-4-spark_2.12-23.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.06.0/rapids-4-spark_2.12-23.06.0.jar)
+[rapids-4-spark_2.12-23.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.08.1/rapids-4-spark_2.12-23.08.1.jar)

 ### Launch a local mode Spark

examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml (+2 −2)

@@ -25,7 +25,7 @@
     user defined functions for use with the RAPIDS Accelerator
     for Apache Spark
   </description>
-  <version>23.06.0</version>
+  <version>23.08.0</version>

   <properties>
     <maven.compiler.source>1.8</maven.compiler.source>
@@ -37,7 +37,7 @@
     <cuda.version>cuda11</cuda.version>
     <scala.binary.version>2.12</scala.binary.version>
     <!-- Depends on release version, Snapshot version is not published to the Maven Central -->
-    <rapids4spark.version>23.06.0</rapids4spark.version>
+    <rapids4spark.version>23.08.1</rapids4spark.version>
     <spark.version>3.1.1</spark.version>
     <scala.version>2.12.15</scala.version>
     <udf.native.build.path>${project.build.directory}/cpp-build</udf.native.build.path>

examples/UDF-Examples/RAPIDS-accelerated-UDFs/src/main/cpp/CMakeLists.txt (+4 −4)

@@ -16,7 +16,7 @@

 cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)

-file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.06/RAPIDS.cmake
+file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/RAPIDS.cmake
     ${CMAKE_BINARY_DIR}/RAPIDS.cmake)
 include(${CMAKE_BINARY_DIR}/RAPIDS.cmake)

@@ -32,7 +32,7 @@ if(DEFINED GPU_ARCHS)
 endif()
 rapids_cuda_init_architectures(UDFEXAMPLESJNI)

-project(UDFEXAMPLESJNI VERSION 23.06.0 LANGUAGES C CXX CUDA)
+project(UDFEXAMPLESJNI VERSION 23.08.0 LANGUAGES C CXX CUDA)

 option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF)
 option(BUILD_UDF_BENCHMARKS "Build the benchmarks" OFF)
@@ -84,10 +84,10 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -w --expt-extended-lambda --expt-relax
 set(CUDA_USE_STATIC_CUDA_RUNTIME OFF)

 rapids_cpm_init()
-rapids_cpm_find(cudf 23.06.00
+rapids_cpm_find(cudf 23.08.00
   CPM_ARGS
   GIT_REPOSITORY https://github.com/rapidsai/cudf.git
-  GIT_TAG branch-23.06
+  GIT_TAG branch-23.08
   GIT_SHALLOW TRUE
   SOURCE_SUBDIR cpp
   OPTIONS "BUILD_TESTS OFF"
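The same calendar version appears above in several notations (cudf 23.08.00, GIT_TAG branch-23.08, rapids-cmake branch-23.08). A tiny illustrative helper, not part of this repo, shows how a version string maps onto its release branch name:

```python
def rapids_branch(version: str) -> str:
    """Map a RAPIDS calendar version (e.g. '23.08.00') to its release branch.

    Illustrative only; the CMake file above hard-codes both forms.
    """
    year, month = version.split(".")[:2]  # keep year and zero-padded month
    return f"branch-{year}.{month}"

rapids_branch("23.08.00")  # "branch-23.08"
```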

examples/UDF-Examples/Spark-cuSpatial/Dockerfile (+1 −1)

@@ -39,7 +39,7 @@ RUN conda --version
 RUN conda install -c conda-forge openjdk=8 maven=3.8.1 -y

 RUN wget --quiet \
-    https://github.com/Kitware/CMake/releases/download/v3.21.3/cmake-3.21.3-linux-x86_64.tar.gz \
+    https://github.com/Kitware/CMake/releases/download/v3.26.4/cmake-3.26.4-linux-x86_64.tar.gz \
     && tar -xzf cmake-3.21.3-linux-x86_64.tar.gz \
     && rm -rf cmake-3.21.3-linux-x86_64.tar.gz

examples/UDF-Examples/Spark-cuSpatial/gpu-run.sh (−1)

@@ -32,7 +32,6 @@ rm -rf $DATA_OUT_PATH
 JARS=$ROOT_PATH/jars

 JARS_PATH=${JARS_PATH:-$JARS/rapids-4-spark_2.12-23.02.0.jar,$JARS/spark-cuspatial-23.02.0.jar}
-
 $SPARK_HOME/bin/spark-submit --master spark://$HOSTNAME:7077 \
 --name "Gpu Spatial Join UDF" \
 --executor-memory 20G \

examples/XGBoost-Examples/agaricus/notebooks/python/agaricus-gpu.ipynb (+1 −1)

@@ -73,7 +73,7 @@
     "Setting default log level to \"WARN\".\n",
     "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
     "2022-11-30 06:57:40,550 WARN resource.ResourceUtils: The configuration of cores (exec = 2 task = 1, runnable tasks = 2) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n",
-    "2022-11-30 06:57:54,195 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 23.06.0 using cudf 23.06.0.\n",
+    "2022-11-30 06:57:54,195 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 23.08.1 using cudf 23.08.1.\n",
     "2022-11-30 06:57:54,210 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n",
     "2022-11-30 06:57:54,214 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n",
     "2022-11-30 06:57:54,214 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n",
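The `RapidsPluginUtils` startup line in these notebook logs is the quickest way to confirm which plugin build is active. A small hypothetical helper (not part of the plugin) can extract the versions from such a line:

```python
import re

def plugin_versions(log_line: str) -> tuple[str, str]:
    """Extract (accelerator, cudf) versions from a RapidsPluginUtils startup line.

    Illustrative helper based on the log format shown in the notebooks above.
    """
    m = re.search(r"RAPIDS Accelerator (\S+) using cudf (\S+)\.", log_line)
    if m is None:
        raise ValueError("not a RapidsPluginUtils version line")
    return m.group(1), m.group(2)

line = ("2022-11-30 06:57:54,195 WARN rapids.RapidsPluginUtils: "
        "RAPIDS Accelerator 23.08.1 using cudf 23.08.1.")
```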

examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL+XGBoost.ipynb (+1 −1)

@@ -6,7 +6,7 @@
    "source": [
     "# Dataset\n",
     "\n",
-    "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-23.06/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n",
+    "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-23.08/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n",
     "\n",
     "# ETL + XGBoost train & transform\n",
     "\n",

examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb (+3 −3)

@@ -6,18 +6,18 @@
    "source": [
     "## Prerequirement\n",
     "### 1. Download data\n",
-    "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-23.06/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n",
+    "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-23.08/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n",
     "\n",
     "### 2. Download needed jars\n",
-    "* [rapids-4-spark_2.12-23.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.06.0/rapids-4-spark_2.12-23.06.0.jar)\n",
+    "* [rapids-4-spark_2.12-23.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/23.08.1/rapids-4-spark_2.12-23.08.1.jar)\n",
     "\n",
     "\n",
     "### 3. Start Spark Standalone\n",
     "Before running the script, please setup Spark standalone mode\n",
     "\n",
     "### 4. Add ENV\n",
     "```\n",
-    "$ export SPARK_JARS=rapids-4-spark_2.12-23.06.0.jar\n",
+    "$ export SPARK_JARS=rapids-4-spark_2.12-23.08.1.jar\n",
     "$ export PYSPARK_DRIVER_PYTHON=jupyter \n",
     "$ export PYSPARK_DRIVER_PYTHON_OPTS=notebook\n",
     "```\n",

examples/XGBoost-Examples/mortgage/notebooks/python/cv-mortgage-gpu.ipynb (+1 −1)

@@ -63,7 +63,7 @@
     "Setting default log level to \"WARN\".\n",
     "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
     "2022-11-25 09:34:43,952 WARN resource.ResourceUtils: The configuration of cores (exec = 4 task = 1, runnable tasks = 4) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n",
-    "2022-11-25 09:34:58,155 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 23.06.0 using cudf 23.06.0.\n",
+    "2022-11-25 09:34:58,155 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 23.08.1 using cudf 23.08.1.\n",
     "2022-11-25 09:34:58,171 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n",
     "2022-11-25 09:34:58,175 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n",
     "2022-11-25 09:34:58,175 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n"

examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb (+1 −1)

@@ -84,7 +84,7 @@
     "22/11/24 06:14:06 INFO org.apache.spark.SparkEnv: Registering BlockManagerMaster\n",
     "22/11/24 06:14:06 INFO org.apache.spark.SparkEnv: Registering BlockManagerMasterHeartbeat\n",
     "22/11/24 06:14:06 INFO org.apache.spark.SparkEnv: Registering OutputCommitCoordinator\n",
-    "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: RAPIDS Accelerator 23.06.0 using cudf 23.06.0.\n",
+    "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: RAPIDS Accelerator 23.08.1 using cudf 23.08.1.\n",
     "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n",
     "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n",
     "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n"

0 commit comments
