From 0870051b6fbe8ad5a5cec93035d1784e9b18cbd8 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Mon, 23 Sep 2024 11:41:42 -0500
Subject: [PATCH 1/7] Improve Polars docs (#16820)

This PR improves the docs by reducing the size of the Polars heading
(too many words) and tightening up the writing of the docs page.

---------

Co-authored-by: Ray Douglass <ray@raydouglass.com>
---
 .github/workflows/build.yaml           |  2 +-
 .github/workflows/pr.yaml              |  6 +++---
 .github/workflows/test.yaml            |  6 +++---
 docs/cudf/source/cudf_polars/index.rst | 12 ++++++------
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 2e5959338b0..379f39ac965 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -62,7 +62,7 @@ jobs:
       arch: "amd64"
       branch: ${{ inputs.branch }}
       build_type: ${{ inputs.build_type || 'branch' }}
-      container_image: "rapidsai/ci-conda:latest"
+      container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
       date: ${{ inputs.date }}
       node_type: "gpu-v100-latest-1"
       run_script: "ci/build_docs.sh"
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 25f11863b0d..0fe4533f68e 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -89,7 +89,7 @@ jobs:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
       arch: "amd64"
-      container_image: "rapidsai/ci-conda:latest"
+      container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
       run_script: "ci/test_java.sh"
   static-configure:
     needs: checks
@@ -109,7 +109,7 @@ jobs:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
       arch: "amd64"
-      container_image: "rapidsai/ci-conda:latest"
+      container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
       run_script: "ci/test_notebooks.sh"
   docs-build:
     needs: conda-python-build
@@ -119,7 +119,7 @@ jobs:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
       arch: "amd64"
-      container_image: "rapidsai/ci-conda:latest"
+      container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
       run_script: "ci/build_docs.sh"
   wheel-build-cudf:
     needs: checks
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 36c9088d93c..a10117a45e6 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -41,7 +41,7 @@ jobs:
       sha: ${{ inputs.sha }}
       node_type: "gpu-v100-latest-1"
       arch: "amd64"
-      container_image: "rapidsai/ci-conda:latest"
+      container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
       run_script: "ci/test_cpp_memcheck.sh"
   static-configure:
     secrets: inherit
@@ -81,7 +81,7 @@ jobs:
       sha: ${{ inputs.sha }}
       node_type: "gpu-v100-latest-1"
       arch: "amd64"
-      container_image: "rapidsai/ci-conda:latest"
+      container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
       run_script: "ci/test_java.sh"
   conda-notebook-tests:
     secrets: inherit
@@ -93,7 +93,7 @@ jobs:
       sha: ${{ inputs.sha }}
       node_type: "gpu-v100-latest-1"
       arch: "amd64"
-      container_image: "rapidsai/ci-conda:latest"
+      container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
       run_script: "ci/test_notebooks.sh"
   wheel-tests-cudf:
     secrets: inherit
diff --git a/docs/cudf/source/cudf_polars/index.rst b/docs/cudf/source/cudf_polars/index.rst
index cc7aabd124f..0a3a0d86b2c 100644
--- a/docs/cudf/source/cudf_polars/index.rst
+++ b/docs/cudf/source/cudf_polars/index.rst
@@ -1,7 +1,7 @@
-cuDF-based GPU backend for Polars [Open Beta]
-=============================================
+Polars GPU engine
+=================
 
-cuDF supports an in-memory, GPU-accelerated execution engine for Python users of the Polars Lazy API.
+cuDF provides an in-memory, GPU-accelerated execution engine for Python users of the Polars Lazy API.
 The engine supports most of the core expressions and data types as well as a growing set of more advanced dataframe manipulations
 and data file formats. When using the GPU engine, Polars will convert expressions into an optimized query plan and determine
 whether the plan is supported on the GPU. If it is not, the execution will transparently fall back to the standard Polars engine
@@ -16,7 +16,7 @@ We reproduced the `Polars Decision Support (PDS) <https://github.com/pola-rs/pol
 
 
 
-You can see up to 13x speedup using the GPU backend on the compute-heavy PDS queries involving complex aggregation and join operations. Below are the speedups for the top performing queries:
+You can see up to 13x speedup using the GPU engine on the compute-heavy PDS queries involving complex aggregation and join operations. Below are the speedups for the top performing queries:
 
 
 .. figure:: ../_static/compute_heavy_queries_polars.png
@@ -29,7 +29,7 @@ You can reproduce the results by visiting the `Polars Decision Support (PDS) Git
 Learn More
 ----------
 
-The GPU backend for Polars is now available in Open Beta and the engine is undergoing rapid development. To learn more, visit the `GPU Support page <https://docs.pola.rs/user-guide/gpu-support/>`__ on the Polars website.
+The GPU engine for Polars is now available in Open Beta and the engine is undergoing rapid development. To learn more, visit the `GPU Support page <https://docs.pola.rs/user-guide/gpu-support/>`__ on the Polars website.
 
 Launch on Google Colab
 ----------------------
@@ -38,4 +38,4 @@ Launch on Google Colab
    :width: 200px
    :target: https://colab.research.google.com/github/rapidsai-community/showcase/blob/main/accelerated_data_processing_examples/polars_gpu_engine_demo.ipynb
 
-   Take the cuDF backend for Polars for a test-drive in a free GPU-enabled notebook environment using your Google account by `launching on Colab <https://colab.research.google.com/github/rapidsai-community/showcase/blob/main/accelerated_data_processing_examples/polars_gpu_engine_demo.ipynb>`__.
+   Try out the GPU engine for Polars in a free GPU notebook environment. Sign in with your Google account and `launch the demo on Colab <https://colab.research.google.com/github/rapidsai-community/showcase/blob/main/accelerated_data_processing_examples/polars_gpu_engine_demo.ipynb>`__.

From 389208c9a46fd6583efacfe9c1875c862e8d0c90 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 23 Sep 2024 14:03:57 -0500
Subject: [PATCH 2/7] Ignore numba warning specific to ARM runners (#16872)

This PR ignores numba warnings that show up on ARM runners: https://github.com/numba/numba/issues/6589#issuecomment-748595076

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16872
---
 python/cudf/cudf/tests/pytest.ini | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/tests/pytest.ini b/python/cudf/cudf/tests/pytest.ini
index 8a594794fac..d05ba9aaacc 100644
--- a/python/cudf/cudf/tests/pytest.ini
+++ b/python/cudf/cudf/tests/pytest.ini
@@ -14,4 +14,6 @@ filterwarnings =
     ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning
     # PerformanceWarning from cupy warming up the JIT cache
     ignore:Jitify is performing a one-time only warm-up to populate the persistent cache:cupy._util.PerformanceWarning
+    # Ignore numba PEP 456 warning specific to arm machines
+    ignore:FNV hashing is not implemented in Numba.*:UserWarning
 addopts = --tb=native

From 8b12cf4e66b4b1f8ec248493c27deb65ee625bbf Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Mon, 23 Sep 2024 15:35:32 -0500
Subject: [PATCH 3/7] Update fmt (to 11.0.2) and spdlog (to 1.14.1). (#16806)

## Description

Replaces #15603

Contributes to:

* https://github.com/rapidsai/build-planning/issues/54
* https://github.com/rapidsai/build-planning/issues/56
* https://github.com/rapidsai/rapids-cmake/issues/387

Now that most of `conda-forge` has been updated to `fmt >=11.0.1,<12`
and `spdlog>=1.14.1,<1.15`
(https://github.com/rapidsai/build-planning/issues/56#issuecomment-2334281452),
we're attempting to upgrade RAPIDS to similar versions of those
libraries.

This improves the likelihood that RAPIDS will be installable alongside
newer versions of its
dependencies and complementary packages on conda-forge.

## Notes for Reviewers

This PR is testing changes made in
https://github.com/rapidsai/rapids-cmake/pull/689.
It shouldn't be merged until those `rapids-cmake` changes are merged and
any testing-specific details have been removed.
---
 .../all_cuda-118_arch-x86_64.yaml             |  4 ++--
 .../all_cuda-125_arch-x86_64.yaml             |  4 ++--
 conda/recipes/libcudf/conda_build_config.yaml |  4 ++--
 cpp/CMakeLists.txt                            |  2 +-
 cpp/cmake/thirdparty/get_spdlog.cmake         | 21 ++++++-------------
 dependencies.yaml                             |  4 ++--
 6 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index c96e8706d27..16b3d112992 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -31,7 +31,7 @@ dependencies:
 - doxygen=1.9.1
 - fastavro>=0.22.9
 - flatbuffers==24.3.25
-- fmt>=10.1.1,<11
+- fmt>=11.0.2,<12
 - fsspec>=0.6.0
 - gcc_linux-64=11.*
 - hypothesis
@@ -84,7 +84,7 @@ dependencies:
 - s3fs>=2022.3.0
 - scikit-build-core>=0.10.0
 - scipy
-- spdlog>=1.12.0,<1.13
+- spdlog>=1.14.1,<1.15
 - sphinx
 - sphinx-autobuild
 - sphinx-copybutton
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index e54a44d9f6e..cce2e0eea84 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -32,7 +32,7 @@ dependencies:
 - doxygen=1.9.1
 - fastavro>=0.22.9
 - flatbuffers==24.3.25
-- fmt>=10.1.1,<11
+- fmt>=11.0.2,<12
 - fsspec>=0.6.0
 - gcc_linux-64=11.*
 - hypothesis
@@ -82,7 +82,7 @@ dependencies:
 - s3fs>=2022.3.0
 - scikit-build-core>=0.10.0
 - scipy
-- spdlog>=1.12.0,<1.13
+- spdlog>=1.14.1,<1.15
 - sphinx
 - sphinx-autobuild
 - sphinx-copybutton
diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
index 33fa4b4eccf..dc75eb4b252 100644
--- a/conda/recipes/libcudf/conda_build_config.yaml
+++ b/conda/recipes/libcudf/conda_build_config.yaml
@@ -26,13 +26,13 @@ librdkafka_version:
   - ">=2.5.0,<2.6.0a0"
 
 fmt_version:
-  - ">=10.1.1,<11"
+  - ">=11.0.2,<12"
 
 flatbuffers_version:
   - "=24.3.25"
 
 spdlog_version:
-  - ">=1.12.0,<1.13"
+  - ">=1.14.1,<1.15"
 
 nvcomp_version:
   - "=4.0.1"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 26c086046a8..84b462bb884 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -798,7 +798,7 @@ add_dependencies(cudf jitify_preprocess_run)
 # Specify the target module library dependencies
 target_link_libraries(
   cudf
-  PUBLIC CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
+  PUBLIC CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool> spdlog::spdlog_header_only
   PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp
           kvikio::kvikio $<TARGET_NAME_IF_EXISTS:cuFile_interface> nanoarrow
 )
diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake
index c0e07d02d94..90b0f4d8a8e 100644
--- a/cpp/cmake/thirdparty/get_spdlog.cmake
+++ b/cpp/cmake/thirdparty/get_spdlog.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -16,21 +16,12 @@
 function(find_and_configure_spdlog)
 
   include(${rapids-cmake-dir}/cpm/spdlog.cmake)
-  rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET cudf-exports)
-  rapids_export_package(BUILD spdlog cudf-exports)
+  rapids_cpm_spdlog(
+    FMT_OPTION "EXTERNAL_FMT_HO"
+    INSTALL_EXPORT_SET cudf-exports
+    BUILD_EXPORT_SET cudf-exports
+  )
 
-  if(spdlog_ADDED)
-    rapids_export(
-      BUILD spdlog
-      EXPORT_SET spdlog
-      GLOBAL_TARGETS spdlog spdlog_header_only
-      NAMESPACE spdlog::
-    )
-    include("${rapids-cmake-dir}/export/find_package_root.cmake")
-    rapids_export_find_package_root(
-      BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports
-    )
-  endif()
 endfunction()
 
 find_and_configure_spdlog()
diff --git a/dependencies.yaml b/dependencies.yaml
index 2f2d7ba679e..01edcb3889a 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -350,12 +350,12 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - fmt>=10.1.1,<11
+          - fmt>=11.0.2,<12
           - flatbuffers==24.3.25
           - librdkafka>=2.5.0,<2.6.0a0
           # Align nvcomp version with rapids-cmake
           - nvcomp==4.0.1
-          - spdlog>=1.12.0,<1.13
+          - spdlog>=1.14.1,<1.15
   rapids_build_skbuild:
     common:
       - output_types: [conda, requirements, pyproject]

From 6badd6b183e966f7f882708a0f4b2c4d0f2b5368 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans" <bobby@apache.org>
Date: Tue, 24 Sep 2024 08:17:53 -0500
Subject: [PATCH 4/7] Add in support for setting delim when parsing JSON
 through java (#16867) (#16880)

This is a back-port of #16867 to 24.10.

Authors:
  - Robert (Bobby) Evans (https://github.com/revans2)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)

URL: https://github.com/rapidsai/cudf/pull/16880
---
 .../main/java/ai/rapids/cudf/JSONOptions.java | 16 ++++++++++++++++
 java/src/main/java/ai/rapids/cudf/Table.java  | 19 ++++++++++++++-----
 java/src/main/native/src/TableJni.cpp         | 12 ++++++++++--
 .../test/java/ai/rapids/cudf/TableTest.java   | 19 ++++++++++++++++++-
 4 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/JSONOptions.java b/java/src/main/java/ai/rapids/cudf/JSONOptions.java
index c8308ca17ec..17b497be5ee 100644
--- a/java/src/main/java/ai/rapids/cudf/JSONOptions.java
+++ b/java/src/main/java/ai/rapids/cudf/JSONOptions.java
@@ -38,6 +38,7 @@ public final class JSONOptions extends ColumnFilterOptions {
   private final boolean allowLeadingZeros;
   private final boolean allowNonNumericNumbers;
   private final boolean allowUnquotedControlChars;
+  private final byte lineDelimiter;
 
   private JSONOptions(Builder builder) {
     super(builder);
@@ -52,6 +53,11 @@ private JSONOptions(Builder builder) {
     allowLeadingZeros = builder.allowLeadingZeros;
     allowNonNumericNumbers = builder.allowNonNumericNumbers;
     allowUnquotedControlChars = builder.allowUnquotedControlChars;
+    lineDelimiter = builder.lineDelimiter;
+  }
+
+  public byte getLineDelimiter() {
+    return lineDelimiter;
   }
 
   public boolean isDayFirst() {
@@ -123,6 +129,16 @@ public static final class Builder  extends ColumnFilterOptions.Builder<JSONOptio
     private boolean mixedTypesAsStrings = false;
     private boolean keepQuotes = false;
 
+    private byte lineDelimiter = '\n';
+
+    public Builder withLineDelimiter(char delimiter) {
+      if (delimiter > Byte.MAX_VALUE) {
+        throw new IllegalArgumentException("Only basic ASCII values are supported as line delimiters " + delimiter);
+      }
+      lineDelimiter = (byte)delimiter;
+      return this;
+    }
+
     /**
      * Should json validation be strict or not
      */
diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index 09da43374ae..19c72809cea 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -258,7 +258,8 @@ private static native long readJSON(int[] numChildren, String[] columnNames,
                                         boolean strictValidation,
                                         boolean allowLeadingZeros,
                                         boolean allowNonNumericNumbers,
-                                        boolean allowUnquotedControl) throws CudfException;
+                                        boolean allowUnquotedControl,
+                                        byte lineDelimiter) throws CudfException;
 
   private static native long readJSONFromDataSource(int[] numChildren, String[] columnNames,
                                       int[] dTypeIds, int[] dTypeScales,
@@ -272,6 +273,7 @@ private static native long readJSONFromDataSource(int[] numChildren, String[] co
                                       boolean allowLeadingZeros,
                                       boolean allowNonNumericNumbers,
                                       boolean allowUnquotedControl,
+                                      byte lineDelimiter,
                                       long dsHandle) throws CudfException;
 
   private static native long readAndInferJSONFromDataSource(boolean dayFirst, boolean lines,
@@ -284,6 +286,7 @@ private static native long readAndInferJSONFromDataSource(boolean dayFirst, bool
                                       boolean allowLeadingZeros,
                                       boolean allowNonNumericNumbers,
                                       boolean allowUnquotedControl,
+                                      byte lineDelimiter,
                                       long dsHandle) throws CudfException;
 
   private static native long readAndInferJSON(long address, long length,
@@ -297,7 +300,8 @@ private static native long readAndInferJSON(long address, long length,
                                               boolean strictValidation,
                                               boolean allowLeadingZeros,
                                               boolean allowNonNumericNumbers,
-                                              boolean allowUnquotedControl) throws CudfException;
+                                              boolean allowUnquotedControl,
+                                              byte lineDelimiter) throws CudfException;
 
   /**
    * Read in Parquet formatted data.
@@ -1321,7 +1325,8 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
                     opts.strictValidation(),
                     opts.leadingZerosAllowed(),
                     opts.nonNumericNumbersAllowed(),
-                    opts.unquotedControlChars()))) {
+                    opts.unquotedControlChars(),
+                    opts.getLineDelimiter()))) {
 
       return gatherJSONColumns(schema, twm, -1);
     }
@@ -1404,7 +1409,8 @@ public static TableWithMeta readJSON(JSONOptions opts, HostMemoryBuffer buffer,
         opts.strictValidation(),
         opts.leadingZerosAllowed(),
         opts.nonNumericNumbersAllowed(),
-        opts.unquotedControlChars()));
+        opts.unquotedControlChars(),
+        opts.getLineDelimiter()));
   }
 
   /**
@@ -1426,6 +1432,7 @@ public static TableWithMeta readAndInferJSON(JSONOptions opts, DataSource ds) {
           opts.leadingZerosAllowed(),
           opts.nonNumericNumbersAllowed(),
           opts.unquotedControlChars(),
+          opts.getLineDelimiter(),
           dsHandle));
         return twm;
       } finally {
@@ -1479,7 +1486,8 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
             opts.strictValidation(),
             opts.leadingZerosAllowed(),
             opts.nonNumericNumbersAllowed(),
-            opts.unquotedControlChars()))) {
+            opts.unquotedControlChars(),
+            opts.getLineDelimiter()))) {
       return gatherJSONColumns(schema, twm, emptyRowCount);
     }
   }
@@ -1518,6 +1526,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int
         opts.leadingZerosAllowed(),
         opts.nonNumericNumbersAllowed(),
         opts.unquotedControlChars(),
+        opts.getLineDelimiter(),
         dsHandle))) {
       return gatherJSONColumns(schema, twm, emptyRowCount);
     } finally {
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index 92e213bcb60..96d4c2c4eeb 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -1627,6 +1627,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env,
                                                          jboolean allow_leading_zeros,
                                                          jboolean allow_nonnumeric_numbers,
                                                          jboolean allow_unquoted_control,
+                                                         jbyte line_delimiter,
                                                          jlong ds_handle)
 {
   JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0);
@@ -1646,6 +1647,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env,
         .normalize_single_quotes(static_cast<bool>(normalize_single_quotes))
         .normalize_whitespace(static_cast<bool>(normalize_whitespace))
         .mixed_types_as_string(mixed_types_as_string)
+        .delimiter(static_cast<char>(line_delimiter))
         .strict_validation(strict_validation)
         .keep_quotes(keep_quotes);
     if (strict_validation) {
@@ -1676,7 +1678,8 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env,
                                            jboolean strict_validation,
                                            jboolean allow_leading_zeros,
                                            jboolean allow_nonnumeric_numbers,
-                                           jboolean allow_unquoted_control)
+                                           jboolean allow_unquoted_control,
+                                           jbyte line_delimiter)
 {
   JNI_NULL_CHECK(env, buffer, "buffer cannot be null", 0);
   if (buffer_length <= 0) {
@@ -1700,6 +1703,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env,
         .normalize_whitespace(static_cast<bool>(normalize_whitespace))
         .strict_validation(strict_validation)
         .mixed_types_as_string(mixed_types_as_string)
+        .delimiter(static_cast<char>(line_delimiter))
         .keep_quotes(keep_quotes);
     if (strict_validation) {
       opts.numeric_leading_zeros(allow_leading_zeros)
@@ -1814,6 +1818,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
                                                  jboolean allow_leading_zeros,
                                                  jboolean allow_nonnumeric_numbers,
                                                  jboolean allow_unquoted_control,
+                                                 jbyte line_delimiter,
                                                  jlong ds_handle)
 {
   JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0);
@@ -1848,6 +1853,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
         .normalize_single_quotes(static_cast<bool>(normalize_single_quotes))
         .normalize_whitespace(static_cast<bool>(normalize_whitespace))
         .mixed_types_as_string(mixed_types_as_string)
+        .delimiter(static_cast<char>(line_delimiter))
         .strict_validation(strict_validation)
         .keep_quotes(keep_quotes);
     if (strict_validation) {
@@ -1908,7 +1914,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
                                                            jboolean strict_validation,
                                                            jboolean allow_leading_zeros,
                                                            jboolean allow_nonnumeric_numbers,
-                                                           jboolean allow_unquoted_control)
+                                                           jboolean allow_unquoted_control,
+                                                           jbyte line_delimiter)
 {
   bool read_buffer = true;
   if (buffer == 0) {
@@ -1957,6 +1964,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
         .normalize_single_quotes(static_cast<bool>(normalize_single_quotes))
         .normalize_whitespace(static_cast<bool>(normalize_whitespace))
         .mixed_types_as_string(mixed_types_as_string)
+        .delimiter(static_cast<char>(line_delimiter))
         .strict_validation(strict_validation)
         .keep_quotes(keep_quotes);
     if (strict_validation) {
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index 830f2b33b32..c7fcb1756b6 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -40,7 +40,6 @@
 import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.OriginalType;
-import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
 
 import java.io.*;
@@ -656,6 +655,24 @@ void testJSONValidationUnquotedControl() {
     }
   }
 
+  private static final byte[] CR_JSON_TEST_BUFFER = ("{\"a\":\"12\n3\"}\0" +
+      "{\"a\":\"AB\nC\"}\0").getBytes(StandardCharsets.UTF_8);
+
+  @Test
+  void testReadJSONDelim() {
+    Schema schema = Schema.builder().addColumn(DType.STRING, "a").build();
+    JSONOptions opts = JSONOptions.builder()
+        .withLines(true)
+        .withLineDelimiter('\0')
+        .build();
+    try (Table expected = new Table.TestBuilder()
+        .column("12\n3", "AB\nC")
+        .build();
+        Table found = Table.readJSON(schema, opts, CR_JSON_TEST_BUFFER)) {
+      assertTablesAreEqual(expected, found);
+    }
+  }
+
   private static final byte[] NESTED_JSON_DATA_BUFFER = ("{\"a\":{\"c\":\"C1\"}}\n" +
       "{\"a\":{\"c\":\"C2\", \"b\":\"B2\"}}\n" +
       "{\"d\":[1,2,3]}\n" +

From b3518ab7e10f5eabf5ef06a495cc659079e0447c Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans" <bobby@apache.org>
Date: Tue, 24 Sep 2024 10:15:38 -0500
Subject: [PATCH 5/7] Add in option for Java JSON APIs to do column pruning in
 CUDF (#16796)

This adds an option to enable column pruning when reading JSON through the Java APIs.

This is still in draft because there are test failures if column pruning is turned on for the existing tests.

https://github.com/rapidsai/cudf/issues/16797

That said, the performance impact from enabling column pruning on some queries is huge. For one query in particular, the current code takes 161.5 seconds, and with cuDF column pruning it takes just 16.5 seconds. That is roughly a 10x speedup for a fairly real-world query.

Authors:
  - Robert (Bobby) Evans (https://github.com/revans2)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/16796
---
 .../main/java/ai/rapids/cudf/JSONOptions.java   | 12 ++++++++++++
 java/src/main/java/ai/rapids/cudf/Table.java    | 17 +++++++++++++++++
 java/src/main/native/src/TableJni.cpp           | 12 +++++++++---
 3 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/JSONOptions.java b/java/src/main/java/ai/rapids/cudf/JSONOptions.java
index 17b497be5ee..2bb74c3e3b1 100644
--- a/java/src/main/java/ai/rapids/cudf/JSONOptions.java
+++ b/java/src/main/java/ai/rapids/cudf/JSONOptions.java
@@ -38,6 +38,7 @@ public final class JSONOptions extends ColumnFilterOptions {
   private final boolean allowLeadingZeros;
   private final boolean allowNonNumericNumbers;
   private final boolean allowUnquotedControlChars;
+  private final boolean cudfPruneSchema;
   private final byte lineDelimiter;
 
   private JSONOptions(Builder builder) {
@@ -53,9 +54,14 @@ private JSONOptions(Builder builder) {
     allowLeadingZeros = builder.allowLeadingZeros;
     allowNonNumericNumbers = builder.allowNonNumericNumbers;
     allowUnquotedControlChars = builder.allowUnquotedControlChars;
+    cudfPruneSchema = builder.cudfPruneSchema;
     lineDelimiter = builder.lineDelimiter;
   }
 
+  public boolean shouldCudfPruneSchema() {
+    return cudfPruneSchema;
+  }
+
   public byte getLineDelimiter() {
     return lineDelimiter;
   }
@@ -129,8 +135,14 @@ public static final class Builder  extends ColumnFilterOptions.Builder<JSONOptio
     private boolean mixedTypesAsStrings = false;
     private boolean keepQuotes = false;
 
+    private boolean cudfPruneSchema = false;
     private byte lineDelimiter = '\n';
 
+    public Builder withCudfPruneSchema(boolean prune) {
+      cudfPruneSchema = prune;
+      return this;
+    }
+
     public Builder withLineDelimiter(char delimiter) {
       if (delimiter > Byte.MAX_VALUE) {
         throw new IllegalArgumentException("Only basic ASCII values are supported as line delimiters " + delimiter);
diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index 19c72809cea..6d370ca27b2 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -259,6 +259,7 @@ private static native long readJSON(int[] numChildren, String[] columnNames,
                                         boolean allowLeadingZeros,
                                         boolean allowNonNumericNumbers,
                                         boolean allowUnquotedControl,
+                                        boolean pruneColumns,
                                         byte lineDelimiter) throws CudfException;
 
   private static native long readJSONFromDataSource(int[] numChildren, String[] columnNames,
@@ -273,6 +274,7 @@ private static native long readJSONFromDataSource(int[] numChildren, String[] co
                                       boolean allowLeadingZeros,
                                       boolean allowNonNumericNumbers,
                                       boolean allowUnquotedControl,
+                                      boolean pruneColumns,
                                       byte lineDelimiter,
                                       long dsHandle) throws CudfException;
 
@@ -1312,6 +1314,10 @@ private static Table gatherJSONColumns(Schema schema, TableWithMeta twm, int emp
    * @return the file parsed as a table on the GPU.
    */
   public static Table readJSON(Schema schema, JSONOptions opts, File path) {
+    // only prune the schema if one is provided
+    boolean cudfPruneSchema = schema.getColumnNames() != null &&
+        schema.getColumnNames().length != 0 &&
+        opts.shouldCudfPruneSchema();
     try (TableWithMeta twm = new TableWithMeta(
             readJSON(schema.getFlattenedNumChildren(), schema.getFlattenedColumnNames(),
                     schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(),
@@ -1326,6 +1332,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) {
                     opts.leadingZerosAllowed(),
                     opts.nonNumericNumbersAllowed(),
                     opts.unquotedControlChars(),
+                    cudfPruneSchema,
                     opts.getLineDelimiter()))) {
 
       return gatherJSONColumns(schema, twm, -1);
@@ -1472,6 +1479,10 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
     assert len > 0;
     assert len <= buffer.length - offset;
     assert offset >= 0 && offset < buffer.length;
+    // only prune the schema if one is provided
+    boolean cudfPruneSchema = schema.getColumnNames() != null &&
+        schema.getColumnNames().length != 0 &&
+        opts.shouldCudfPruneSchema();
     try (TableWithMeta twm = new TableWithMeta(readJSON(
             schema.getFlattenedNumChildren(), schema.getFlattenedColumnNames(),
             schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(), null,
@@ -1487,6 +1498,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b
             opts.leadingZerosAllowed(),
             opts.nonNumericNumbersAllowed(),
             opts.unquotedControlChars(),
+            cudfPruneSchema,
             opts.getLineDelimiter()))) {
       return gatherJSONColumns(schema, twm, emptyRowCount);
     }
@@ -1513,6 +1525,10 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds) {
    */
   public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int emptyRowCount) {
     long dsHandle = DataSourceHelper.createWrapperDataSource(ds);
+    // only prune the schema if one is provided
+    boolean cudfPruneSchema = schema.getColumnNames() != null &&
+        schema.getColumnNames().length != 0 &&
+        opts.shouldCudfPruneSchema();
     try (TableWithMeta twm = new TableWithMeta(readJSONFromDataSource(schema.getFlattenedNumChildren(),
         schema.getFlattenedColumnNames(), schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(),
         opts.isDayFirst(),
@@ -1526,6 +1542,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int
         opts.leadingZerosAllowed(),
         opts.nonNumericNumbersAllowed(),
         opts.unquotedControlChars(),
+        cudfPruneSchema,
         opts.getLineDelimiter(),
         dsHandle))) {
       return gatherJSONColumns(schema, twm, emptyRowCount);
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index 96d4c2c4eeb..0f77da54152 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -1649,7 +1649,8 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env,
         .mixed_types_as_string(mixed_types_as_string)
         .delimiter(static_cast<char>(line_delimiter))
         .strict_validation(strict_validation)
-        .keep_quotes(keep_quotes);
+        .keep_quotes(keep_quotes)
+        .prune_columns(false);
     if (strict_validation) {
       opts.numeric_leading_zeros(allow_leading_zeros)
         .nonnumeric_numbers(allow_nonnumeric_numbers)
@@ -1703,6 +1704,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env,
         .normalize_whitespace(static_cast<bool>(normalize_whitespace))
         .strict_validation(strict_validation)
         .mixed_types_as_string(mixed_types_as_string)
+        .prune_columns(false)
         .delimiter(static_cast<char>(line_delimiter))
         .keep_quotes(keep_quotes);
     if (strict_validation) {
@@ -1818,6 +1820,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
                                                  jboolean allow_leading_zeros,
                                                  jboolean allow_nonnumeric_numbers,
                                                  jboolean allow_unquoted_control,
+                                                 jboolean prune_columns,
                                                  jbyte line_delimiter,
                                                  jlong ds_handle)
 {
@@ -1855,7 +1858,8 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env,
         .mixed_types_as_string(mixed_types_as_string)
         .delimiter(static_cast<char>(line_delimiter))
         .strict_validation(strict_validation)
-        .keep_quotes(keep_quotes);
+        .keep_quotes(keep_quotes)
+        .prune_columns(prune_columns);
     if (strict_validation) {
       opts.numeric_leading_zeros(allow_leading_zeros)
         .nonnumeric_numbers(allow_nonnumeric_numbers)
@@ -1915,6 +1919,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
                                                            jboolean allow_leading_zeros,
                                                            jboolean allow_nonnumeric_numbers,
                                                            jboolean allow_unquoted_control,
+                                                           jboolean prune_columns,
                                                            jbyte line_delimiter)
 {
   bool read_buffer = true;
@@ -1966,7 +1971,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env,
         .mixed_types_as_string(mixed_types_as_string)
         .delimiter(static_cast<char>(line_delimiter))
         .strict_validation(strict_validation)
-        .keep_quotes(keep_quotes);
+        .keep_quotes(keep_quotes)
+        .prune_columns(prune_columns);
     if (strict_validation) {
       opts.numeric_leading_zeros(allow_leading_zeros)
         .nonnumeric_numbers(allow_nonnumeric_numbers)

From f8db575330dddf5f32df049ec9928018697fdef3 Mon Sep 17 00:00:00 2001
From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com>
Date: Tue, 24 Sep 2024 14:11:02 -0500
Subject: [PATCH 6/7] Update update-version.sh to use packaging lib (#16891)

This PR updates the update-version.sh script to use the packaging
library, given that setuptools is no longer included by default in
Python 3.12.
---
 ci/release/update-version.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index b0346327319..f73e88bc0c8 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -25,9 +25,9 @@ NEXT_PATCH=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[3]}')
 NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
 
 # Need to distutils-normalize the versions for some use cases
-CURRENT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${CURRENT_SHORT_TAG}'))")
-NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
-PATCH_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_PATCH}'))")
+CURRENT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${CURRENT_SHORT_TAG}'))")
+NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))")
+PATCH_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_PATCH}'))")
 
 echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"
 

From 73fa557186932fa867a0516f8947bb25b97d0f29 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Tue, 24 Sep 2024 18:43:02 -0500
Subject: [PATCH 7/7] Update oldest deps for `pyarrow` & `numpy` (#16883)

We recently pinned our `dask-expr` version to `1.1.14` (https://github.com/rapidsai/rapids-dask-dependency/pull/64). That pin, together with the latest `dask`, appears to require `pyarrow>=14.0.1`, which is causing failures in our CI matrix when running tests with the oldest dependencies. This PR bumps the minimum `pyarrow` version in our oldest deps.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16883
---
 ci/cudf_pandas_scripts/run_tests.sh |  4 ++--
 ci/test_python_common.sh            |  4 ++--
 ci/test_python_cudf.sh              |  2 +-
 ci/test_python_other.sh             |  2 +-
 dependencies.yaml                   | 36 +++++++++++++++++++++++++----
 5 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh
index c6228a4ef33..f6bdc6f9484 100755
--- a/ci/cudf_pandas_scripts/run_tests.sh
+++ b/ci/cudf_pandas_scripts/run_tests.sh
@@ -56,10 +56,10 @@ else
 
     echo "" > ./constraints.txt
     if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
-        # `test_python` constraints are for `[test]` not `[cudf-pandas-tests]`
+        # `test_python_cudf_pandas` constraints are for `[test]` not `[cudf-pandas-tests]`
         rapids-dependency-file-generator \
             --output requirements \
-            --file-key test_python \
+            --file-key test_python_cudf_pandas \
             --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
         | tee ./constraints.txt
     fi
diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh
index d0675b0431a..dc70661a17a 100755
--- a/ci/test_python_common.sh
+++ b/ci/test_python_common.sh
@@ -10,10 +10,10 @@ set -euo pipefail
 rapids-logger "Generate Python testing dependencies"
 
 ENV_YAML_DIR="$(mktemp -d)"
-
+FILE_KEY="${1:?usage: source ci/test_python_common.sh <file-key>}"  # dependencies.yaml file key chosen by the sourcing script
 rapids-dependency-file-generator \
   --output conda \
-  --file-key test_python \
+  --file-key "${FILE_KEY}" \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
     | tee "${ENV_YAML_DIR}/env.yaml"
 
diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh
index ae34047e87f..2386414b32e 100755
--- a/ci/test_python_cudf.sh
+++ b/ci/test_python_cudf.sh
@@ -5,7 +5,7 @@
 cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../;
 
 # Common setup steps shared by Python test jobs
-source ./ci/test_python_common.sh
+source ./ci/test_python_common.sh test_python_cudf
 
 rapids-logger "Check GPU usage"
 nvidia-smi
diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh
index 06a24773cae..67c97ad29a5 100755
--- a/ci/test_python_other.sh
+++ b/ci/test_python_other.sh
@@ -5,7 +5,7 @@
 cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
 
 # Common setup steps shared by Python test jobs
-source ./ci/test_python_common.sh
+source ./ci/test_python_common.sh test_python_other
 
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
diff --git a/dependencies.yaml b/dependencies.yaml
index 01edcb3889a..7a9c9b8486d 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -43,15 +43,28 @@ files:
     includes:
       - cuda_version
       - test_cpp
-  test_python:
+  test_python_cudf_pandas:
     output: none
     includes:
       - cuda_version
       - py_version
       - test_python_common
       - test_python_cudf
-      - test_python_dask_cudf
       - test_python_cudf_pandas
+  test_python_cudf:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_python_common
+      - test_python_cudf
+  test_python_other:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_python_common
+      - test_python_dask_cudf
   test_java:
     output: none
     includes:
@@ -707,9 +720,7 @@ dependencies:
           - matrix: {dependencies: "oldest"}
             packages:
               - numba==0.57.*
-              - numpy==1.23.*
               - pandas==2.0.*
-              - pyarrow==14.0.0
           - matrix:
             packages:
       - output_types: conda
@@ -764,6 +775,14 @@ dependencies:
           - &transformers transformers==4.39.3
           - tzdata
     specific:
+      - output_types: [conda, requirements]
+        matrices:
+          - matrix: {dependencies: "oldest"}
+            packages:
+              - numpy==1.23.*
+              - pyarrow==14.0.0
+          - matrix:
+            packages:
       - output_types: conda
         matrices:
           - matrix:
@@ -783,6 +802,15 @@ dependencies:
         packages:
           - dask-cuda==24.10.*,>=0.0.0a0
           - *numba
+    specific:
+      - output_types: [conda, requirements]
+        matrices:
+          - matrix: {dependencies: "oldest"}
+            packages:
+              - numpy==1.24.*
+              - pyarrow==14.0.1
+          - matrix:
+            packages:
   depends_on_libcudf:
     common:
       - output_types: conda