diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 9943b02a521..0ea4d5c54dc 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -67,7 +67,27 @@ jobs:
       node_type: "gpu-v100-latest-1"
       run_script: "ci/build_docs.sh"
       sha: ${{ inputs.sha }}
+  wheel-build-libcudf:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      script: ci/build_wheel_libcudf.sh
+  wheel-publish-libcudf:
+    needs: wheel-build-libcudf
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      package-name: libcudf
   wheel-build-pylibcudf:
+    needs: [wheel-publish-libcudf]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
     with:
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 74bdc666c68..2e2a8b6b9bc 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -23,6 +23,7 @@ jobs:
       - static-configure
       - conda-notebook-tests
       - docs-build
+      - wheel-build-libcudf
      - wheel-build-pylibcudf
      - wheel-build-cudf
      - wheel-tests-cudf
@@ -121,10 +122,18 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: "ci/build_docs.sh"
-  wheel-build-pylibcudf:
+  wheel-build-libcudf:
     needs: checks
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
+    with:
+      matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
+      build_type: pull-request
+      script: "ci/build_wheel_libcudf.sh"
+  wheel-build-pylibcudf:
+    needs: [checks, wheel-build-libcudf]
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10
     with:
       build_type: pull-request
       script: "ci/build_wheel_pylibcudf.sh"
diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh
index 7c0fb1efebe..cf33703f544 100755
--- a/ci/build_wheel_cudf.sh
+++ b/ci/build_wheel_cudf.sh
@@ -5,16 +5,28 @@ set -euo pipefail
 
 package_dir="python/cudf"
 
-export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON"
-
-# Download the pylibcudf built in the previous step
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 /tmp/pylibcudf_dist
-echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" > /tmp/constraints.txt
+# Download the libcudf and pylibcudf wheels built earlier in this CI run, so
+# that 'cudf' wheel builds always use the 'libcudf' and 'pylibcudf' built here.
+#
+# Exporting PIP_CONSTRAINT is necessary to ensure the constraints are also
+# applied when pip creates the isolated build environment.
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist
+RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python /tmp/pylibcudf_dist
+echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt
+echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt
 export PIP_CONSTRAINT="/tmp/constraints.txt"
+
 ./ci/build_wheel.sh ${package_dir}
 
-python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/*
+python -m auditwheel repair \
+    --exclude libcudf.so \
+    --exclude libarrow.so.1601 \
+    --exclude libnvcomp.so \
+    --exclude libnvcomp_bitcomp.so \
+    --exclude libnvcomp_gdeflate.so \
+    -w ${package_dir}/final_dist \
+    ${package_dir}/dist/*
 
-RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
+RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
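Why the constraint goes through the `PIP_CONSTRAINT` environment variable rather than a `--constraint` flag: `./ci/build_wheel.sh` builds in an isolated environment, and build-system requirements are resolved while pip creates that environment, before any per-command option could take effect. A minimal sketch of the mechanism (the `cu12` suffix and paths are illustrative, not part of this change):

```bash
# Point a constraints file at a locally built wheel via a direct file:// URL.
echo "pylibcudf-cu12 @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" > /tmp/constraints.txt
export PIP_CONSTRAINT=/tmp/constraints.txt

# pip also reads PIP_CONSTRAINT when it populates the isolated build
# environment, so a build requirement on 'pylibcudf-cu12' resolves to the
# local wheel instead of an index download.
pip wheel python/cudf --no-deps -w dist/
```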
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python /tmp/pylibcudf_dist +echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt +echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt export PIP_CONSTRAINT="/tmp/constraints.txt" + ./ci/build_wheel.sh ${package_dir} -python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* +python -m auditwheel repair \ + --exclude libcudf.so \ + --exclude libarrow.so.1601 \ + --exclude libnvcomp.so \ + --exclude libnvcomp_bitcomp.so \ + --exclude libnvcomp_gdeflate.so \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh new file mode 100755 index 00000000000..9694c3f6144 --- /dev/null +++ b/ci/build_wheel_libcudf.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Copyright (c) 2023-2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir="python/libcudf" + +./ci/build_wheel.sh ${package_dir} + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +mkdir -p ${package_dir}/final_dist +python -m auditwheel repair --exclude libarrow.so.1601 -w ${package_dir}/final_dist ${package_dir}/dist/* + +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh index b25d118ff81..7181a49d397 100755 --- a/ci/build_wheel_pylibcudf.sh +++ b/ci/build_wheel_pylibcudf.sh @@ -5,12 +5,26 @@ set -euo pipefail package_dir="python/pylibcudf" -export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON" +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -./ci/build_wheel.sh ${package_dir} +# Downloads libcudf wheel from this current build, +# then ensures 'pylibcudf' wheel builds always use the 'libcudf' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. 
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist +echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" -python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* +./ci/build_wheel.sh ${package_dir} +python -m auditwheel repair \ + --exclude libcudf.so \ + --exclude libarrow.so.1601 \ + --exclude libnvcomp.so \ + --exclude libnvcomp_bitcomp.so \ + --exclude libnvcomp_gdeflate.so \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* -RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist diff --git a/ci/cudf_pandas_scripts/fetch_pandas_versions.py b/ci/cudf_pandas_scripts/fetch_pandas_versions.py new file mode 100644 index 00000000000..b6913f947e8 --- /dev/null +++ b/ci/cudf_pandas_scripts/fetch_pandas_versions.py @@ -0,0 +1,24 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import requests +from packaging.version import Version +from packaging.specifiers import SpecifierSet +import argparse + +def get_pandas_versions(pandas_range): + url = "https://pypi.org/pypi/pandas/json" + response = requests.get(url) + data = response.json() + versions = [Version(v) for v in data['releases']] + specifier = SpecifierSet(pandas_range.lstrip("pandas")) + matching_versions = [v for v in versions if v in specifier] + matching_minors = sorted(set(".".join((str(v.major), str(v.minor))) for v in matching_versions), key=Version) + return matching_minors + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Filter pandas versions by prefix.") + parser.add_argument("pandas_range", type=str, help="The version prefix to filter by.") + args = parser.parse_args() + + versions = get_pandas_versions(args.pandas_range) + print(','.join(versions)) diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index f3a497d3bf0..8487f663311 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -12,9 +12,10 @@ rapids-logger "PR number: ${RAPIDS_REF_NAME:-"unknown"}" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -# Download the cudf and pylibcudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# Download the cudf, libcudf, and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist echo "" > ./constraints.txt if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then @@ -31,6 +32,7 @@ python -m pip install \ -v \ --constraint ./constraints.txt \ "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,pandas-tests]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh index 04dc1785e6e..ca4ab77925f 100755 --- 
diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh
index f3a497d3bf0..8487f663311 100755
--- a/ci/cudf_pandas_scripts/pandas-tests/run.sh
+++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh
@@ -12,9 +12,10 @@ rapids-logger "PR number: ${RAPIDS_REF_NAME:-"unknown"}"
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-# Download the cudf and pylibcudf built in the previous step
-RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
-RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+# Download the cudf, libcudf, and pylibcudf wheels built in the previous step
+RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
+RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
 
 echo "" > ./constraints.txt
 if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
@@ -31,6 +32,7 @@ python -m pip install \
    -v \
    --constraint ./constraints.txt \
    "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,pandas-tests]" \
+    "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
    "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
 
 RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh
index 04dc1785e6e..ca4ab77925f 100755
--- a/ci/cudf_pandas_scripts/run_tests.sh
+++ b/ci/cudf_pandas_scripts/run_tests.sh
@@ -9,13 +9,20 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
 RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"}
 mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}"
 
+DEPENDENCIES_PATH="dependencies.yaml"
+package_name="pandas"
+
+# Extract the pandas version constraint from dependencies.yaml
+pandas_version_constraint=$(grep -oP "pandas>=\d+\.\d+,\<\d+\.\d+\.\d+dev\d+" $DEPENDENCIES_PATH)
+
 # Function to display script usage
 function display_usage {
-    echo "Usage: $0 [--no-cudf]"
+    echo "Usage: $0 [--no-cudf] [pandas-version]"
 }
 
 # Default value for the --no-cudf option
 no_cudf=false
+PANDAS_VERSION=""
 
 # Parse command-line arguments
 while [[ $# -gt 0 ]]; do
@@ -25,9 +32,14 @@ while [[ $# -gt 0 ]]; do
            shift
            ;;
        *)
-            echo "Error: Unknown option $1"
-            display_usage
-            exit 1
+            if [[ -z "$PANDAS_VERSION" ]]; then
+                PANDAS_VERSION=$1
+                shift
+            else
+                echo "Error: Unknown option $1"
+                display_usage
+                exit 1
+            fi
            ;;
    esac
 done
@@ -37,9 +49,10 @@ if [ "$no_cudf" = true ]; then
 else
    RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-    # Download the cudf and pylibcudf built in the previous step
-    RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
-    RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+    # Download the cudf, libcudf, and pylibcudf wheels built in the previous step
+    RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
+    RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
+    RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
 
    echo "" > ./constraints.txt
    if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
@@ -55,6 +68,7 @@ else
        -v \
        --constraint ./constraints.txt \
        "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,cudf-pandas-tests]" \
+        "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
        "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
 fi
 
@@ -64,3 +78,19 @@ python -m pytest -p cudf.pandas \
    --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
    --cov-report=term \
    ./python/cudf/cudf_pandas_tests/
+
+output=$(python ci/cudf_pandas_scripts/fetch_pandas_versions.py $pandas_version_constraint)
+
+# Convert the comma-separated list into an array
+IFS=',' read -r -a versions <<< "$output"
+
+for version in "${versions[@]}"; do
+    echo "Installing pandas version: ${version}"
+    python -m pip install "pandas==${version}"
+    python -m pytest -p cudf.pandas \
+        --cov-config=./python/cudf/.coveragerc \
+        --cov=cudf \
+        --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
+        --cov-report=term \
+        ./python/cudf/cudf_pandas_tests/
+done
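The `ci/release/update-version.sh` hunk that follows is a regex-dialect fix: without `-E`, `sed` uses POSIX basic regular expressions, in which an unescaped `+` matches a literal plus sign, so the old pattern never matched strings like `branch-24.08`. A quick demonstration with GNU sed:

```bash
# BRE: unescaped '+' is a literal character, so nothing matches or changes.
echo 'branch-24.08' | sed 's/branch-[0-9]+\.[0-9]+/branch-24.10/g'    # branch-24.08
# BRE with \+ quantifiers, as in the fixed line: the substitution applies.
echo 'branch-24.08' | sed 's/branch-[0-9]\+\.[0-9]\+/branch-24.10/g'  # branch-24.10
```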
+sed_runner "s/branch-[0-9]\+\.[0-9]\+/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_cudf_polars.sh # Java files NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}-SNAPSHOT" diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh index 340814e1f7e..c1003880d4b 100755 --- a/ci/test_wheel_cudf.sh +++ b/ci/test_wheel_cudf.sh @@ -5,9 +5,10 @@ set -eou pipefail RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -# Download the cudf and pylibcudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# Download the cudf, libcudf, and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist rapids-logger "Install cudf, pylibcudf, and test requirements" @@ -26,6 +27,7 @@ python -m pip install \ -v \ --constraint ./constraints.txt \ "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh index 841794e08d6..9844090258a 100755 --- a/ci/test_wheel_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -18,10 +18,11 @@ else fi RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist -# Download pylibcudf built in the previous step -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# Download libcudf and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist rapids-logger "Installing cudf_polars and its dependencies" # Constraint to minimum dependency versions if job is set up as "oldest" @@ -39,6 +40,7 @@ python -m pip install \ -v \ --constraint ./constraints.txt \ "$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" rapids-logger "Run cudf_polars tests" diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index d3b3fcb63bd..0d39807d56c 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -4,11 +4,12 @@ set -eou pipefail RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist -# Download the cudf and pylibcudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist 
-RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# Download the cudf, libcudf, and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements" # Constraint to minimum dependency versions if job is set up as "oldest" @@ -27,6 +28,7 @@ python -m pip install \ --constraint ./constraints.txt \ "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ff00c484501..6b8bb26825b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -363,17 +363,14 @@ add_library( src/hash/sha512_hash.cu src/hash/xxhash_64.cu src/interop/dlpack.cpp - src/interop/from_arrow.cu src/interop/arrow_utilities.cpp src/interop/decimal_conversion_utilities.cu - src/interop/to_arrow.cu src/interop/to_arrow_device.cu src/interop/to_arrow_host.cu src/interop/from_arrow_device.cu src/interop/from_arrow_host.cu src/interop/from_arrow_stream.cu src/interop/to_arrow_schema.cpp - src/interop/detail/arrow_allocator.cpp src/io/avro/avro.cpp src/io/avro/avro_gpu.cu src/io/avro/reader_impl.cu diff --git a/cpp/cmake/thirdparty/get_gtest.cmake b/cpp/cmake/thirdparty/get_gtest.cmake index ec8cbd8c568..10e6b026d9a 100644 --- a/cpp/cmake/thirdparty/get_gtest.cmake +++ b/cpp/cmake/thirdparty/get_gtest.cmake @@ -16,18 +16,9 @@ function(find_and_configure_gtest) include(${rapids-cmake-dir}/cpm/gtest.cmake) - # Mark all the non explicit googletest symbols as hidden. This ensures that libcudftestutil can be - # used by consumers with a different shared gtest. - set(gtest_hide_internal_symbols ON) - # Find or install GoogleTest rapids_cpm_gtest(BUILD_STATIC) - # Mark all the explicit googletest symbols as hidden. This ensures that libcudftestutil can be - # used by consumers with a different shared gtest. - if(TARGET gtest) - target_compile_definitions(gtest PUBLIC "$") - endif() endfunction() find_and_configure_gtest() diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 0b9319ba663..0d8f078c9d1 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -16,29 +16,13 @@ #pragma once -// We disable warning 611 because the `arrow::TableBatchReader` only partially -// override the `ReadNext` method of `arrow::RecordBatchReader::ReadNext` -// triggering warning 611-D from nvcc. -#ifdef __CUDACC__ -#pragma nv_diag_suppress 611 -#pragma nv_diag_suppress 2810 -#endif -#include - -#include -#ifdef __CUDACC__ -#pragma nv_diag_default 611 -#pragma nv_diag_default 2810 -#endif - #include #include #include #include #include - -#include +#include namespace CUDF_EXPORT cudf { namespace detail { @@ -61,89 +45,6 @@ DLManagedTensor* to_dlpack(table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -// Creating arrow as per given type_id and buffer arguments -template -std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... 
args) -{ - switch (id) { - case type_id::BOOL8: return std::make_shared(std::forward(args)...); - case type_id::INT8: return std::make_shared(std::forward(args)...); - case type_id::INT16: return std::make_shared(std::forward(args)...); - case type_id::INT32: return std::make_shared(std::forward(args)...); - case type_id::INT64: return std::make_shared(std::forward(args)...); - case type_id::UINT8: return std::make_shared(std::forward(args)...); - case type_id::UINT16: return std::make_shared(std::forward(args)...); - case type_id::UINT32: return std::make_shared(std::forward(args)...); - case type_id::UINT64: return std::make_shared(std::forward(args)...); - case type_id::FLOAT32: return std::make_shared(std::forward(args)...); - case type_id::FLOAT64: return std::make_shared(std::forward(args)...); - case type_id::TIMESTAMP_DAYS: - return std::make_shared(std::make_shared(), - std::forward(args)...); - case type_id::TIMESTAMP_SECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::SECOND), - std::forward(args)...); - case type_id::TIMESTAMP_MILLISECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::MILLI), - std::forward(args)...); - case type_id::TIMESTAMP_MICROSECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::MICRO), - std::forward(args)...); - case type_id::TIMESTAMP_NANOSECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::NANO), - std::forward(args)...); - case type_id::DURATION_SECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::SECOND), - std::forward(args)...); - case type_id::DURATION_MILLISECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::MILLI), - std::forward(args)...); - case type_id::DURATION_MICROSECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::MICRO), - std::forward(args)...); - case type_id::DURATION_NANOSECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::NANO), - std::forward(args)...); - default: CUDF_FAIL("Unsupported type_id conversion to arrow"); - } -} - -// Converting arrow type to cudf type -data_type arrow_to_cudf_type(arrow::DataType const& arrow_type); - -/** - * @copydoc cudf::to_arrow(table_view input, std::vector const& metadata, - * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr) - */ -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr); - -/** - * @copydoc cudf::to_arrow(cudf::scalar const& input, column_metadata const& metadata, - * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr) - */ -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr); -/** - * @copydoc cudf::from_arrow(arrow::Table const& input_table, rmm::cuda_stream_view stream, - * rmm::device_async_resource_ref mr) - */ -std::unique_ptr from_arrow(arrow::Table const& input_table, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::from_arrow(arrow::Scalar const& input, rmm::cuda_stream_view stream, - * rmm::device_async_resource_ref mr) - */ -std::unique_ptr from_arrow(arrow::Scalar const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - /** * @brief Return a maximum precision for a given type. 
* diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 9a8f87b4a46..0f52b0f7b31 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -16,21 +16,6 @@ #pragma once -// We disable warning 611 because the `arrow::TableBatchReader` only partially -// override the `ReadNext` method of `arrow::RecordBatchReader::ReadNext` -// triggering warning 611-D from nvcc. -#ifdef __CUDACC__ -#pragma nv_diag_suppress 611 -#pragma nv_diag_suppress 2810 -#endif -#include - -#include -#ifdef __CUDACC__ -#pragma nv_diag_default 611 -#pragma nv_diag_default 2810 -#endif - #include #include #include @@ -131,59 +116,6 @@ struct column_metadata { column_metadata() = default; }; -/** - * @brief Create `arrow::Table` from cudf table `input` - * - * Converts the `cudf::table_view` to `arrow::Table` with the provided - * metadata `column_names`. - * - * @deprecated Since 24.08. Use cudf::to_arrow_host instead. - * - * @throws cudf::logic_error if `column_names` size doesn't match with number of columns. - * - * @param input table_view that needs to be converted to arrow Table - * @param metadata Contains hierarchy of names of columns and children - * @param stream CUDA stream used for device memory operations and kernel launches - * @param ar_mr arrow memory pool to allocate memory for arrow Table - * @return arrow Table generated from `input` - * - * @note For decimals, since the precision is not stored for them in libcudf, - * it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type - * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision - * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be - * converted to Arrow decimal128 of the precision 38. - */ -[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr to_arrow( - table_view input, - std::vector const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); - -/** - * @brief Create `arrow::Scalar` from cudf scalar `input` - * - * Converts the `cudf::scalar` to `arrow::Scalar`. - * - * @deprecated Since 24.08. - * - * @param input scalar that needs to be converted to arrow Scalar - * @param metadata Contains hierarchy of names of columns and children - * @param stream CUDA stream used for device memory operations and kernel launches - * @param ar_mr arrow memory pool to allocate memory for arrow Scalar - * @return arrow Scalar generated from `input` - * - * @note For decimals, since the precision is not stored for them in libcudf, - * it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type - * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision - * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be - * converted to Arrow decimal128 of the precision 38. 
- */ -[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr to_arrow( - cudf::scalar const& input, - column_metadata const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); - /** * @brief typedef for a unique_ptr to an ArrowSchema with custom deleter * @@ -386,39 +318,6 @@ unique_device_array_t to_arrow_host( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); -/** - * @brief Create `cudf::table` from given arrow Table input - * - * @deprecated Since 24.08. Use cudf::from_arrow_host instead. - * - * @param input arrow:Table that needs to be converted to `cudf::table` - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate `cudf::table` - * @return cudf table generated from given arrow Table - */ -[[deprecated("Use cudf::from_arrow_host")]] std::unique_ptr
from_arrow( - arrow::Table const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Create `cudf::scalar` from given arrow Scalar input - * - * @deprecated Since 24.08. Use arrow's `MakeArrayFromScalar` on the - * input, followed by `ExportArray` to obtain something that can be - * consumed by `from_arrow_host`. Then use `cudf::get_element` to - * extract a device scalar from the column. - * - * @param input `arrow::Scalar` that needs to be converted to `cudf::scalar` - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate `cudf::scalar` - * @return cudf scalar generated from given arrow Scalar - */ -[[deprecated("See docstring for migration strategies")]] std::unique_ptr from_arrow( - arrow::Scalar const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - /** * @brief Create `cudf::table` from given ArrowArray and ArrowSchema input * diff --git a/cpp/src/interop/detail/arrow_allocator.cpp b/cpp/src/interop/detail/arrow_allocator.cpp deleted file mode 100644 index 2a19a5360fe..00000000000 --- a/cpp/src/interop/detail/arrow_allocator.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -#include - -namespace cudf { -namespace detail { - -/* - Enable Transparent Huge Pages (THP) for large (>4MB) allocations. - `buf` is returned untouched. - Enabling THP can improve performance of device-host memory transfers - significantly, see . -*/ -template -T enable_hugepage(T&& buf) -{ - if (buf->size() < (1u << 22u)) { // Smaller than 4 MB - return std::move(buf); - } - -#ifdef MADV_HUGEPAGE - auto const pagesize = sysconf(_SC_PAGESIZE); - void* addr = const_cast(buf->data()); - if (addr == nullptr) { return std::move(buf); } - auto length{static_cast(buf->size())}; - if (std::align(pagesize, pagesize, addr, length)) { - // Intentionally not checking for errors that may be returned by older kernel versions; - // optimistically tries enabling huge pages. - madvise(addr, length, MADV_HUGEPAGE); - } -#endif - return std::move(buf); -} - -std::unique_ptr allocate_arrow_buffer(int64_t const size, arrow::MemoryPool* ar_mr) -{ - /* - nvcc 11.0 generates Internal Compiler Error during codegen when arrow::AllocateBuffer - and `ValueOrDie` are used inside a CUDA compilation unit. 
- - To work around this issue we compile an allocation shim in C++ and use - that from our cuda sources - */ - arrow::Result> result = arrow::AllocateBuffer(size, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow buffer"); - return enable_hugepage(std::move(result).ValueOrDie()); -} - -std::shared_ptr allocate_arrow_bitmap(int64_t const size, arrow::MemoryPool* ar_mr) -{ - /* - nvcc 11.0 generates Internal Compiler Error during codegen when arrow::AllocateBuffer - and `ValueOrDie` are used inside a CUDA compilation unit. - - To work around this issue we compile an allocation shim in C++ and use - that from our cuda sources - */ - arrow::Result> result = arrow::AllocateBitmap(size, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow bitmap"); - return enable_hugepage(std::move(result).ValueOrDie()); -} - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/interop/detail/arrow_allocator.hpp b/cpp/src/interop/detail/arrow_allocator.hpp deleted file mode 100644 index 75c1baa0dca..00000000000 --- a/cpp/src/interop/detail/arrow_allocator.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace cudf { -namespace detail { - -// unique_ptr because that is what AllocateBuffer returns -std::unique_ptr allocate_arrow_buffer(int64_t const size, arrow::MemoryPool* ar_mr); - -// shared_ptr because that is what AllocateBitmap returns -std::shared_ptr allocate_arrow_bitmap(int64_t const size, arrow::MemoryPool* ar_mr); - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu deleted file mode 100644 index 579820cbae3..00000000000 --- a/cpp/src/interop/from_arrow.cu +++ /dev/null @@ -1,524 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -namespace cudf { - -namespace detail { -data_type arrow_to_cudf_type(arrow::DataType const& arrow_type) -{ - switch (arrow_type.id()) { - case arrow::Type::NA: return data_type(type_id::EMPTY); - case arrow::Type::BOOL: return data_type(type_id::BOOL8); - case arrow::Type::INT8: return data_type(type_id::INT8); - case arrow::Type::INT16: return data_type(type_id::INT16); - case arrow::Type::INT32: return data_type(type_id::INT32); - case arrow::Type::INT64: return data_type(type_id::INT64); - case arrow::Type::UINT8: return data_type(type_id::UINT8); - case arrow::Type::UINT16: return data_type(type_id::UINT16); - case arrow::Type::UINT32: return data_type(type_id::UINT32); - case arrow::Type::UINT64: return data_type(type_id::UINT64); - case arrow::Type::FLOAT: return data_type(type_id::FLOAT32); - case arrow::Type::DOUBLE: return data_type(type_id::FLOAT64); - case arrow::Type::DATE32: return data_type(type_id::TIMESTAMP_DAYS); - case arrow::Type::TIMESTAMP: { - auto type = static_cast(&arrow_type); - switch (type->unit()) { - case arrow::TimeUnit::type::SECOND: return data_type(type_id::TIMESTAMP_SECONDS); - case arrow::TimeUnit::type::MILLI: return data_type(type_id::TIMESTAMP_MILLISECONDS); - case arrow::TimeUnit::type::MICRO: return data_type(type_id::TIMESTAMP_MICROSECONDS); - case arrow::TimeUnit::type::NANO: return data_type(type_id::TIMESTAMP_NANOSECONDS); - default: CUDF_FAIL("Unsupported timestamp unit in arrow"); - } - } - case arrow::Type::DURATION: { - auto type = static_cast(&arrow_type); - switch (type->unit()) { - case arrow::TimeUnit::type::SECOND: return data_type(type_id::DURATION_SECONDS); - case arrow::TimeUnit::type::MILLI: return data_type(type_id::DURATION_MILLISECONDS); - case arrow::TimeUnit::type::MICRO: return data_type(type_id::DURATION_MICROSECONDS); - case arrow::TimeUnit::type::NANO: return data_type(type_id::DURATION_NANOSECONDS); - default: CUDF_FAIL("Unsupported duration unit in arrow"); - } - } - case arrow::Type::STRING: return data_type(type_id::STRING); - case arrow::Type::LARGE_STRING: return data_type(type_id::STRING); - case arrow::Type::DICTIONARY: return data_type(type_id::DICTIONARY32); - case arrow::Type::LIST: return data_type(type_id::LIST); - case arrow::Type::DECIMAL: { - auto const type = static_cast(&arrow_type); - return data_type{type_id::DECIMAL128, -type->scale()}; - } - case arrow::Type::STRUCT: return data_type(type_id::STRUCT); - default: CUDF_FAIL("Unsupported type_id conversion to cudf"); - } -} - -namespace { -/** - * @brief Functor to return column for a corresponding arrow array. column - * is formed from buffer underneath the arrow array along with any offset and - * change in length that array has. - */ -struct dispatch_to_cudf_column { - /** - * @brief Returns mask from an array without any offsets. 
- */ - std::unique_ptr get_mask_buffer(arrow::Array const& array, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - if (array.null_bitmap_data() == nullptr) { - return std::make_unique(0, stream, mr); - } - auto const null_bitmap_size = array.null_bitmap()->size(); - auto const allocation_size = - bitmask_allocation_size_bytes(static_cast(null_bitmap_size * CHAR_BIT)); - auto mask = std::make_unique(allocation_size, stream, mr); - auto mask_buffer = array.null_bitmap(); - CUDF_CUDA_TRY(cudaMemcpyAsync(mask->data(), - reinterpret_cast(mask_buffer->address()), - null_bitmap_size, - cudaMemcpyDefault, - stream.value())); - // Zero-initialize trailing padding bytes - auto const num_trailing_bytes = allocation_size - null_bitmap_size; - if (num_trailing_bytes > 0) { - auto trailing_bytes = static_cast(mask->data()) + null_bitmap_size; - CUDF_CUDA_TRY(cudaMemsetAsync(trailing_bytes, 0, num_trailing_bytes, stream.value())); - } - return mask; - } - - template ())> - std::unique_ptr operator()( - arrow::Array const&, data_type, bool, rmm::cuda_stream_view, rmm::device_async_resource_ref) - { - CUDF_FAIL("Unsupported type in from_arrow."); - } - - template ())> - std::unique_ptr operator()(arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - auto data_buffer = array.data()->buffers[1]; - size_type const num_rows = array.length(); - auto const has_nulls = skip_mask ? false : array.null_bitmap_data() != nullptr; - auto col = make_fixed_width_column(type, num_rows, mask_state::UNALLOCATED, stream, mr); - auto mutable_column_view = col->mutable_view(); - CUDF_CUDA_TRY(cudaMemcpyAsync( - mutable_column_view.data(), - reinterpret_cast(data_buffer->address()) + array.offset() * sizeof(T), - sizeof(T) * num_rows, - cudaMemcpyDefault, - stream.value())); - if (has_nulls) { - auto tmp_mask = get_mask_buffer(array, stream, mr); - - // If array is sliced, we have to copy whole mask and then take copy. - auto out_mask = (num_rows == static_cast(data_buffer->size() / sizeof(T))) - ? 
std::move(*tmp_mask) - : cudf::detail::copy_bitmask(static_cast(tmp_mask->data()), - array.offset(), - array.offset() + num_rows, - stream, - mr); - - col->set_null_mask(std::move(out_mask), array.null_count()); - } - - return col; - } -}; - -std::unique_ptr get_empty_type_column(size_type size) -{ - // this abomination is required by cuDF Python, which needs to handle - // [PyArrow null arrays](https://arrow.apache.org/docs/python/generated/pyarrow.NullArray.html) - // of finite length - return std::make_unique( - data_type(type_id::EMPTY), size, rmm::device_buffer{}, rmm::device_buffer{}, size); -} - -/** - * @brief Returns cudf column formed from given arrow array - * This has been introduced to take care of compiler error "error: explicit specialization of - * function must precede its first use" - */ -std::unique_ptr get_column(arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - using DeviceType = __int128_t; - - auto data_buffer = array.data()->buffers[1]; - auto const num_rows = static_cast(array.length()); - auto col = make_fixed_width_column(type, num_rows, mask_state::UNALLOCATED, stream, mr); - auto mutable_column_view = col->mutable_view(); - - CUDF_CUDA_TRY(cudaMemcpyAsync( - mutable_column_view.data(), - reinterpret_cast(data_buffer->address()) + array.offset() * sizeof(DeviceType), - sizeof(DeviceType) * num_rows, - cudaMemcpyDefault, - stream.value())); - - auto null_mask = [&] { - if (not skip_mask and array.null_bitmap_data()) { - auto temp_mask = get_mask_buffer(array, stream, mr); - // If array is sliced, we have to copy whole mask and then take copy. - return (num_rows == static_cast(data_buffer->size() / sizeof(DeviceType))) - ? std::move(*temp_mask.release()) - : cudf::detail::copy_bitmask(static_cast(temp_mask->data()), - array.offset(), - array.offset() + num_rows, - stream, - mr); - } - return rmm::device_buffer{}; - }(); - - col->set_null_mask(std::move(null_mask), array.null_count()); - return col; -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()(arrow::Array const& array, - data_type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto data_buffer = array.data()->buffers[1]; - // mask-to-bools expects the mask to be bitmask_type aligned/padded - auto data = rmm::device_buffer( - cudf::bitmask_allocation_size_bytes(data_buffer->size() * CHAR_BIT), stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(data.data(), - reinterpret_cast(data_buffer->address()), - data_buffer->size(), - cudaMemcpyDefault, - stream.value())); - auto out_col = mask_to_bools(static_cast(data.data()), - array.offset(), - array.offset() + array.length(), - stream, - mr); - - auto const has_nulls = skip_mask ? 
false : array.null_bitmap_data() != nullptr; - if (has_nulls) { - auto out_mask = - detail::copy_bitmask(static_cast(get_mask_buffer(array, stream, mr)->data()), - array.offset(), - array.offset() + array.length(), - stream, - mr); - - out_col->set_null_mask(std::move(out_mask), array.null_count()); - } - - return out_col; -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - if (array.length() == 0) { return make_empty_column(type_id::STRING); } - - std::unique_ptr offsets_column; - std::unique_ptr char_array; - - if (array.type_id() == arrow::Type::LARGE_STRING) { - auto str_array = static_cast(&array); - auto offset_array = std::make_unique( - str_array->value_offsets()->size() / sizeof(int64_t), str_array->value_offsets(), nullptr); - offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT64), true, stream, mr); - char_array = std::make_unique( - str_array->value_data()->size(), str_array->value_data(), nullptr); - } else if (array.type_id() == arrow::Type::STRING) { - auto str_array = static_cast(&array); - auto offset_array = std::make_unique( - str_array->value_offsets()->size() / sizeof(int32_t), str_array->value_offsets(), nullptr); - offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT32), true, stream, mr); - char_array = std::make_unique( - str_array->value_data()->size(), str_array->value_data(), nullptr); - } else { - throw std::runtime_error("Unsupported array type"); - } - - rmm::device_buffer chars(char_array->length(), stream, mr); - auto data_buffer = char_array->data()->buffers[1]; - CUDF_CUDA_TRY(cudaMemcpyAsync(chars.data(), - reinterpret_cast(data_buffer->address()), - chars.size(), - cudaMemcpyDefault, - stream.value())); - - auto const num_rows = offsets_column->size() - 1; - auto out_col = make_strings_column(num_rows, - std::move(offsets_column), - std::move(chars), - array.null_count(), - std::move(*get_mask_buffer(array, stream, mr))); - - return num_rows == array.length() - ? 
std::move(out_col) - : std::make_unique( - cudf::detail::slice(out_col->view(), - static_cast(array.offset()), - static_cast(array.offset() + array.length()), - stream), - stream, - mr); -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto dict_array = static_cast(&array); - auto dict_type = arrow_to_cudf_type(*(dict_array->dictionary()->type())); - auto keys_column = get_column(*(dict_array->dictionary()), dict_type, true, stream, mr); - auto ind_type = arrow_to_cudf_type(*(dict_array->indices()->type())); - - auto indices_column = get_column(*(dict_array->indices()), ind_type, false, stream, mr); - // If index type is not of type uint32_t, then cast it to uint32_t - auto const dict_indices_type = data_type{type_id::UINT32}; - if (indices_column->type().id() != dict_indices_type.id()) - indices_column = cudf::detail::cast(indices_column->view(), dict_indices_type, stream, mr); - - // Child columns shouldn't have masks and we need the mask in main column - auto column_contents = indices_column->release(); - indices_column = std::make_unique(dict_indices_type, - static_cast(array.length()), - std::move(*(column_contents.data)), - rmm::device_buffer{}, - 0); - - return make_dictionary_column(std::move(keys_column), - std::move(indices_column), - std::move(*(column_contents.null_mask)), - array.null_count()); -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto struct_array = static_cast(&array); - std::vector> child_columns; - // Offsets have already been applied to child - arrow::ArrayVector array_children = struct_array->fields(); - std::transform(array_children.cbegin(), - array_children.cend(), - std::back_inserter(child_columns), - [&mr, &stream](auto const& child_array) { - auto type = arrow_to_cudf_type(*(child_array->type())); - return get_column(*child_array, type, false, stream, mr); - }); - - auto out_mask = std::move(*(get_mask_buffer(array, stream, mr))); - if (struct_array->null_bitmap_data() != nullptr) { - out_mask = detail::copy_bitmask(static_cast(out_mask.data()), - array.offset(), - array.offset() + array.length(), - stream, - mr); - } - - return make_structs_column( - array.length(), move(child_columns), array.null_count(), std::move(out_mask), stream, mr); -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto list_array = static_cast(&array); - auto offset_array = std::make_unique( - list_array->value_offsets()->size() / sizeof(int32_t), list_array->value_offsets(), nullptr); - auto offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT32), true, stream, mr); - - auto child_type = arrow_to_cudf_type(*(list_array->values()->type())); - auto child_column = get_column(*(list_array->values()), child_type, false, stream, mr); - - auto const num_rows = offsets_column->size() - 1; - auto out_col = make_lists_column(num_rows, - std::move(offsets_column), - std::move(child_column), - array.null_count(), - std::move(*get_mask_buffer(array, stream, mr)), - stream, - mr); - - return num_rows == array.length() - ? 
std::move(out_col) - : std::make_unique( - cudf::detail::slice(out_col->view(), - static_cast(array.offset()), - static_cast(array.offset() + array.length()), - stream), - stream, - mr); -} - -std::unique_ptr get_column(arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return type.id() != type_id::EMPTY - ? type_dispatcher(type, dispatch_to_cudf_column{}, array, type, skip_mask, stream, mr) - : get_empty_type_column(array.length()); -} - -} // namespace - -std::unique_ptr
from_arrow(arrow::Table const& input_table, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - if (input_table.num_columns() == 0) { return std::make_unique
(); } - std::vector> columns; - auto chunked_arrays = input_table.columns(); - std::transform(chunked_arrays.begin(), - chunked_arrays.end(), - std::back_inserter(columns), - [&mr, &stream](auto const& chunked_array) { - std::vector> concat_columns; - auto cudf_type = arrow_to_cudf_type(*(chunked_array->type())); - auto array_chunks = chunked_array->chunks(); - if (cudf_type.id() == type_id::EMPTY) { - return get_empty_type_column(chunked_array->length()); - } - std::transform(array_chunks.begin(), - array_chunks.end(), - std::back_inserter(concat_columns), - [&cudf_type, &mr, &stream](auto const& array_chunk) { - return get_column(*array_chunk, cudf_type, false, stream, mr); - }); - if (concat_columns.empty()) { - return std::make_unique( - cudf_type, 0, rmm::device_buffer{}, rmm::device_buffer{}, 0); - } else if (concat_columns.size() == 1) { - return std::move(concat_columns[0]); - } - - std::vector column_views; - std::transform(concat_columns.begin(), - concat_columns.end(), - std::back_inserter(column_views), - [](auto const& col) { return col->view(); }); - return cudf::detail::concatenate(column_views, stream, mr); - }); - - return std::make_unique
(std::move(columns)); -} - -std::unique_ptr from_arrow(arrow::Scalar const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto maybe_array = arrow::MakeArrayFromScalar(input, 1); - if (!maybe_array.ok()) { CUDF_FAIL("Failed to create array"); } - auto array = *maybe_array; - - auto field = arrow::field("", input.type); - - auto table = arrow::Table::Make(arrow::schema({field}), {array}); - - auto cudf_table = detail::from_arrow(*table, stream, mr); - - auto cv = cudf_table->view().column(0); - return get_element(cv, 0, stream); -} - -} // namespace detail - -std::unique_ptr
from_arrow(arrow::Table const& input_table, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - - return detail::from_arrow(input_table, stream, mr); -} - -std::unique_ptr from_arrow(arrow::Scalar const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - - return detail::from_arrow(input, stream, mr); -} -} // namespace cudf diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu deleted file mode 100644 index a867d4adfa1..00000000000 --- a/cpp/src/interop/to_arrow.cu +++ /dev/null @@ -1,490 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arrow_utilities.hpp" -#include "decimal_conversion_utilities.cuh" -#include "detail/arrow_allocator.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -namespace cudf { -namespace detail { -namespace { - -/** - * @brief Create arrow data buffer from given cudf column - */ -template -std::shared_ptr fetch_data_buffer(device_span input, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - int64_t const data_size_in_bytes = sizeof(T) * input.size(); - - auto data_buffer = allocate_arrow_buffer(data_size_in_bytes, ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - input.data(), - data_size_in_bytes, - cudaMemcpyDefault, - stream.value())); - - return std::move(data_buffer); -} - -/** - * @brief Create arrow buffer of mask from given cudf column - */ -std::shared_ptr fetch_mask_buffer(column_view input_view, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - int64_t const mask_size_in_bytes = cudf::bitmask_allocation_size_bytes(input_view.size()); - - if (input_view.has_nulls()) { - auto mask_buffer = allocate_arrow_bitmap(static_cast(input_view.size()), ar_mr); - CUDF_CUDA_TRY(cudaMemcpyAsync( - mask_buffer->mutable_data(), - (input_view.offset() > 0) - ? 
cudf::detail::copy_bitmask(input_view, stream, rmm::mr::get_current_device_resource()) - .data() - : input_view.null_mask(), - mask_size_in_bytes, - cudaMemcpyDefault, - stream.value())); - - // Resets all padded bits to 0 - mask_buffer->ZeroPadding(); - - return mask_buffer; - } - - return nullptr; -} - -/** - * @brief Functor to convert cudf column to arrow array - */ -struct dispatch_to_arrow { - /** - * @brief Creates vector Arrays from given cudf column children - */ - std::vector> fetch_child_array( - column_view input_view, - std::vector const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) - { - std::vector> child_arrays; - std::transform( - input_view.child_begin(), - input_view.child_end(), - metadata.begin(), - std::back_inserter(child_arrays), - [&ar_mr, &stream](auto const& child, auto const& meta) { - return type_dispatcher( - child.type(), dispatch_to_arrow{}, child, child.type().id(), meta, ar_mr, stream); - }); - return child_arrays; - } - - template ())> - std::shared_ptr operator()( - column_view, cudf::type_id, column_metadata const&, arrow::MemoryPool*, rmm::cuda_stream_view) - { - CUDF_FAIL("Unsupported type for to_arrow."); - } - - template ())> - std::shared_ptr operator()(column_view input_view, - cudf::type_id id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) - { - return to_arrow_array( - id, - static_cast(input_view.size()), - fetch_data_buffer( - device_span(input_view.data(), input_view.size()), ar_mr, stream), - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); - } -}; - -// Convert decimal types from libcudf to arrow where those types are not -// directly supported by Arrow. These types must be fit into 128 bits, the -// smallest decimal resolution supported by Arrow. -template -std::shared_ptr unsupported_decimals_to_arrow(column_view input, - int32_t precision, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - auto buf = detail::convert_decimals_to_decimal128( - input, stream, rmm::mr::get_current_device_resource()); - - // Synchronize stream here to ensure the decimal128 buffer is ready. 
- stream.synchronize(); - - auto const buf_size_in_bytes = buf->size(); - auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - buf->data(), - buf_size_in_bytes, - cudaMemcpyDefault, - stream.value())); - - auto type = arrow::decimal(precision, -input.type().scale()); - auto mask = fetch_mask_buffer(input, ar_mr, stream); - auto buffers = std::vector>{mask, std::move(data_buffer)}; - auto data = std::make_shared(type, input.size(), buffers); - - return std::make_shared(data); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - using DeviceType = int32_t; - return unsupported_decimals_to_arrow( - input, cudf::detail::max_precision(), ar_mr, stream); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - using DeviceType = int64_t; - return unsupported_decimals_to_arrow( - input, cudf::detail::max_precision(), ar_mr, stream); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - using DeviceType = __int128_t; - auto const max_precision = cudf::detail::max_precision(); - - rmm::device_uvector buf(input.size(), stream); - - thrust::copy(rmm::exec_policy(stream), // - input.begin(), - input.end(), - buf.begin()); - - auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType); - auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync( - data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value())); - - auto type = arrow::decimal(max_precision, -input.type().scale()); - auto mask = fetch_mask_buffer(input, ar_mr, stream); - auto buffers = std::vector>{mask, std::move(data_buffer)}; - auto data = std::make_shared(type, input.size(), buffers); - - return std::make_shared(data); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()(column_view input, - cudf::type_id id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - auto bitmask = detail::bools_to_mask(input, stream, rmm::mr::get_current_device_resource()); - - auto data_buffer = allocate_arrow_buffer(static_cast(bitmask.first->size()), ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - bitmask.first->data(), - bitmask.first->size(), - cudaMemcpyDefault, - stream.value())); - return to_arrow_array(id, - static_cast(input.size()), - std::move(data_buffer), - fetch_mask_buffer(input, ar_mr, stream), - static_cast(input.null_count())); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - std::unique_ptr tmp_column = - ((input.offset() != 0) or - ((input.num_children() == 1) and (input.child(0).size() - 1 != input.size()))) - ? std::make_unique(input, stream) - : nullptr; - - column_view input_view = (tmp_column != nullptr) ? 
tmp_column->view() : input; - auto child_arrays = fetch_child_array(input_view, {{}, {}}, ar_mr, stream); - if (child_arrays.empty()) { - // Empty string will have only one value in offset of 4 bytes - auto tmp_offset_buffer = allocate_arrow_buffer(sizeof(int32_t), ar_mr); - auto tmp_data_buffer = allocate_arrow_buffer(0, ar_mr); - memset(tmp_offset_buffer->mutable_data(), 0, sizeof(int32_t)); - - return std::make_shared( - 0, std::move(tmp_offset_buffer), std::move(tmp_data_buffer)); - } - auto offset_buffer = child_arrays[strings_column_view::offsets_column_index]->data()->buffers[1]; - auto const sview = strings_column_view{input_view}; - auto data_buffer = fetch_data_buffer( - device_span{sview.chars_begin(stream), - static_cast(sview.chars_size(stream))}, - ar_mr, - stream); - if (sview.offsets().type().id() == cudf::type_id::INT64) { - return std::make_shared(static_cast(input_view.size()), - offset_buffer, - data_buffer, - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); - } else { - return std::make_shared(static_cast(input_view.size()), - offset_buffer, - data_buffer, - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); - } -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - CUDF_EXPECTS(metadata.children_meta.size() == static_cast(input.num_children()), - "Number of field names and number of children doesn't match\n"); - std::unique_ptr tmp_column = nullptr; - - if (input.offset() != 0) { tmp_column = std::make_unique(input, stream); } - - column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; - auto child_arrays = fetch_child_array(input_view, metadata.children_meta, ar_mr, stream); - auto mask = fetch_mask_buffer(input_view, ar_mr, stream); - - std::vector> fields; - std::transform(child_arrays.cbegin(), - child_arrays.cend(), - metadata.children_meta.cbegin(), - std::back_inserter(fields), - [](auto const array, auto const meta) { - return std::make_shared( - meta.name, array->type(), array->null_count() > 0); - }); - auto dtype = std::make_shared(fields); - - return std::make_shared(dtype, - static_cast(input_view.size()), - child_arrays, - mask, - static_cast(input_view.null_count())); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - CUDF_EXPECTS(metadata.children_meta.empty() || - metadata.children_meta.size() == static_cast(input.num_children()), - "Number of field names and number of children do not match\n"); - std::unique_ptr tmp_column = nullptr; - if ((input.offset() != 0) or - ((input.num_children() == 2) and (input.child(0).size() - 1 != input.size()))) { - tmp_column = std::make_unique(input, stream); - } - - column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; - auto children_meta = - metadata.children_meta.empty() ? std::vector{{}, {}} : metadata.children_meta; - auto child_arrays = fetch_child_array(input_view, children_meta, ar_mr, stream); - if (child_arrays.empty() || child_arrays[0]->data()->length == 0) { - auto element_type = child_arrays.empty() ? 
arrow::null() : child_arrays[1]->type(); - auto result = arrow::MakeEmptyArray(arrow::list(element_type), ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to construct empty arrow list array\n"); - return result.ValueUnsafe(); - } - - auto offset_buffer = child_arrays[0]->data()->buffers[1]; - auto data = child_arrays[1]; - return std::make_shared(arrow::list(data->type()), - static_cast(input_view.size()), - offset_buffer, - data, - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - // Arrow dictionary requires indices to be signed integer - std::unique_ptr dict_indices = - detail::cast(cudf::dictionary_column_view(input).get_indices_annotated(), - cudf::data_type{type_id::INT32}, - stream, - rmm::mr::get_current_device_resource()); - auto indices = dispatch_to_arrow{}.operator()( - dict_indices->view(), dict_indices->type().id(), {}, ar_mr, stream); - auto dict_keys = cudf::dictionary_column_view(input).keys(); - auto dictionary = - type_dispatcher(dict_keys.type(), - dispatch_to_arrow{}, - dict_keys, - dict_keys.type().id(), - metadata.children_meta.empty() ? column_metadata{} : metadata.children_meta[0], - ar_mr, - stream); - - return std::make_shared( - arrow::dictionary(indices->type(), dictionary->type()), indices, dictionary); -} -} // namespace - -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - CUDF_EXPECTS((metadata.size() == static_cast(input.num_columns())), - "columns' metadata should be equal to number of columns in table"); - - std::vector> arrays; - std::vector> fields; - - std::transform( - input.begin(), - input.end(), - metadata.begin(), - std::back_inserter(arrays), - [&](auto const& c, auto const& meta) { - return c.type().id() != type_id::EMPTY - ? type_dispatcher( - c.type(), detail::dispatch_to_arrow{}, c, c.type().id(), meta, ar_mr, stream) - : std::make_shared(c.size()); - }); - - std::transform( - arrays.begin(), - arrays.end(), - metadata.begin(), - std::back_inserter(fields), - [](auto const& array, auto const& meta) { return arrow::field(meta.name, array->type()); }); - - auto result = arrow::Table::Make(arrow::schema(fields), arrays); - - // synchronize the stream because after the return the data may be accessed from the host before - // the above `cudaMemcpyAsync` calls have completed their copies (especially if pinned host - // memory is used). 
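This synchronize is load-bearing: `cudaMemcpyAsync` into pageable host memory often exhibits synchronous behavior anyway, but when the destination is pinned host memory the copy truly overlaps host execution, so without the explicit `stream.synchronize()` below a caller could read Arrow buffers whose bytes have not yet arrived.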
- stream.synchronize(); - - return result; -} - -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - auto const column = cudf::make_column_from_scalar(input, 1, stream); - cudf::table_view const tv{{column->view()}}; - auto const arrow_table = detail::to_arrow(tv, {metadata}, stream, ar_mr); - auto const ac = arrow_table->column(0); - auto const maybe_scalar = ac->GetScalar(0); - if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); } - return maybe_scalar.ValueOrDie(); -} -} // namespace detail - -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - CUDF_FUNC_RANGE(); - return detail::to_arrow(input, metadata, stream, ar_mr); -} - -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - CUDF_FUNC_RANGE(); - return detail::to_arrow(input, metadata, stream, ar_mr); -} -} // namespace cudf diff --git a/cpp/src/rolling/jit/operation.hpp b/cpp/src/rolling/jit/operation.hpp index f8a52c03d4e..3be739ec5bf 100644 --- a/cpp/src/rolling/jit/operation.hpp +++ b/cpp/src/rolling/jit/operation.hpp @@ -14,12 +14,12 @@ * limitations under the License. */ +#pragma once + #include "rolling/jit/operation-udf.hpp" #include -#pragma once - struct rolling_udf_ptx { template static OutType operate(InType const* in_col, cudf::size_type start, cudf::size_type count) diff --git a/cpp/tests/interop/arrow_utils.hpp b/cpp/tests/interop/arrow_utils.hpp index 08eada632a5..70a9fe64d70 100644 --- a/cpp/tests/interop/arrow_utils.hpp +++ b/cpp/tests/interop/arrow_utils.hpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#pragma once + #include #include #include @@ -30,11 +32,65 @@ #include #include +#include #include -#include - -#pragma once +// Creating arrow as per given type_id and buffer arguments +template +std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... 
args)
+{
+  switch (id) {
+    case cudf::type_id::BOOL8:
+      return std::make_shared<arrow::BooleanArray>(std::forward<Ts>(args)...);
+    case cudf::type_id::INT8: return std::make_shared<arrow::Int8Array>(std::forward<Ts>(args)...);
+    case cudf::type_id::INT16:
+      return std::make_shared<arrow::Int16Array>(std::forward<Ts>(args)...);
+    case cudf::type_id::INT32:
+      return std::make_shared<arrow::Int32Array>(std::forward<Ts>(args)...);
+    case cudf::type_id::INT64:
+      return std::make_shared<arrow::Int64Array>(std::forward<Ts>(args)...);
+    case cudf::type_id::UINT8:
+      return std::make_shared<arrow::UInt8Array>(std::forward<Ts>(args)...);
+    case cudf::type_id::UINT16:
+      return std::make_shared<arrow::UInt16Array>(std::forward<Ts>(args)...);
+    case cudf::type_id::UINT32:
+      return std::make_shared<arrow::UInt32Array>(std::forward<Ts>(args)...);
+    case cudf::type_id::UINT64:
+      return std::make_shared<arrow::UInt64Array>(std::forward<Ts>(args)...);
+    case cudf::type_id::FLOAT32:
+      return std::make_shared<arrow::FloatArray>(std::forward<Ts>(args)...);
+    case cudf::type_id::FLOAT64:
+      return std::make_shared<arrow::DoubleArray>(std::forward<Ts>(args)...);
+    case cudf::type_id::TIMESTAMP_DAYS:
+      return std::make_shared<arrow::Date32Array>(std::make_shared<arrow::Date32Type>(),
+                                                  std::forward<Ts>(args)...);
+    case cudf::type_id::TIMESTAMP_SECONDS:
+      return std::make_shared<arrow::TimestampArray>(arrow::timestamp(arrow::TimeUnit::SECOND),
+                                                     std::forward<Ts>(args)...);
+    case cudf::type_id::TIMESTAMP_MILLISECONDS:
+      return std::make_shared<arrow::TimestampArray>(arrow::timestamp(arrow::TimeUnit::MILLI),
+                                                     std::forward<Ts>(args)...);
+    case cudf::type_id::TIMESTAMP_MICROSECONDS:
+      return std::make_shared<arrow::TimestampArray>(arrow::timestamp(arrow::TimeUnit::MICRO),
+                                                     std::forward<Ts>(args)...);
+    case cudf::type_id::TIMESTAMP_NANOSECONDS:
+      return std::make_shared<arrow::TimestampArray>(arrow::timestamp(arrow::TimeUnit::NANO),
+                                                     std::forward<Ts>(args)...);
+    case cudf::type_id::DURATION_SECONDS:
+      return std::make_shared<arrow::DurationArray>(arrow::duration(arrow::TimeUnit::SECOND),
+                                                    std::forward<Ts>(args)...);
+    case cudf::type_id::DURATION_MILLISECONDS:
+      return std::make_shared<arrow::DurationArray>(arrow::duration(arrow::TimeUnit::MILLI),
+                                                    std::forward<Ts>(args)...);
+    case cudf::type_id::DURATION_MICROSECONDS:
+      return std::make_shared<arrow::DurationArray>(arrow::duration(arrow::TimeUnit::MICRO),
+                                                    std::forward<Ts>(args)...);
+    case cudf::type_id::DURATION_NANOSECONDS:
+      return std::make_shared<arrow::DurationArray>(arrow::duration(arrow::TimeUnit::NANO),
+                                                    std::forward<Ts>(args)...);
+    default: CUDF_FAIL("Unsupported type_id conversion to arrow");
+  }
+}

 template <typename T>
 std::enable_if_t<cudf::is_fixed_width<T>() and !std::is_same_v<T, bool>,
                  std::shared_ptr<arrow::Array>>
@@ -50,7 +106,7 @@ get_arrow_array(std::vector<T> const& data, std::vector<uint8_t> const& mask = {})
   std::shared_ptr<arrow::Buffer> mask_buffer =
     mask.empty() ?
nullptr : arrow::internal::BytesToBits(mask).ValueOrDie(); - return cudf::detail::to_arrow_array(cudf::type_to_id(), data.size(), data_buffer, mask_buffer); + return to_arrow_array(cudf::type_to_id(), data.size(), data_buffer, mask_buffer); } template diff --git a/dependencies.yaml b/dependencies.yaml index b7c5c865d9b..7fb29572921 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -13,6 +13,7 @@ files: - cuda - cuda_version - depends_on_cupy + - depends_on_librmm - depends_on_rmm - develop - docs @@ -95,6 +96,8 @@ files: - build_base - build_python_common - depends_on_pylibcudf + - depends_on_libcudf + - depends_on_librmm - depends_on_rmm py_run_cudf: output: pyproject @@ -106,6 +109,7 @@ files: - run_cudf - pyarrow_run - depends_on_cupy + - depends_on_libcudf - depends_on_pylibcudf - depends_on_rmm py_test_cudf: @@ -117,6 +121,31 @@ files: includes: - test_python_common - test_python_cudf + py_build_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: build-system + includes: + - rapids_build_skbuild + py_rapids_build_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: tool.rapids-build-backend + key: requires + includes: + - build_base + - build_cpp + - build_python_libcudf + - depends_on_librmm + py_run_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: project + includes: + - pyarrow_run py_build_pylibcudf: output: pyproject pyproject_dir: python/pylibcudf @@ -133,6 +162,8 @@ files: includes: - build_base - build_python_common + - depends_on_libcudf + - depends_on_librmm - depends_on_rmm py_run_pylibcudf: output: pyproject @@ -140,6 +171,7 @@ files: extras: table: project includes: + - depends_on_libcudf - depends_on_rmm - pyarrow_run - run_pylibcudf @@ -359,13 +391,18 @@ dependencies: - cython>=3.0.3 # Hard pin the patch version used during the build. This must be kept # in sync with the version pinned in get_arrow.cmake. - - pyarrow==16.1.0.* + - &pyarrow_build pyarrow==16.1.0.* - output_types: pyproject packages: # Hard pin the patch version used during the build. # Sync with conda build constraint & wheel run constraint. # TODO: Change to `2.0.*` for NumPy 2 - numpy==1.23.* + build_python_libcudf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - *pyarrow_build libarrow_build: common: - output_types: conda @@ -781,6 +818,31 @@ dependencies: packages: - dask-cuda==24.10.*,>=0.0.0a0 - *numba + depends_on_libcudf: + common: + - output_types: conda + packages: + - &libcudf_unsuffixed libcudf==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for libcudf-cu{11,12}. 
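Mechanism note: the `&libcudf_unsuffixed` anchor defined above is reused by the `{matrix: null, packages: [*libcudf_unsuffixed]}` fallback just below, so dependency lists generated without a CUDA suffix (conda environments, pure-source builds) depend on plain `libcudf`, while the cu11/cu12 wheel matrices resolve to the suffixed package names.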
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libcudf-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libcudf-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*libcudf_unsuffixed]} depends_on_pylibcudf: common: - output_types: conda @@ -871,6 +933,33 @@ dependencies: packages: &cupy_packages_cu11 - cupy-cuda11x>=12.0.0 - {matrix: null, packages: *cupy_packages_cu11} + depends_on_librmm: + common: + - output_types: conda + packages: + - &librmm_unsuffixed librmm==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for librmm-cu{11,12}. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - librmm-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - librmm-cu11==24.10.*,>=0.0.0a0 + - matrix: + packages: + - *librmm_unsuffixed depends_on_rmm: common: - output_types: conda diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index cdc5aa41abe..9b718b2ed83 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -38,12 +38,70 @@ #include #include +#include #include using cudf::jni::ptr_as_jlong; using cudf::jni::release_as_jlong; +// Creating arrow as per given type_id and buffer arguments +template +std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... 
args) +{ + switch (id) { + case cudf::type_id::BOOL8: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT8: return std::make_shared(std::forward(args)...); + case cudf::type_id::INT16: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT8: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT16: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::FLOAT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::FLOAT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::TIMESTAMP_DAYS: + return std::make_shared(std::make_shared(), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_SECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::SECOND), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_MILLISECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::MILLI), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_MICROSECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::MICRO), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_NANOSECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::NANO), + std::forward(args)...); + case cudf::type_id::DURATION_SECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::SECOND), + std::forward(args)...); + case cudf::type_id::DURATION_MILLISECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::MILLI), + std::forward(args)...); + case cudf::type_id::DURATION_MICROSECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::MICRO), + std::forward(args)...); + case cudf::type_id::DURATION_NANOSECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::NANO), + std::forward(args)...); + default: CUDF_FAIL("Unsupported type_id conversion to arrow"); + } +} + extern "C" { JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_sequence( @@ -141,15 +199,27 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_fromArrow(JNIEnv* env, break; default: // this handles the primitive types - arrow_array = cudf::detail::to_arrow_array( - n_type, j_col_length, data_buffer, null_buffer, j_null_count); + arrow_array = to_arrow_array(n_type, j_col_length, data_buffer, null_buffer, j_null_count); } auto name_and_type = arrow::field("col", arrow_array->type()); std::vector> fields = {name_and_type}; std::shared_ptr schema = std::make_shared(fields); auto arrow_table = arrow::Table::Make(schema, std::vector>{arrow_array}); - auto retCols = cudf::from_arrow(*(arrow_table))->release(); + + ArrowSchema sch; + if (!arrow::ExportSchema(*arrow_table->schema(), &sch).ok()) { + JNI_THROW_NEW(env, "java/lang/RuntimeException", "Unable to produce an ArrowSchema", 0) + } + auto batch = arrow_table->CombineChunksToBatch().ValueOrDie(); + ArrowArray arr; + if (!arrow::ExportRecordBatch(*batch, &arr).ok()) { + JNI_THROW_NEW(env, "java/lang/RuntimeException", "Unable to produce an ArrowArray", 0) + } + auto retCols = cudf::from_arrow(&sch, &arr)->release(); + arr.release(&arr); + sch.release(&sch); + if (retCols.size() != 1) { JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Must result in one column", 0); } diff --git 
a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index ecc551f1143..c749c8c84bf 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -54,6 +54,8 @@ #include +#include +#include #include #include @@ -1069,6 +1071,15 @@ void append_flattened_child_names(cudf::io::column_name_info const& info, } } +// Recursively make schema and its children nullable +void set_nullable(ArrowSchema* schema) +{ + schema->flags |= ARROW_FLAG_NULLABLE; + for (int i = 0; i < schema->n_children; ++i) { + set_nullable(schema->children[i]); + } +} + } // namespace } // namespace jni @@ -2635,7 +2646,13 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_convertCudfToArrowTable(JNIEnv // The pointer to the shared_ptr<> is returned as a jlong. using result_t = std::shared_ptr; - auto result = cudf::to_arrow(*tview, state->get_column_metadata(*tview)); + auto got_arrow_schema = cudf::to_arrow_schema(*tview, state->get_column_metadata(*tview)); + cudf::jni::set_nullable(got_arrow_schema.get()); + auto got_arrow_array = cudf::to_arrow_host(*tview); + auto batch = + arrow::ImportRecordBatch(&got_arrow_array->array, got_arrow_schema.get()).ValueOrDie(); + auto result = arrow::Table::FromRecordBatches({batch}).ValueOrDie(); + return ptr_as_jlong(new result_t{result}); } CATCH_STD(env, 0) @@ -2746,7 +2763,21 @@ Java_ai_rapids_cudf_Table_convertArrowTableToCudf(JNIEnv* env, jclass, jlong arr try { cudf::jni::auto_set_device(env); - return convert_table_for_return(env, cudf::from_arrow(*(handle->get()))); + + ArrowSchema sch; + if (!arrow::ExportSchema(*handle->get()->schema(), &sch).ok()) { + JNI_THROW_NEW(env, "java/lang/RuntimeException", "Unable to produce an ArrowSchema", 0) + } + auto batch = handle->get()->CombineChunksToBatch().ValueOrDie(); + ArrowArray arr; + if (!arrow::ExportRecordBatch(*batch, &arr).ok()) { + JNI_THROW_NEW(env, "java/lang/RuntimeException", "Unable to produce an ArrowArray", 0) + } + auto ret = cudf::from_arrow(&sch, &arr); + arr.release(&arr); + sch.release(&sch); + + return convert_table_for_return(env, ret); } CATCH_STD(env, 0) } diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index e11d62b3bd5..72f20b30052 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -24,72 +24,15 @@ project( LANGUAGES CXX CUDA ) -option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" - OFF -) -option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) -mark_as_advanced(USE_LIBARROW_FROM_PYARROW) - -# Find Python early so that later commands can use it -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) - -# If the user requested it we attempt to find CUDF. -if(FIND_CUDF_CPP) - include(rapids-cpm) - include(rapids-export) - include(rapids-find) - rapids_cpm_init() +find_package(cudf "${RAPIDS_VERSION}" REQUIRED) - if(USE_LIBARROW_FROM_PYARROW) - # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow - # libraries. These variables have no effect because we are always searching for arrow via - # pyarrow, but they must be set as they are required arguments to the function in - # get_arrow.cmake. 
- set(CUDF_USE_ARROW_STATIC OFF) - set(CUDF_ENABLE_ARROW_S3 OFF) - set(CUDF_ENABLE_ARROW_ORC OFF) - set(CUDF_ENABLE_ARROW_PYTHON OFF) - set(CUDF_ENABLE_ARROW_PARQUET OFF) - include(../../cpp/cmake/thirdparty/get_arrow.cmake) - endif() - - find_package(cudf "${RAPIDS_VERSION}" REQUIRED) - - # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack - # for the interop.pyx - include(../../cpp/cmake/thirdparty/get_dlpack.cmake) -else() - set(cudf_FOUND OFF) -endif() +# an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack +# for the interop.pyx +include(rapids-cpm) +rapids_cpm_init() +include(../../cpp/cmake/thirdparty/get_dlpack.cmake) include(rapids-cython-core) - -if(NOT cudf_FOUND) - set(BUILD_TESTS OFF) - set(BUILD_BENCHMARKS OFF) - set(CUDF_BUILD_TESTUTIL OFF) - set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) - set(CUDA_STATIC_RUNTIME ON) - - add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL) - - # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous - # components like headers from libcudacxx, but we do need the libraries. However, we want to - # control where they are installed to. Since there are multiple subpackages of cudf._lib that - # require access to libcudf, we place the library and all its dependent artifacts in the cudf - # directory as a single source of truth and modify the other rpaths appropriately. - set(cython_lib_dir cudf) - include(../pylibcudf/cmake/Modules/WheelHelpers.cmake) - # TODO: This install is currently overzealous. We should only install the libraries that are - # downloaded by CPM during the build, not libraries that were found on the system. However, in - # practice right this would only be a problem is if libcudf was not found but some of the - # dependencies were, and we have no real use cases where that happens. - install_aliased_imported_targets( - TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp - DESTINATION ${cython_lib_dir} - ) -endif() - rapids_cython_init() include(../pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index ccc45413de4..d7da42a1708 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -1,5 +1,15 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. +# If libcudf was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. +try: + import libcudf +except ModuleNotFoundError: + pass +else: + libcudf.load_library() + del libcudf + # _setup_numba _must be called before numba.cuda is imported, because # it sets the numba config variable responsible for enabling # Minor Version Compatibility. Setting it after importing numba.cuda has no effect. 
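`load_library()` returns the `ctypes.CDLL` handle, or `None` when the wheel shipped no library and `ctypes` found no system copy either. A quick way to see which libcudf was actually loaded is a sketch like the following; `_name` is a ctypes implementation detail, used here purely for inspection:

import libcudf

handle = libcudf.load_library()
if handle is not None:
    # A bare "libcudf.so" means the system loader resolved it from its
    # default search path; an absolute path points into the wheel.
    print(handle._name)
else:
    # The wheel build was a no-op and ctypes found no libcudf.so; the C++
    # library must be resolvable by ld when the extension modules import.
    print("no libcudf.so preloaded")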
diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index d6182673308..5ea378fc0e5 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -63,6 +63,7 @@ rapids_cython_create_modules( ) target_link_libraries(strings_udf PUBLIC cudf_strings_udf) +target_include_directories(interop PUBLIC "$") set(targets_using_arrow_headers avro csv orc json parquet) link_to_pyarrow_headers("${targets_using_arrow_headers}") diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi index bcab009c102..bb38488eefb 100644 --- a/python/cudf/cudf/_lib/column.pyi +++ b/python/cudf/cudf/_lib/column.pyi @@ -54,7 +54,7 @@ class Column: @property def mask_ptr(self) -> int: ... def set_base_mask(self, value: Buffer | None) -> None: ... - def set_mask(self, value: Buffer | None) -> Self: ... + def set_mask(self, value: ColumnBase | Buffer | None) -> Self: ... @property def null_count(self) -> int: ... @property diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py index 29cb9d7bd12..fd89904e766 100644 --- a/python/cudf/cudf/core/_internals/timezones.py +++ b/python/cudf/cudf/core/_internals/timezones.py @@ -120,7 +120,7 @@ def _read_tzfile_as_columns( # this happens for UTC-like zones min_date = np.int64(np.iinfo("int64").min + 1).astype("M8[s]") - return (as_column([min_date]), as_column([np.timedelta64(0, "s")])) + return (as_column([min_date]), as_column([np.timedelta64(0, "s")])) # type: ignore[return-value] return tuple(transition_times_and_offsets) # type: ignore[return-value] diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 1fdaf9f8c07..a7e98e5218f 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -984,9 +984,9 @@ def find_and_replace( ) replacement_col = catmap._data["index"].astype(replaced.codes.dtype) - replaced = column.as_column(replaced.codes) + replaced_codes = column.as_column(replaced.codes) output = libcudf.replace.replace( - replaced, to_replace_col, replacement_col + replaced_codes, to_replace_col, replacement_col ) result = column.build_categorical_column( @@ -1064,7 +1064,7 @@ def _validate_fillna_value( raise TypeError( "Cannot set a categorical with non-categorical data" ) - fill_value = fill_value._set_categories( + fill_value = cast(CategoricalColumn, fill_value)._set_categories( self.categories, ) return fill_value.codes.astype(self.codes.dtype) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 27278120abb..60b4126ddd4 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -553,7 +553,7 @@ def __setitem__(self, key: Any, value: Any): """ # Normalize value to scalar/column - value_normalized = ( + value_normalized: cudf.Scalar | ColumnBase = ( cudf.Scalar(value, dtype=self.dtype) if is_scalar(value) else as_column(value, dtype=self.dtype) @@ -609,9 +609,12 @@ def _scatter_by_slice( ) # step != 1, create a scatter map with arange - scatter_map = as_column( - rng, - dtype=cudf.dtype(np.int32), + scatter_map = cast( + cudf.core.column.NumericalColumn, + as_column( + rng, + dtype=cudf.dtype(np.int32), + ), ) return self._scatter_by_column(scatter_map, value) @@ -1111,11 +1114,16 @@ def argsort( if (ascending and self.is_monotonic_increasing) or ( not ascending and self.is_monotonic_decreasing ): - return as_column(range(len(self))) + 
return cast( + cudf.core.column.NumericalColumn, as_column(range(len(self))) + ) elif (ascending and self.is_monotonic_decreasing) or ( not ascending and self.is_monotonic_increasing ): - return as_column(range(len(self) - 1, -1, -1)) + return cast( + cudf.core.column.NumericalColumn, + as_column(range(len(self) - 1, -1, -1)), + ) else: return libcudf.sort.order_by( [self], [ascending], na_position, stable=True @@ -1752,7 +1760,7 @@ def as_column( nan_as_null: bool | None = None, dtype: Dtype | None = None, length: int | None = None, -): +) -> ColumnBase: """Create a Column from an arbitrary object Parameters diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 302f04a0e71..c6a39199e3b 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -256,7 +256,10 @@ def from_sequences( offset += len(data) offset_vals.append(offset) - offset_col = column.as_column(offset_vals, dtype=size_type_dtype) + offset_col = cast( + NumericalColumn, + column.as_column(offset_vals, dtype=size_type_dtype), + ) # Build ListColumn res = cls( @@ -338,7 +341,7 @@ def __init__(self, parent: ParentType): def get( self, - index: int, + index: int | ColumnLike, default: ScalarLike | ColumnLike | None = None, ) -> ParentType: """ diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index a37355dfcda..90bec049831 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -142,7 +142,7 @@ def __setitem__(self, key: Any, value: Any): """ # Normalize value to scalar/column - device_value = ( + device_value: cudf.Scalar | ColumnBase = ( cudf.Scalar( value, dtype=self.dtype @@ -552,7 +552,7 @@ def _validate_fillna_value( ) -> cudf.Scalar | ColumnBase: """Align fill_value for .fillna based on column type.""" if is_scalar(fill_value): - cudf_obj = cudf.Scalar(fill_value) + cudf_obj: cudf.Scalar | ColumnBase = cudf.Scalar(fill_value) if not as_column(cudf_obj).can_cast_safely(self.dtype): raise TypeError( f"Cannot safely cast non-equivalent " diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 6f7508822d4..16e6908f308 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -776,11 +776,13 @@ def contains( # TODO: we silently ignore the `regex=` flag here if case is False: input_column = libstrings.to_lower(self._column) - pat = libstrings.to_lower(column.as_column(pat, dtype="str")) + col_pat = libstrings.to_lower( + column.as_column(pat, dtype="str") + ) else: input_column = self._column - pat = column.as_column(pat, dtype="str") - result_col = libstrings.contains_multiple(input_column, pat) + col_pat = column.as_column(pat, dtype="str") + result_col = libstrings.contains_multiple(input_column, col_pat) return self._return_or_inplace(result_col) def like(self, pat: str, esc: str | None = None) -> SeriesOrIndex: diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 7aa3e5f8163..34076fa0060 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -6,7 +6,7 @@ import sys from collections import abc from functools import cached_property, reduce -from typing import TYPE_CHECKING, Any, Callable, Mapping +from typing import TYPE_CHECKING, Any, Callable, Mapping, cast import numpy as np import pandas as pd @@ -35,7 +35,7 @@ class _NestedGetItemDict(dict): """ @classmethod - 
def from_zip(cls, data): + def from_zip(cls, data: abc.Iterator): """Create from zip, specialized factory for nesting.""" obj = cls() for key, value in data: @@ -91,12 +91,12 @@ class ColumnAccessor(abc.MutableMapping): column length and data.values() are all Columns """ - _data: dict[Any, ColumnBase] - _level_names: tuple[Any, ...] + _data: dict[abc.Hashable, ColumnBase] + _level_names: tuple[abc.Hashable, ...] def __init__( self, - data: abc.MutableMapping[Any, ColumnBase] | Self, + data: abc.MutableMapping[abc.Hashable, ColumnBase] | Self, multiindex: bool = False, level_names=None, rangeindex: bool = False, @@ -141,16 +141,16 @@ def __init__( f"data must be a ColumnAccessor or MutableMapping, not {type(data).__name__}" ) - def __iter__(self): + def __iter__(self) -> abc.Iterator: return iter(self._data) - def __getitem__(self, key: Any) -> ColumnBase: + def __getitem__(self, key: abc.Hashable) -> ColumnBase: return self._data[key] - def __setitem__(self, key: Any, value: ColumnBase) -> None: + def __setitem__(self, key: abc.Hashable, value: ColumnBase) -> None: self.set_by_label(key, value) - def __delitem__(self, key: Any) -> None: + def __delitem__(self, key: abc.Hashable) -> None: old_ncols = len(self._data) del self._data[key] new_ncols = len(self._data) @@ -186,7 +186,7 @@ def _from_columns_like_self( Whether to verify column length and type. """ if sys.version_info.major >= 3 and sys.version_info.minor >= 10: - data = zip(self.names, columns, strict=True) + data = zip(self.names, columns, strict=True) # type: ignore[call-overload] else: columns = list(columns) if len(columns) != len(self.names): @@ -205,7 +205,7 @@ def _from_columns_like_self( ) @property - def level_names(self) -> tuple[Any, ...]: + def level_names(self) -> tuple[abc.Hashable, ...]: if self._level_names is None or len(self._level_names) == 0: return tuple((None,) * max(1, self.nlevels)) else: @@ -221,7 +221,7 @@ def nlevels(self) -> int: return len(next(iter(self.keys()))) @property - def name(self) -> Any: + def name(self) -> abc.Hashable: return self.level_names[-1] @cached_property @@ -232,7 +232,7 @@ def nrows(self) -> int: return len(next(iter(self.values()))) @cached_property - def names(self) -> tuple[Any, ...]: + def names(self) -> tuple[abc.Hashable, ...]: return tuple(self.keys()) @cached_property @@ -291,7 +291,7 @@ def to_pandas_index(self) -> pd.Index: ) elif cudf.api.types.infer_dtype(self.names) == "integer": if len(self.names) == 1: - start = self.names[0] + start = cast(int, self.names[0]) return pd.RangeIndex( start=start, stop=start + 1, step=1, name=self.name ) @@ -299,7 +299,9 @@ def to_pandas_index(self) -> pd.Index: if len(uniques) == 1 and uniques[0] != 0: diff = uniques[0] new_range = range( - self.names[0], self.names[-1] + diff, diff + cast(int, self.names[0]), + cast(int, self.names[-1]) + diff, + diff, ) return pd.RangeIndex(new_range, name=self.name) result = pd.Index( @@ -310,7 +312,9 @@ def to_pandas_index(self) -> pd.Index: ) return result - def insert(self, name: Any, value: ColumnBase, loc: int = -1) -> None: + def insert( + self, name: abc.Hashable, value: ColumnBase, loc: int = -1 + ) -> None: """ Insert column into the ColumnAccessor at the specified location. @@ -457,7 +461,7 @@ def select_by_index(self, index: Any) -> Self: verify=False, ) - def swaplevel(self, i=-2, j=-1) -> Self: + def swaplevel(self, i: abc.Hashable = -2, j: abc.Hashable = -1) -> Self: """ Swap level i with level j. Calling this method does not change the ordering of the values. 
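The docstring's contract is the usual pandas one, which this accessor mirrors: swapping levels permutes key components and level names but leaves column order alone. A small pandas illustration of the same semantics:

import pandas as pd

mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["outer", "inner"])
swapped = mi.swaplevel(0, 1)
print(list(swapped))  # [(1, 'a'), (2, 'b')]: same order, components swapped
print(swapped.names)  # ['inner', 'outer']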
@@ -486,7 +490,7 @@ def swaplevel(self, i=-2, j=-1) -> Self: # swap old keys for i and j for n, row in enumerate(self.names): - new_keys[n][i], new_keys[n][j] = row[j], row[i] + new_keys[n][i], new_keys[n][j] = row[j], row[i] # type: ignore[call-overload, index] new_dict.update({row: tuple(new_keys[n])}) # TODO: Change to deep=False when copy-on-write is default @@ -494,10 +498,10 @@ def swaplevel(self, i=-2, j=-1) -> Self: # swap level_names for i and j new_names = list(self.level_names) - new_names[i], new_names[j] = new_names[j], new_names[i] + new_names[i], new_names[j] = new_names[j], new_names[i] # type: ignore[call-overload] return type(self)( - new_data, + new_data, # type: ignore[arg-type] multiindex=self.multiindex, level_names=new_names, rangeindex=self.rangeindex, @@ -505,7 +509,7 @@ def swaplevel(self, i=-2, j=-1) -> Self: verify=False, ) - def set_by_label(self, key: Any, value: ColumnBase) -> None: + def set_by_label(self, key: abc.Hashable, value: ColumnBase) -> None: """ Add (or modify) column by name. @@ -555,7 +559,7 @@ def _select_by_label_list_like(self, key: tuple) -> Self: verify=False, ) - def _select_by_label_grouped(self, key: Any) -> Self: + def _select_by_label_grouped(self, key: abc.Hashable) -> Self: result = self._grouped_data[key] if isinstance(result, column.ColumnBase): # self._grouped_data[key] = self._data[key] so skip validation @@ -606,8 +610,12 @@ def _select_by_label_slice(self, key: slice) -> Self: ) def _select_by_label_with_wildcard(self, key: tuple) -> Self: - key = self._pad_key(key, slice(None)) - data = {k: self._data[k] for k in self.names if _keys_equal(k, key)} + pad_key = self._pad_key(key, slice(None)) + data = { + k: self._data[k] + for k in self.names + if _keys_equal(k, pad_key) # type: ignore[arg-type] + } return type(self)( data, multiindex=self.multiindex, @@ -616,7 +624,9 @@ def _select_by_label_with_wildcard(self, key: tuple) -> Self: verify=False, ) - def _pad_key(self, key: Any, pad_value="") -> Any: + def _pad_key( + self, key: abc.Hashable, pad_value: str | slice = "" + ) -> abc.Hashable: """ Pad the provided key to a length equal to the number of levels. @@ -628,7 +638,9 @@ def _pad_key(self, key: Any, pad_value="") -> Any: return key + (pad_value,) * (self.nlevels - len(key)) def rename_levels( - self, mapper: Mapping[Any, Any] | Callable, level: int | None = None + self, + mapper: Mapping[abc.Hashable, abc.Hashable] | Callable, + level: int | None = None, ) -> Self: """ Rename the specified levels of the given ColumnAccessor @@ -701,14 +713,14 @@ def rename_column(x): verify=False, ) - def droplevel(self, level) -> None: + def droplevel(self, level: int) -> None: # drop the nth level if level < 0: level += self.nlevels old_ncols = len(self._data) self._data = { - _remove_key_level(key, level): value + _remove_key_level(key, level): value # type: ignore[arg-type] for key, value in self._data.items() } new_ncols = len(self._data) @@ -722,7 +734,7 @@ def droplevel(self, level) -> None: self._clear_cache(old_ncols, new_ncols) -def _keys_equal(target: Any, key: Any) -> bool: +def _keys_equal(target: abc.Hashable, key: abc.Iterable) -> bool: """ Compare `key` to `target`. @@ -740,7 +752,7 @@ def _keys_equal(target: Any, key: Any) -> bool: return True -def _remove_key_level(key: Any, level: int) -> Any: +def _remove_key_level(key: tuple, level: int) -> abc.Hashable: """ Remove a level from key. If detupleize is True, and if only a single level remains, convert the tuple to a scalar. 
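For reference, the padding rule that `_pad_key` applies (its body is shown in the hunk above) is simple enough to run standalone; this sketch just re-applies the same expression outside the class:

key, nlevels, pad_value = ("a",), 3, slice(None)
padded = key + (pad_value,) * (nlevels - len(key))
print(padded)  # ('a', slice(None, None, None), slice(None, None, None))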
@@ -751,7 +763,9 @@ def _remove_key_level(key: Any, level: int) -> Any: return result -def _get_level(x, nlevels, level_names): +def _get_level( + x: abc.Hashable, nlevels: int, level_names: tuple[abc.Hashable, ...] +) -> abc.Hashable: """Get the level index from a level number or name. If given an integer, this function will handle wraparound for diff --git a/python/cudf/cudf/core/copy_types.py b/python/cudf/cudf/core/copy_types.py index 6afbc0bbc65..16d8964f083 100644 --- a/python/cudf/cudf/core/copy_types.py +++ b/python/cudf/cudf/core/copy_types.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. from dataclasses import dataclass from typing import TYPE_CHECKING, Any, cast @@ -44,15 +44,17 @@ class GatherMap: If the map is not in bounds. """ - #: The gather map - column: "NumericalColumn" #: The number of rows the gather map has been validated for nrows: int #: Was the validation for nullify=True? nullify: bool def __init__(self, column: Any, nrows: int, *, nullify: bool): - self.column = cudf.core.column.as_column(column) + #: The gather map + self.column = cast( + cudf.core.column.NumericalColumn, + cudf.core.column.as_column(column), + ) self.nrows = nrows self.nullify = nullify if len(self.column) == 0: @@ -135,11 +137,12 @@ class BooleanMask: If the mask has the wrong number of rows """ - #: The boolean mask - column: "NumericalColumn" - def __init__(self, column: Any, nrows: int): - self.column = cudf.core.column.as_column(column) + #: The boolean mask + self.column = cast( + cudf.core.column.NumericalColumn, + cudf.core.column.as_column(column), + ) if self.column.dtype.kind != "b": raise TypeError("Boolean mask must have bool dtype") if len(column) != nrows: diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 43693ec20b1..14b63c2b0d7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5830,7 +5830,7 @@ def from_records( df = cls._from_data( ColumnAccessor( - data=ca_data, + data=ca_data, # type: ignore[arg-type] multiindex=isinstance( columns, (pd.MultiIndex, cudf.MultiIndex) ), diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index e46e24dd0d8..60253b9ae5d 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -40,7 +40,7 @@ from cudf.core._base_index import BaseIndex from cudf.core._compat import PANDAS_LT_300 from cudf.core.buffer import acquire_spill_lock -from cudf.core.column import ColumnBase, as_column +from cudf.core.column import ColumnBase, NumericalColumn, as_column from cudf.core.column_accessor import ColumnAccessor from cudf.core.copy_types import BooleanMask, GatherMap from cudf.core.dtypes import ListDtype @@ -3008,9 +3008,12 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self: if stride != 1: return self._gather( GatherMap.from_column_unchecked( - as_column( - range(start, stop, stride), - dtype=libcudf.types.size_type_dtype, + cast( + NumericalColumn, + as_column( + range(start, stop, stride), + dtype=libcudf.types.size_type_dtype, + ), ), len(self), nullify=False, @@ -4761,10 +4764,13 @@ def _sample_axis_0( ): try: gather_map = GatherMap.from_column_unchecked( - cudf.core.column.as_column( - random_state.choice( - len(self), size=n, replace=replace, p=weights - ) + cast( + NumericalColumn, + cudf.core.column.as_column( + random_state.choice( + len(self), size=n, replace=replace, p=weights + ) + ), ), len(self), 
nullify=False, @@ -6599,7 +6605,7 @@ def _drop_rows_by_labels( level = 0 levels_index = obj.index.get_level_values(level) - if errors == "raise" and not labels.isin(levels_index).all(): + if errors == "raise" and not labels.isin(levels_index).all(): # type: ignore[union-attr] raise KeyError("One or more values not found in axis") if isinstance(level, int): @@ -6649,7 +6655,7 @@ def _drop_rows_by_labels( ) else: - if errors == "raise" and not labels.isin(obj.index).all(): + if errors == "raise" and not labels.isin(obj.index).all(): # type: ignore[union-attr] raise KeyError("One or more values not found in axis") if isinstance(labels, ColumnBase): diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index fac51a9e471..560f257c115 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -527,6 +527,7 @@ def read_parquet( engine="cudf", columns=None, storage_options=None, + filesystem=None, filters=None, row_groups=None, use_pandas_metadata=True, @@ -567,7 +568,9 @@ def read_parquet( # Start by trying construct a filesystem object, so we # can apply filters on remote file-systems fs, paths = ioutils._get_filesystem_and_paths( - path_or_data=filepath_or_buffer, storage_options=storage_options + path_or_data=filepath_or_buffer, + storage_options=storage_options, + filesystem=filesystem, ) # Normalize and validate filters diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index 6579fd23634..3b23a53091e 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -269,6 +269,28 @@ def test_read_parquet_ext( assert_eq(expect, got1) +def test_read_parquet_filesystem(s3_base, s3so, pdf): + fname = "data.0.parquet" + # NOTE: Need a unique bucket name when a glob pattern + # is used, otherwise fsspec seems to cache the bucket + # contents, and later tests using the same bucket name + # will fail. + bucket = "test_read_parquet_filesystem" + buffer = BytesIO() + pdf.to_parquet(path=buffer) + buffer.seek(0) + fs = get_fs_token_paths("s3://", mode="rb", storage_options=s3so)[0] + with s3_context( + s3_base=s3_base, + bucket=bucket, + files={fname: buffer}, + ): + # Check that a glob pattern works + path = f"s3://{bucket}/{'data.*.parquet'}" + got = cudf.read_parquet(path, filesystem=fs) + assert_eq(pdf, got) + + def test_read_parquet_multi_file(s3_base, s3so, pdf): fname_1 = "test_parquet_reader_multi_file_1.parquet" buffer_1 = BytesIO() diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 4ac9b63985f..18106e7475b 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -12,7 +12,7 @@ import fsspec.implementations.local import numpy as np import pandas as pd -from fsspec.core import get_fs_token_paths +from fsspec.core import expand_paths_if_needed, get_fs_token_paths from cudf.core._compat import PANDAS_LT_300 from cudf.utils.docutils import docfmt_partial @@ -139,6 +139,9 @@ For other URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more details. +filesystem : fsspec.AbstractFileSystem, default None + Filesystem object to use when reading the parquet data. This argument + should not be used at the same time as `storage_options`. filters : list of tuple, list of lists of tuples, default None If not None, specifies a filter predicate used to filter out row groups using statistics stored for each row group as Parquet metadata. 
Row groups @@ -1536,11 +1539,18 @@ def is_directory(path_or_data, storage_options=None): return False -def _get_filesystem_and_paths(path_or_data, storage_options): +def _get_filesystem_and_paths( + path_or_data, + storage_options, + *, + filesystem=None, +): # Returns a filesystem object and the filesystem-normalized # paths. If `path_or_data` does not correspond to a path or # list of paths (or if the protocol is not supported), the # return will be `None` for the fs and `[]` for the paths. + # If a filesystem object is already available, it can be + # passed with the `filesystem` argument. fs = None return_paths = path_or_data @@ -1557,16 +1567,36 @@ def _get_filesystem_and_paths(path_or_data, storage_options): else: path_or_data = [path_or_data] - try: - fs, _, fs_paths = get_fs_token_paths( - path_or_data, mode="rb", storage_options=storage_options - ) - return_paths = fs_paths - except ValueError as e: - if str(e).startswith("Protocol not known"): - return None, [] - else: - raise e + if filesystem is None: + try: + fs, _, fs_paths = get_fs_token_paths( + path_or_data, mode="rb", storage_options=storage_options + ) + return_paths = fs_paths + except ValueError as e: + if str(e).startswith("Protocol not known"): + return None, [] + else: + raise e + else: + if not isinstance(filesystem, fsspec.AbstractFileSystem): + raise ValueError( + f"Expected fsspec.AbstractFileSystem. Got {filesystem}" + ) + + if storage_options: + raise ValueError( + f"Cannot specify storage_options when an explicit " + f"filesystem object is specified. Got: {storage_options}" + ) + + fs = filesystem + return_paths = [ + fs._strip_protocol(u) + for u in expand_paths_if_needed( + path_or_data, "rb", 1, fs, None + ) + ] return fs, return_paths diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 9db52164eca..cb9fa30afab 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "cuda-python>=11.7.1,<12.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", + "libcudf==24.10.*,>=0.0.0a0", "numba>=0.57", "numpy>=1.23,<2.0a0", "nvtx>=0.2.1", @@ -126,6 +127,8 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", + "libcudf==24.10.*,>=0.0.0a0", + "librmm==24.10.*,>=0.0.0a0", "ninja", "numpy==1.23.*", "pyarrow==16.1.0.*", diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 16b2c8959e2..5bd3eb5fa7f 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -134,6 +134,8 @@ def _get_non_empty_data( return cudf.core.column.as_column( np.arange(start=0, stop=2, dtype=s.dtype) ) + elif isinstance(s.dtype, cudf.core.dtypes.DecimalDtype): + return cudf.core.column.as_column(range(2), dtype=s.dtype) else: raise TypeError( f"Don't know how to handle column of type {type(s).__name__}" diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 8f52fce7818..c025280c240 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -23,11 +23,7 @@ from cudf.io import write_to_dataset from cudf.io.parquet import _apply_post_filters, _normalize_filters from cudf.utils.dtypes import cudf_dtype_from_pa_type -from cudf.utils.ioutils import ( - _ROW_GROUP_SIZE_BYTES_DEFAULT, - _fsspec_data_transfer, - _is_local_filesystem, -) +from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT class CudfEngine(ArrowDatasetEngine): @@ -93,40 +89,35 @@ def _read_paths( 
dataset_kwargs = dataset_kwargs or {} dataset_kwargs["partitioning"] = partitioning or "hive" - # Non-local filesystem handling - paths_or_fobs = paths - if not _is_local_filesystem(fs): - paths_or_fobs = [ - _fsspec_data_transfer(fpath, fs=fs) for fpath in paths - ] - # Use cudf to read in data try: df = cudf.read_parquet( - paths_or_fobs, + paths, engine="cudf", columns=columns, row_groups=row_groups if row_groups else None, dataset_kwargs=dataset_kwargs, categorical_partitions=False, + filesystem=fs, **kwargs, ) except RuntimeError as err: # TODO: Remove try/except after null-schema issue is resolved # (See: https://github.com/rapidsai/cudf/issues/12702) - if len(paths_or_fobs) > 1: + if len(paths) > 1: df = cudf.concat( [ cudf.read_parquet( - pof, + path, engine="cudf", columns=columns, row_groups=row_groups[i] if row_groups else None, dataset_kwargs=dataset_kwargs, categorical_partitions=False, + filesystem=fs, **kwargs, ) - for i, pof in enumerate(paths_or_fobs) + for i, path in enumerate(paths) ] ) else: diff --git a/python/dask_cudf/dask_cudf/tests/test_join.py b/python/dask_cudf/dask_cudf/tests/test_join.py index ed291ef31a7..3e078c47cdd 100644 --- a/python/dask_cudf/dask_cudf/tests/test_join.py +++ b/python/dask_cudf/dask_cudf/tests/test_join.py @@ -386,3 +386,14 @@ def test_issue_12773(): expected.to_pandas(), check_index=False, ) + + +@pytest.mark.parametrize( + "typ", [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype] +) +def test_merge_on_decimal(typ): + df = cudf.DataFrame({"a": [1], "b": [2]}, dtype=typ(1)) + ddf = dask_cudf.from_cudf(df, npartitions=1) + result = ddf.merge(ddf, left_on="a", right_on="a") + expected = df.merge(df, left_on="a", right_on="a") + dd.assert_eq(result, expected) diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt new file mode 100644 index 00000000000..09c7ed2e217 --- /dev/null +++ b/python/libcudf/CMakeLists.txt @@ -0,0 +1,58 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../../rapids_config.cmake) + +project( + libcudf-python + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX +) + +# Check if cudf is already available. If so, it is the user's responsibility to ensure that the +# CMake package is also available at build time of the Python cudf package. 
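If the `find_package` call below succeeds, the rest of this file is intentionally skipped: the wheel then ships no `libcudf.so`, and `libcudf.load_library()` (added later in this diff) either preloads the system copy via ctypes or returns `None`, leaving symbol resolution to the regular loader.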
+find_package(cudf "${RAPIDS_VERSION}") + +if(cudf_FOUND) + return() +endif() + +unset(cudf_FOUND) + +# For wheels, this should always be true +set(USE_LIBARROW_FROM_PYARROW ON) + +# Find Python early so that later commands can use it +find_package(Python 3.10 REQUIRED COMPONENTS Interpreter) + +set(BUILD_TESTS OFF) +set(BUILD_BENCHMARKS OFF) +set(CUDF_BUILD_TESTUTIL OFF) +set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) +set(CUDA_STATIC_RUNTIME ON) + +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) + +include(../pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake) + +add_subdirectory(../../cpp cudf-cpp) + +# Ensure other libraries needed by libcudf.so get installed alongside it. +include(cmake/Modules/WheelHelpers.cmake) +install_aliased_imported_targets( + TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp + DESTINATION ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} +) diff --git a/python/libcudf/LICENSE b/python/libcudf/LICENSE new file mode 120000 index 00000000000..30cff7403da --- /dev/null +++ b/python/libcudf/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/libcudf/README.md b/python/libcudf/README.md new file mode 120000 index 00000000000..fe840054137 --- /dev/null +++ b/python/libcudf/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/python/pylibcudf/cmake/Modules/WheelHelpers.cmake b/python/libcudf/cmake/Modules/WheelHelpers.cmake similarity index 100% rename from python/pylibcudf/cmake/Modules/WheelHelpers.cmake rename to python/libcudf/cmake/Modules/WheelHelpers.cmake diff --git a/python/libcudf/libcudf/VERSION b/python/libcudf/libcudf/VERSION new file mode 120000 index 00000000000..d62dc733efd --- /dev/null +++ b/python/libcudf/libcudf/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/libcudf/libcudf/__init__.py b/python/libcudf/libcudf/__init__.py new file mode 100644 index 00000000000..10c476cbe89 --- /dev/null +++ b/python/libcudf/libcudf/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcudf._version import __git_commit__, __version__ +from libcudf.load import load_library diff --git a/python/libcudf/libcudf/_version.py b/python/libcudf/libcudf/_version.py new file mode 100644 index 00000000000..7dd732b4905 --- /dev/null +++ b/python/libcudf/libcudf/_version.py @@ -0,0 +1,33 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import importlib.resources + +__version__ = ( + importlib.resources.files(__package__) + .joinpath("VERSION") + .read_text() + .strip() +) +try: + __git_commit__ = ( + importlib.resources.files(__package__) + .joinpath("GIT_COMMIT") + .read_text() + .strip() + ) +except FileNotFoundError: + __git_commit__ = "" + +__all__ = ["__git_commit__", "__version__"] diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py new file mode 100644 index 00000000000..f6ba0d51bdb --- /dev/null +++ b/python/libcudf/libcudf/load.py @@ -0,0 +1,51 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + + +def load_library(): + # This is loading the libarrow shared library in situations where it comes from the + # pyarrow package (i.e. when installed as a wheel). + import pyarrow # noqa: F401 + + # Dynamically load libcudf.so. Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, but if no + # other library is present use the one in the wheel. + libcudf_lib = None + try: + libcudf_lib = ctypes.CDLL("libcudf.so", ctypes.RTLD_GLOBAL) + except OSError: + # If neither of these directories contain the library, we assume we are in an + # environment where the C++ library is already installed somewhere else and the + # CMake build of the libcudf Python package was a no-op. + # + # Note that this approach won't work for real editable installs of the libcudf package. + # scikit-build-core has limited support for importlib.resources so there isn't a clean + # way to support that case yet. + for lib_dir in ("lib", "lib64"): + if os.path.isfile( + lib := os.path.join( + os.path.dirname(__file__), lib_dir, "libcudf.so" + ) + ): + libcudf_lib = ctypes.CDLL(lib, ctypes.RTLD_GLOBAL) + break + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libcudf was loaded from. + return libcudf_lib diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml new file mode 100644 index 00000000000..fd01f7f6e2f --- /dev/null +++ b/python/libcudf/pyproject.toml @@ -0,0 +1,75 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+[build-system]
+build-backend = "rapids_build_backend.build"
+requires = [
+    "rapids-build-backend>=0.3.0,<0.4.0.dev0",
+    "scikit-build-core[pyproject]>=0.10.0",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+
+[project]
+name = "libcudf"
+dynamic = ["version"]
+description = "cuDF - GPU Dataframe (C++)"
+readme = { file = "README.md", content-type = "text/markdown" }
+authors = [
+    { name = "NVIDIA Corporation" },
+]
+license = { text = "Apache 2.0" }
+requires-python = ">=3.10"
+classifiers = [
+    "Intended Audience :: Developers",
+    "Topic :: Database",
+    "Topic :: Scientific/Engineering",
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: C++",
+    "Environment :: GPU :: NVIDIA CUDA",
+]
+dependencies = [
+    "pyarrow>=16.1.0,<16.2.0a0",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+
+[project.urls]
+Homepage = "https://github.com/rapidsai/cudf"
+
+[project.entry-points."cmake.prefix"]
+libcudf = "libcudf"
+
+[tool.scikit-build]
+build-dir = "build/{wheel_tag}"
+cmake.build-type = "Release"
+cmake.version = "CMakeLists.txt"
+minimum-version = "build-system.requires"
+ninja.make-fallback = true
+sdist.reproducible = true
+wheel.packages = ["libcudf"]
+wheel.install-dir = "libcudf"
+wheel.py-api = "py3"
+
+[tool.scikit-build.metadata.version]
+provider = "scikit_build_core.metadata.regex"
+input = "libcudf/VERSION"
+regex = "(?P<value>.*)"
+
+[tool.rapids-build-backend]
+build-backend = "scikit_build_core.build"
+dependencies-file = "../../dependencies.yaml"
+matrix-entry = "cuda_suffixed=true"
+requires = [
+    "cmake>=3.26.4,!=3.30.0",
+    "librmm==24.10.*,>=0.0.0a0",
+    "ninja",
+    "pyarrow==16.1.0.*",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt
index 424d8372280..340ad120377 100644
--- a/python/pylibcudf/CMakeLists.txt
+++ b/python/pylibcudf/CMakeLists.txt
@@ -24,72 +24,16 @@ project(
   LANGUAGES CXX CUDA
 )
-option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files"
-       OFF
-)
-option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
-mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
-
-# Find Python early so that later commands can use it
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
-
-# If the user requested it we attempt to find CUDF.
-if(FIND_CUDF_CPP)
-  include(rapids-cpm)
-  include(rapids-export)
-  include(rapids-find)
-  rapids_cpm_init()
-
-  if(USE_LIBARROW_FROM_PYARROW)
-    # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow
-    # libraries. These variables have no effect because we are always searching for arrow via
-    # pyarrow, but they must be set as they are required arguments to the function in
-    # get_arrow.cmake.
-    set(CUDF_USE_ARROW_STATIC OFF)
-    set(CUDF_ENABLE_ARROW_S3 OFF)
-    set(CUDF_ENABLE_ARROW_ORC OFF)
-    set(CUDF_ENABLE_ARROW_PYTHON OFF)
-    set(CUDF_ENABLE_ARROW_PARQUET OFF)
-    include(../../cpp/cmake/thirdparty/get_arrow.cmake)
-  endif()
-
-  find_package(cudf "${RAPIDS_VERSION}" REQUIRED)
+find_package(cudf "${RAPIDS_VERSION}" REQUIRED)

-  # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack
-  # for the interop.pyx
-  include(../../cpp/cmake/thirdparty/get_dlpack.cmake)
-else()
-  set(cudf_FOUND OFF)
-endif()
+# An installed version of libcudf doesn't provide the dlpack headers, so we need to download
+# dlpack for interop.pyx.
+include(rapids-cpm)
+rapids_cpm_init()
+include(../../cpp/cmake/thirdparty/get_dlpack.cmake)

 include(rapids-cython-core)

-if(NOT cudf_FOUND)
-  set(BUILD_TESTS OFF)
-  set(BUILD_BENCHMARKS OFF)
-  set(CUDF_BUILD_TESTUTIL OFF)
-  set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
-  set(CUDA_STATIC_RUNTIME ON)
-
-  add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL)
-
-  # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous
-  # components like headers from libcudacxx, but we do need the libraries. However, we want to
-  # control where they are installed to. Since there are multiple subpackages of pylibcudf that
-  # require access to libcudf, we place the library and all its dependent artifacts in the cudf
-  # directory as a single source of truth and modify the other rpaths appropriately.
-  set(cython_lib_dir pylibcudf)
-  include(cmake/Modules/WheelHelpers.cmake)
-  # TODO: This install is currently overzealous. We should only install the libraries that are
-  # downloaded by CPM during the build, not libraries that were found on the system. However, in
-  # practice right now this would only be a problem if libcudf was not found but some of the
-  # dependencies were, and we have no real use cases where that happens.
-  install_aliased_imported_targets(
-    TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp
-    DESTINATION ${cython_lib_dir}
-  )
-endif()
-
 rapids_cython_init()

 include(cmake/Modules/LinkPyarrowHeaders.cmake)
diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt
index ab21bfe97ab..f81a32e07f9 100644
--- a/python/pylibcudf/pylibcudf/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/CMakeLists.txt
@@ -53,6 +53,8 @@ rapids_cython_create_modules(
   LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf
 )
+# Expose the CPM-fetched dlpack headers to the interop module.
+target_include_directories(pylibcudf_interop PUBLIC "$<BUILD_INTERFACE:${DLPACK_INCLUDE_DIR}>")
+
 include(${rapids-cmake-dir}/export/find_package_root.cmake)
 include(../../../cpp/cmake/thirdparty/get_nanoarrow.cmake)
 target_link_libraries(pylibcudf_interop PUBLIC nanoarrow)
diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py
index 677fdaf80d0..e784c6c6dd5 100644
--- a/python/pylibcudf/pylibcudf/__init__.py
+++ b/python/pylibcudf/pylibcudf/__init__.py
@@ -1,5 +1,15 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.

+# If libcudf was installed as a wheel, we must explicitly ask it to load the library symbols.
+# Otherwise, we assume the library was installed in a system path that the loader can find.
+try:
+    import libcudf
+except ModuleNotFoundError:
+    pass
+else:
+    libcudf.load_library()
+    del libcudf
+
 from . import (
     aggregation,
     binaryop,
diff --git a/python/pylibcudf/pylibcudf/io/json.pxd b/python/pylibcudf/pylibcudf/io/json.pxd
index ab9b5b99ce2..f65c1034598 100644
--- a/python/pylibcudf/pylibcudf/io/json.pxd
+++ b/python/pylibcudf/pylibcudf/io/json.pxd
@@ -15,8 +15,8 @@ cpdef TableWithMetadata read_json(
     list dtypes = *,
     compression_type compression = *,
     bool lines = *,
-    size_type byte_range_offset = *,
-    size_type byte_range_size = *,
+    size_t byte_range_offset = *,
+    size_t byte_range_size = *,
     bool keep_quotes = *,
     bool mixed_types_as_string = *,
     bool prune_columns = *,
diff --git a/python/pylibcudf/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx
index ce086f4a489..29e49083bc6 100644
--- a/python/pylibcudf/pylibcudf/io/json.pyx
+++ b/python/pylibcudf/pylibcudf/io/json.pyx
@@ -51,8 +51,8 @@ cdef json_reader_options _setup_json_reader_options(
     list dtypes,
     compression_type compression,
     bool lines,
-    size_type byte_range_offset,
-    size_type byte_range_size,
+    size_t byte_range_offset,
+    size_t byte_range_size,
     bool keep_quotes,
     bool mixed_types_as_string,
     bool prune_columns,
@@ -189,8 +189,8 @@ cpdef TableWithMetadata read_json(
     list dtypes = None,
     compression_type compression = compression_type.AUTO,
     bool lines = False,
-    size_type byte_range_offset = 0,
-    size_type byte_range_size = 0,
+    size_t byte_range_offset = 0,
+    size_t byte_range_size = 0,
     bool keep_quotes = False,
     bool mixed_types_as_string = False,
     bool prune_columns = False,
@@ -212,9 +212,9 @@ cpdef TableWithMetadata read_json(
         (column_child_name, column_child_type, list of grandchild dtypes).
     compression: CompressionType, default CompressionType.AUTO
         The compression format of the JSON source.
-    byte_range_offset : size_type, default 0
+    byte_range_offset : size_t, default 0
         Number of bytes to skip from source start.
-    byte_range_size : size_type, default 0
+    byte_range_size : size_t, default 0
         Number of bytes to read. By default, will read all bytes.
     keep_quotes : bool, default False
         Whether the reader should keep quotes of string values.
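Why these hunks switch from size_type to size_t: cudf::size_type is a 32-bit int, so byte
ranges pointing more than 2 GiB into a JSON source would overflow it. A minimal usage sketch,
assuming the wheels above are installed (the path and offsets are illustrative):

    import pylibcudf as plc

    # Read a 1 MiB window starting 3 GiB into a JSON-lines file; the offset no
    # longer has to fit in a 32-bit size_type.
    source = plc.io.SourceInfo(["/tmp/big.jsonl"])
    result = plc.io.json.read_json(
        source,
        lines=True,
        byte_range_offset=3 * 1024**3,
        byte_range_size=1024**2,
    )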
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd index 7514e6c5258..1c74f8ca3ac 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd @@ -27,8 +27,8 @@ cdef extern from "cudf/io/json.hpp" \ cudf_io_types.source_info get_source() except + vector[string] get_dtypes() except + cudf_io_types.compression_type get_compression() except + - size_type get_byte_range_offset() except + - size_type get_byte_range_size() except + + size_t get_byte_range_offset() except + + size_t get_byte_range_size() except + bool is_enabled_lines() except + bool is_enabled_mixed_types_as_string() except + bool is_enabled_prune_columns() except + @@ -41,8 +41,8 @@ cdef extern from "cudf/io/json.hpp" \ void set_compression( cudf_io_types.compression_type compression ) except + - void set_byte_range_offset(size_type offset) except + - void set_byte_range_size(size_type size) except + + void set_byte_range_offset(size_t offset) except + + void set_byte_range_size(size_t size) except + void enable_lines(bool val) except + void enable_mixed_types_as_string(bool val) except + void enable_prune_columns(bool val) except + @@ -73,10 +73,10 @@ cdef extern from "cudf/io/json.hpp" \ cudf_io_types.compression_type compression ) except + json_reader_options_builder& byte_range_offset( - size_type offset + size_t offset ) except + json_reader_options_builder& byte_range_size( - size_type size + size_t size ) except + json_reader_options_builder& lines( bool val diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index b037508d03f..63d76e9fd4e 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -19,6 +19,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "cuda-python>=11.7.1,<12.0a0", + "libcudf==24.10.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", "pyarrow>=16.1.0,<16.2.0a0", @@ -101,6 +102,8 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", + "libcudf==24.10.*,>=0.0.0a0", + "librmm==24.10.*,>=0.0.0a0", "ninja", "numpy==1.23.*", "pyarrow==16.1.0.*",