diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 78f529a44d3..9cbacee8e8d 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -276,7 +276,7 @@ rapids_cpm_init()

 # Not using rapids-cmake since we never want to find, always download.
 CPMAddPackage(
-  NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW TRUE GIT_TAG
+  NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW FALSE GIT_TAG
   c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55 VERSION c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55
 )
 rapids_make_logger(cudf EXPORT_SET cudf-exports)
@@ -916,7 +916,9 @@ if(CUDF_LARGE_STRINGS_DISABLED)
 endif()

 # Define logging level
-target_compile_definitions(cudf PRIVATE "CUDF_LOG_ACTIVE_LEVEL=${LIBCUDF_LOGGING_LEVEL}")
+target_compile_definitions(
+  cudf PRIVATE "CUDF_LOG_ACTIVE_LEVEL=CUDF_LOG_LEVEL_${LIBCUDF_LOGGING_LEVEL}"
+)

 # Enable remote IO through KvikIO
 target_compile_definitions(cudf PRIVATE $<$:CUDF_KVIKIO_REMOTE_IO>)
diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake
index c440643037b..b0c48e04710 100644
--- a/cpp/cmake/thirdparty/get_nanoarrow.cmake
+++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake
@@ -14,11 +14,6 @@

 # This function finds nanoarrow and sets any additional necessary environment variables.
 function(find_and_configure_nanoarrow)
-  include(${rapids-cmake-dir}/cpm/package_override.cmake)
-
-  set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
-  rapids_cpm_package_override("${cudf_patch_dir}/nanoarrow_override.json")
-
   if(NOT BUILD_SHARED_LIBS)
     set(_exclude_from_all EXCLUDE_FROM_ALL FALSE)
   else()
@@ -31,6 +26,9 @@ function(find_and_configure_nanoarrow)
     nanoarrow 0.6.0.dev
     GLOBAL_TARGETS nanoarrow
     CPM_ARGS
+    GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
+    GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
+    GIT_SHALLOW FALSE
     OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" ${_exclude_from_all}
   )
   set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff b/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
deleted file mode 100644
index e9a36fcb567..00000000000
--- a/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
+++ /dev/null
@@ -1,38 +0,0 @@
-diff --git a/src/nanoarrow/common/inline_buffer.h b/src/nanoarrow/common/inline_buffer.h
-index caa6be4..70ec8a2 100644
---- a/src/nanoarrow/common/inline_buffer.h
-+++ b/src/nanoarrow/common/inline_buffer.h
-@@ -347,7 +347,7 @@ static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) {
- }
-
- static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
--  *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) |
-+  *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) |  // NOLINT
-                    ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) |
-                    ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) |
-                    ((values[7] + 0x7f) & 0x80));
-@@ -471,13 +471,13 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l
-     // set bits within a single byte
-     const uint8_t only_byte_mask =
-         i_end % 8 == 0 ?
first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask); -- bits[bytes_begin] &= only_byte_mask; -+ bits[bytes_begin] &= only_byte_mask; // NOLINT - bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); - return; - } - - // set/clear trailing bits of first byte -- bits[bytes_begin] &= first_byte_mask; -+ bits[bytes_begin] &= first_byte_mask; // NOLINT - bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); - - if (bytes_end - bytes_begin > 2) { -@@ -637,7 +637,7 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, - n_remaining -= n_full_bytes * 8; - if (n_remaining > 0) { - // Zero out the last byte -- *out_cursor = 0x00; -+ *out_cursor = 0x00; // NOLINT - for (int i = 0; i < n_remaining; i++) { - ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); - } diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_override.json b/cpp/cmake/thirdparty/patches/nanoarrow_override.json deleted file mode 100644 index d529787e7c8..00000000000 --- a/cpp/cmake/thirdparty/patches/nanoarrow_override.json +++ /dev/null @@ -1,18 +0,0 @@ - -{ - "packages" : { - "nanoarrow" : { - "version" : "0.6.0.dev", - "git_url" : "https://github.com/apache/arrow-nanoarrow.git", - "git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb", - "git_shallow" : false, - "patches" : [ - { - "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff", - "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537", - "fixed_in" : "" - } - ] - } - } -} diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index ea480b133dc..aacb5ccfede 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -444,7 +444,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @return string_view instance representing this element at this index */ template )> - __device__ [[nodiscard]] T element(size_type element_index) const noexcept + [[nodiscard]] __device__ T element(size_type element_index) const noexcept { size_type index = element_index + offset(); // account for this view's _offset char const* d_strings = static_cast(_data); @@ -503,7 +503,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @return dictionary32 instance representing this element at this index */ template )> - __device__ [[nodiscard]] T element(size_type element_index) const noexcept + [[nodiscard]] __device__ T element(size_type element_index) const noexcept { size_type index = element_index + offset(); // account for this view's _offset auto const indices = d_children[0]; @@ -521,7 +521,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @return numeric::fixed_point representing the element at this index */ template ())> - __device__ [[nodiscard]] T element(size_type element_index) const noexcept + [[nodiscard]] __device__ T element(size_type element_index) const noexcept { using namespace numeric; using rep = typename T::rep; @@ -1034,7 +1034,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @return Reference to the element at the specified index */ template ())> - __device__ [[nodiscard]] T& element(size_type element_index) const noexcept + [[nodiscard]] __device__ T& element(size_type element_index) const noexcept { return data()[element_index]; } @@ -1427,13 +1427,13 @@ struct pair_rep_accessor { private: template , void>* = nullptr> - __device__ 
[[nodiscard]] inline auto get_rep(cudf::size_type i) const + [[nodiscard]] __device__ inline auto get_rep(cudf::size_type i) const { return col.element(i); } template , void>* = nullptr> - __device__ [[nodiscard]] inline auto get_rep(cudf::size_type i) const + [[nodiscard]] __device__ inline auto get_rep(cudf::size_type i) const { return col.element(i).value(); } diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index c30c3d6f4bd..59011f7b138 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -31,7 +32,6 @@ #include #include -#include namespace cudf { namespace detail { @@ -216,12 +216,12 @@ struct identity_initializer { * @throw cudf::logic_error if column type is not fixed-width * * @param table The table of columns to initialize. - * @param aggs A vector of aggregation operations corresponding to the table + * @param aggs A span of aggregation operations corresponding to the table * columns. The aggregations determine the identity value for each column. * @param stream CUDA stream used for device memory operations and kernel launches. */ void initialize_with_identity(mutable_table_view& table, - std::vector const& aggs, + host_span aggs, rmm::cuda_stream_view stream); } // namespace detail diff --git a/cpp/include/cudf/detail/device_scalar.hpp b/cpp/include/cudf/detail/device_scalar.hpp index 16ca06c6561..090dc8b62b6 100644 --- a/cpp/include/cudf/detail/device_scalar.hpp +++ b/cpp/include/cudf/detail/device_scalar.hpp @@ -78,7 +78,7 @@ class device_scalar : public rmm::device_scalar { [[nodiscard]] T value(rmm::cuda_stream_view stream) const { cuda_memcpy(bounce_buffer, device_span{this->data(), 1}, stream); - return bounce_buffer[0]; + return std::move(bounce_buffer[0]); } void set_value_async(T const& value, rmm::cuda_stream_view stream) diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp index 504c31057ae..33f3176d2c6 100644 --- a/cpp/include/cudf/strings/string_view.hpp +++ b/cpp/include/cudf/strings/string_view.hpp @@ -54,7 +54,7 @@ class string_view { * * @return The number of characters in this string */ - __device__ [[nodiscard]] inline size_type length() const; + [[nodiscard]] __device__ inline size_type length() const; /** * @brief Return a pointer to the internal device array * @@ -119,13 +119,13 @@ class string_view { * * @return new iterator pointing to the beginning of this string */ - __device__ [[nodiscard]] inline const_iterator begin() const; + [[nodiscard]] __device__ inline const_iterator begin() const; /** * @brief Return new iterator pointing past the end of this string * * @return new iterator pointing past the end of this string */ - __device__ [[nodiscard]] inline const_iterator end() const; + [[nodiscard]] __device__ inline const_iterator end() const; /** * @brief Return single UTF-8 character at the given character position @@ -140,7 +140,7 @@ class string_view { * @param pos Character position * @return Byte offset from data() for a given character position */ - __device__ [[nodiscard]] inline size_type byte_offset(size_type pos) const; + [[nodiscard]] __device__ inline size_type byte_offset(size_type pos) const; /** * @brief Comparing target string with this string. 
Each character is compared @@ -155,7 +155,7 @@ class string_view { * not match is greater in the arg string, or all compared characters * match but the arg string is longer. */ - __device__ [[nodiscard]] inline int compare(string_view const& str) const; + [[nodiscard]] __device__ inline int compare(string_view const& str) const; /** * @brief Comparing target string with this string. Each character is compared * as a UTF-8 code-point value. @@ -225,7 +225,7 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return npos if str is not found in this string. */ - __device__ [[nodiscard]] inline size_type find(string_view const& str, + [[nodiscard]] __device__ inline size_type find(string_view const& str, size_type pos = 0, size_type count = -1) const; /** @@ -253,7 +253,7 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return npos if arg string is not found in this string. */ - __device__ [[nodiscard]] inline size_type find(char_utf8 character, + [[nodiscard]] __device__ inline size_type find(char_utf8 character, size_type pos = 0, size_type count = -1) const; /** @@ -266,7 +266,7 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return npos if arg string is not found in this string. */ - __device__ [[nodiscard]] inline size_type rfind(string_view const& str, + [[nodiscard]] __device__ inline size_type rfind(string_view const& str, size_type pos = 0, size_type count = -1) const; /** @@ -294,7 +294,7 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return npos if arg string is not found in this string. */ - __device__ [[nodiscard]] inline size_type rfind(char_utf8 character, + [[nodiscard]] __device__ inline size_type rfind(char_utf8 character, size_type pos = 0, size_type count = -1) const; @@ -306,7 +306,7 @@ class string_view { * @param length Number of characters from start to include in the sub-string. * @return New instance pointing to a subset of the characters within this instance. */ - __device__ [[nodiscard]] inline string_view substr(size_type start, size_type length) const; + [[nodiscard]] __device__ inline string_view substr(size_type start, size_type length) const; /** * @brief Return minimum value associated with the string type @@ -386,7 +386,7 @@ class string_view { * @param bytepos Byte position from start of _data. * @return The character position for the specified byte. */ - __device__ [[nodiscard]] inline size_type character_offset(size_type bytepos) const; + [[nodiscard]] __device__ inline size_type character_offset(size_type bytepos) const; /** * @brief Common internal implementation for string_view::find and string_view::rfind. 
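The `__device__ [[nodiscard]]` to `[[nodiscard]] __device__` reorderings above all apply one C++ rule: a standard attribute that appertains to a declaration must prefix it, ahead of any decl-specifiers, including CUDA's execution-space specifiers; clang-based tooling such as clang-tidy diagnoses the attribute when it trails `__device__`. A minimal sketch of the rule, using a hypothetical `widget` type that is not part of this diff:

```cpp
// Hypothetical illustration of attribute ordering; compiles under nvcc.
struct widget {
  int v{};

  // Portable: the standard attribute comes first, then the
  // execution-space specifier, then the rest of the declaration.
  [[nodiscard]] __device__ int value() const noexcept { return v; }

  // The pre-change spelling `__device__ [[nodiscard]] int value() ...`
  // places the attribute after a decl-specifier, which clang-based
  // tools reject with "an attribute list cannot appear here".
};
```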
diff --git a/cpp/include/cudf/utilities/memory_resource.hpp b/cpp/include/cudf/utilities/memory_resource.hpp index b562574fd79..eaba466557b 100644 --- a/cpp/include/cudf/utilities/memory_resource.hpp +++ b/cpp/include/cudf/utilities/memory_resource.hpp @@ -16,8 +16,6 @@ #pragma once -#include - #include #include #include diff --git a/cpp/src/aggregation/aggregation.cu b/cpp/src/aggregation/aggregation.cu index d915c85bf85..3a6ff36c424 100644 --- a/cpp/src/aggregation/aggregation.cu +++ b/cpp/src/aggregation/aggregation.cu @@ -17,15 +17,14 @@ #include #include #include +#include #include -#include - namespace cudf { namespace detail { void initialize_with_identity(mutable_table_view& table, - std::vector const& aggs, + host_span aggs, rmm::cuda_stream_view stream) { // TODO: Initialize all the columns in a single kernel instead of invoking one diff --git a/cpp/src/bitmask/is_element_valid.cpp b/cpp/src/bitmask/is_element_valid.cpp index 7eb80c4249e..d36dacca739 100644 --- a/cpp/src/bitmask/is_element_valid.cpp +++ b/cpp/src/bitmask/is_element_valid.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -30,15 +31,14 @@ bool is_element_valid_sync(column_view const& col_view, CUDF_EXPECTS(element_index >= 0 and element_index < col_view.size(), "invalid index."); if (!col_view.nullable()) { return true; } - bitmask_type word = 0; // null_mask() returns device ptr to bitmask without offset size_type const index = element_index + col_view.offset(); - CUDF_CUDA_TRY(cudaMemcpyAsync(&word, - col_view.null_mask() + word_index(index), - sizeof(bitmask_type), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + + auto const word = + cudf::detail::make_host_vector_sync( + device_span{col_view.null_mask() + word_index(index), 1}, stream) + .front(); + return static_cast(word & (bitmask_type{1} << intra_word_index(index))); } diff --git a/cpp/src/column/column_device_view.cu b/cpp/src/column/column_device_view.cu index fc244521617..9dc39f01ab3 100644 --- a/cpp/src/column/column_device_view.cu +++ b/cpp/src/column/column_device_view.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -60,13 +61,12 @@ create_device_view_from_view(ColumnView const& source, rmm::cuda_stream_view str // A buffer of CPU memory is allocated to hold the ColumnDeviceView // objects. Once filled, the CPU memory is copied to device memory // and then set into the d_children member pointer. - std::vector staging_buffer(descendant_storage_bytes); + auto staging_buffer = detail::make_host_vector(descendant_storage_bytes, stream); // Each ColumnDeviceView instance may have child objects that // require setting some internal device pointers before being copied // from CPU to device. 
- rmm::device_buffer* const descendant_storage = - new rmm::device_buffer(descendant_storage_bytes, stream); + auto const descendant_storage = new rmm::device_uvector(descendant_storage_bytes, stream); auto deleter = [descendant_storage](ColumnDeviceView* v) { v->destroy(); @@ -77,13 +77,7 @@ create_device_view_from_view(ColumnView const& source, rmm::cuda_stream_view str new ColumnDeviceView(source, staging_buffer.data(), descendant_storage->data()), deleter}; // copy the CPU memory with all the children into device memory - CUDF_CUDA_TRY(cudaMemcpyAsync(descendant_storage->data(), - staging_buffer.data(), - descendant_storage->size(), - cudaMemcpyDefault, - stream.value())); - - stream.synchronize(); + detail::cuda_memcpy(*descendant_storage, staging_buffer, stream); return result; } diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index e3ed5b55415..3413f75357b 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -998,7 +998,8 @@ struct packed_split_indices_and_src_buf_info { src_buf_info_size( cudf::util::round_up_safe(num_src_bufs * sizeof(src_buf_info), split_align)), // host-side - h_indices_and_source_info(indices_size + src_buf_info_size), + h_indices_and_source_info{ + detail::make_host_vector(indices_size + src_buf_info_size, stream)}, h_indices{reinterpret_cast(h_indices_and_source_info.data())}, h_src_buf_info{ reinterpret_cast(h_indices_and_source_info.data() + indices_size)} @@ -1025,15 +1026,18 @@ struct packed_split_indices_and_src_buf_info { reinterpret_cast(reinterpret_cast(d_indices_and_source_info.data()) + indices_size + src_buf_info_size); - CUDF_CUDA_TRY(cudaMemcpyAsync( - d_indices, h_indices, indices_size + src_buf_info_size, cudaMemcpyDefault, stream.value())); + detail::cuda_memcpy_async( + device_span{static_cast(d_indices_and_source_info.data()), + h_indices_and_source_info.size()}, + h_indices_and_source_info, + stream); } size_type const indices_size; std::size_t const src_buf_info_size; std::size_t offset_stack_size; - std::vector h_indices_and_source_info; + detail::host_vector h_indices_and_source_info; rmm::device_buffer d_indices_and_source_info; size_type* const h_indices; @@ -1055,27 +1059,26 @@ struct packed_partition_buf_size_and_dst_buf_info { buf_sizes_size{cudf::util::round_up_safe(num_partitions * sizeof(std::size_t), split_align)}, dst_buf_info_size{cudf::util::round_up_safe(num_bufs * sizeof(dst_buf_info), split_align)}, // host-side - h_buf_sizes_and_dst_info(buf_sizes_size + dst_buf_info_size), + h_buf_sizes_and_dst_info{ + detail::make_host_vector(buf_sizes_size + dst_buf_info_size, stream)}, h_buf_sizes{reinterpret_cast(h_buf_sizes_and_dst_info.data())}, h_dst_buf_info{ - reinterpret_cast(h_buf_sizes_and_dst_info.data() + buf_sizes_size)}, + reinterpret_cast(h_buf_sizes_and_dst_info.data() + buf_sizes_size), + num_bufs, + h_buf_sizes_and_dst_info.get_allocator().is_device_accessible()}, // device-side - d_buf_sizes_and_dst_info(buf_sizes_size + dst_buf_info_size, stream, temp_mr), + d_buf_sizes_and_dst_info(h_buf_sizes_and_dst_info.size(), stream, temp_mr), d_buf_sizes{reinterpret_cast(d_buf_sizes_and_dst_info.data())}, // destination buffer info - d_dst_buf_info{reinterpret_cast( - static_cast(d_buf_sizes_and_dst_info.data()) + buf_sizes_size)} + d_dst_buf_info{ + reinterpret_cast(d_buf_sizes_and_dst_info.data() + buf_sizes_size), num_bufs} { } void copy_to_host() { // DtoH buf sizes and col info back to the host - CUDF_CUDA_TRY(cudaMemcpyAsync(h_buf_sizes, - 
d_buf_sizes, - buf_sizes_size + dst_buf_info_size, - cudaMemcpyDefault, - stream.value())); + detail::cuda_memcpy_async(h_buf_sizes_and_dst_info, d_buf_sizes_and_dst_info, stream); } rmm::cuda_stream_view const stream; @@ -1084,13 +1087,13 @@ struct packed_partition_buf_size_and_dst_buf_info { std::size_t const buf_sizes_size; std::size_t const dst_buf_info_size; - std::vector h_buf_sizes_and_dst_info; + detail::host_vector h_buf_sizes_and_dst_info; std::size_t* const h_buf_sizes; - dst_buf_info* const h_dst_buf_info; + host_span const h_dst_buf_info; - rmm::device_buffer d_buf_sizes_and_dst_info; + rmm::device_uvector d_buf_sizes_and_dst_info; std::size_t* const d_buf_sizes; - dst_buf_info* const d_dst_buf_info; + device_span const d_dst_buf_info; }; // Packed block of memory 3: @@ -1106,11 +1109,12 @@ struct packed_src_and_dst_pointers { src_bufs_size{cudf::util::round_up_safe(num_src_bufs * sizeof(uint8_t*), split_align)}, dst_bufs_size{cudf::util::round_up_safe(num_partitions * sizeof(uint8_t*), split_align)}, // host-side - h_src_and_dst_buffers(src_bufs_size + dst_bufs_size), + h_src_and_dst_buffers{ + detail::make_host_vector(src_bufs_size + dst_bufs_size, stream)}, h_src_bufs{reinterpret_cast(h_src_and_dst_buffers.data())}, h_dst_bufs{reinterpret_cast(h_src_and_dst_buffers.data() + src_bufs_size)}, // device-side - d_src_and_dst_buffers{rmm::device_buffer(src_bufs_size + dst_bufs_size, stream, temp_mr)}, + d_src_and_dst_buffers{h_src_and_dst_buffers.size(), stream, temp_mr}, d_src_bufs{reinterpret_cast(d_src_and_dst_buffers.data())}, d_dst_bufs{reinterpret_cast( reinterpret_cast(d_src_and_dst_buffers.data()) + src_bufs_size)} @@ -1121,18 +1125,18 @@ struct packed_src_and_dst_pointers { void copy_to_device() { - CUDF_CUDA_TRY(cudaMemcpyAsync(d_src_and_dst_buffers.data(), - h_src_and_dst_buffers.data(), - src_bufs_size + dst_bufs_size, - cudaMemcpyDefault, - stream.value())); + detail::cuda_memcpy_async( + device_span{static_cast(d_src_and_dst_buffers.data()), + d_src_and_dst_buffers.size()}, + h_src_and_dst_buffers, + stream); } rmm::cuda_stream_view const stream; std::size_t const src_bufs_size; std::size_t const dst_bufs_size; - std::vector h_src_and_dst_buffers; + detail::host_vector h_src_and_dst_buffers; uint8_t const** const h_src_bufs; uint8_t** const h_dst_bufs; @@ -1205,7 +1209,7 @@ std::unique_ptr compute_splits( std::make_unique( num_partitions, num_bufs, stream, temp_mr); - auto const d_dst_buf_info = partition_buf_size_and_dst_buf_info->d_dst_buf_info; + auto const d_dst_buf_info = partition_buf_size_and_dst_buf_info->d_dst_buf_info.begin(); auto const d_buf_sizes = partition_buf_size_and_dst_buf_info->d_buf_sizes; auto const split_indices_and_src_buf_info = packed_split_indices_and_src_buf_info( @@ -1518,26 +1522,19 @@ std::unique_ptr chunk_iteration_state::create( */ if (user_buffer_size != 0) { // copy the batch offsets back to host - std::vector h_offsets(num_batches + 1); - { - rmm::device_uvector offsets(h_offsets.size(), stream, temp_mr); + auto const h_offsets = [&] { + rmm::device_uvector offsets(num_batches + 1, stream, temp_mr); auto const batch_byte_size_iter = cudf::detail::make_counting_transform_iterator( 0, batch_byte_size_function{num_batches, d_batched_dst_buf_info.begin()}); - thrust::exclusive_scan(rmm::exec_policy(stream, temp_mr), + thrust::exclusive_scan(rmm::exec_policy_nosync(stream, temp_mr), batch_byte_size_iter, - batch_byte_size_iter + num_batches + 1, + batch_byte_size_iter + offsets.size(), offsets.begin()); - 
CUDF_CUDA_TRY(cudaMemcpyAsync(h_offsets.data(), - offsets.data(), - sizeof(std::size_t) * offsets.size(), - cudaMemcpyDefault, - stream.value())); - // the next part is working on the CPU, so we want to synchronize here - stream.synchronize(); - } + return detail::make_host_vector_sync(offsets, stream); + }(); std::vector num_batches_per_iteration; std::vector size_of_batches_per_iteration; @@ -1699,7 +1696,7 @@ void copy_data(int num_batches_to_copy, int starting_batch, uint8_t const** d_src_bufs, uint8_t** d_dst_bufs, - rmm::device_uvector& d_dst_buf_info, + device_span d_dst_buf_info, uint8_t* user_buffer, rmm::cuda_stream_view stream) { @@ -1833,15 +1830,9 @@ struct contiguous_split_state { keys + num_batches_total, values, thrust::make_discard_iterator(), - dst_valid_count_output_iterator{d_orig_dst_buf_info}); - - CUDF_CUDA_TRY(cudaMemcpyAsync(h_orig_dst_buf_info, - d_orig_dst_buf_info, - partition_buf_size_and_dst_buf_info->dst_buf_info_size, - cudaMemcpyDefault, - stream.value())); + dst_valid_count_output_iterator{d_orig_dst_buf_info.begin()}); - stream.synchronize(); + detail::cuda_memcpy(h_orig_dst_buf_info, d_orig_dst_buf_info, stream); // not necessary for the non-chunked case, but it makes it so further calls to has_next // return false, just in case @@ -1889,7 +1880,7 @@ struct contiguous_split_state { } auto& h_dst_buf_info = partition_buf_size_and_dst_buf_info->h_dst_buf_info; - auto cur_dst_buf_info = h_dst_buf_info; + auto cur_dst_buf_info = h_dst_buf_info.data(); detail::metadata_builder mb{input.num_columns()}; populate_metadata(input.begin(), input.end(), cur_dst_buf_info, mb); @@ -1927,7 +1918,7 @@ struct contiguous_split_state { // Second pass: uses `dst_buf_info` to break down the work into 1MB batches. chunk_iter_state = compute_batches(num_bufs, - partition_buf_size_and_dst_buf_info->d_dst_buf_info, + partition_buf_size_and_dst_buf_info->d_dst_buf_info.data(), partition_buf_size_and_dst_buf_info->h_buf_sizes, num_partitions, user_buffer_size, @@ -1963,7 +1954,7 @@ struct contiguous_split_state { auto& h_dst_buf_info = partition_buf_size_and_dst_buf_info->h_dst_buf_info; auto& h_dst_bufs = src_and_dst_pointers->h_dst_bufs; - auto cur_dst_buf_info = h_dst_buf_info; + auto cur_dst_buf_info = h_dst_buf_info.data(); detail::metadata_builder mb(input.num_columns()); for (std::size_t idx = 0; idx < num_partitions; idx++) { diff --git a/cpp/src/groupby/hash/compute_aggregations.cuh b/cpp/src/groupby/hash/compute_aggregations.cuh index e8b29a0e7a8..9c9a4c97bff 100644 --- a/cpp/src/groupby/hash/compute_aggregations.cuh +++ b/cpp/src/groupby/hash/compute_aggregations.cuh @@ -60,7 +60,7 @@ rmm::device_uvector compute_aggregations( rmm::cuda_stream_view stream) { // flatten the aggs to a table that can be operated on by aggregate_row - auto [flattened_values, agg_kinds, aggs] = flatten_single_pass_aggs(requests); + auto [flattened_values, agg_kinds, aggs] = flatten_single_pass_aggs(requests, stream); auto const d_agg_kinds = cudf::detail::make_device_uvector_async( agg_kinds, stream, rmm::mr::get_current_device_resource()); diff --git a/cpp/src/groupby/hash/compute_global_memory_aggs.cu b/cpp/src/groupby/hash/compute_global_memory_aggs.cu index 6025686953e..d2830f7d905 100644 --- a/cpp/src/groupby/hash/compute_global_memory_aggs.cu +++ b/cpp/src/groupby/hash/compute_global_memory_aggs.cu @@ -24,7 +24,7 @@ template rmm::device_uvector compute_global_memory_aggs const& agg_kinds, + host_span agg_kinds, global_set_t& global_set, std::vector>& aggregations, 
cudf::detail::result_cache* sparse_results, diff --git a/cpp/src/groupby/hash/compute_global_memory_aggs.cuh b/cpp/src/groupby/hash/compute_global_memory_aggs.cuh index 00db149c6d9..671ee2ea31f 100644 --- a/cpp/src/groupby/hash/compute_global_memory_aggs.cuh +++ b/cpp/src/groupby/hash/compute_global_memory_aggs.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,7 @@ rmm::device_uvector compute_global_memory_aggs( bitmask_type const* row_bitmask, cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, - std::vector const& agg_kinds, + host_span agg_kinds, SetType& global_set, std::vector>& aggregations, cudf::detail::result_cache* sparse_results, diff --git a/cpp/src/groupby/hash/compute_global_memory_aggs.hpp b/cpp/src/groupby/hash/compute_global_memory_aggs.hpp index 0777b9ffd93..437823a3fea 100644 --- a/cpp/src/groupby/hash/compute_global_memory_aggs.hpp +++ b/cpp/src/groupby/hash/compute_global_memory_aggs.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -34,7 +35,7 @@ rmm::device_uvector compute_global_memory_aggs( bitmask_type const* row_bitmask, cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, - std::vector const& agg_kinds, + host_span agg_kinds, SetType& global_set, std::vector>& aggregations, cudf::detail::result_cache* sparse_results, diff --git a/cpp/src/groupby/hash/compute_global_memory_aggs_null.cu b/cpp/src/groupby/hash/compute_global_memory_aggs_null.cu index 209e2b7f20a..7cb3f8f190b 100644 --- a/cpp/src/groupby/hash/compute_global_memory_aggs_null.cu +++ b/cpp/src/groupby/hash/compute_global_memory_aggs_null.cu @@ -24,7 +24,7 @@ template rmm::device_uvector compute_global_memory_aggs const& agg_kinds, + host_span agg_kinds, nullable_global_set_t& global_set, std::vector>& aggregations, cudf::detail::result_cache* sparse_results, diff --git a/cpp/src/groupby/hash/create_sparse_results_table.cu b/cpp/src/groupby/hash/create_sparse_results_table.cu index bc32e306b3f..a835736235c 100644 --- a/cpp/src/groupby/hash/create_sparse_results_table.cu +++ b/cpp/src/groupby/hash/create_sparse_results_table.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -48,7 +49,7 @@ void extract_populated_keys(SetType const& key_set, template cudf::table create_sparse_results_table(cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, - std::vector agg_kinds, + host_span agg_kinds, bool direct_aggregations, GlobalSetType const& global_set, rmm::device_uvector& populated_keys, @@ -107,7 +108,7 @@ template void extract_populated_keys( template cudf::table create_sparse_results_table( cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, - std::vector agg_kinds, + host_span agg_kinds, bool direct_aggregations, global_set_t const& global_set, rmm::device_uvector& populated_keys, @@ -116,7 +117,7 @@ template cudf::table create_sparse_results_table( template cudf::table create_sparse_results_table( cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, - std::vector agg_kinds, + host_span agg_kinds, bool direct_aggregations, nullable_global_set_t const& global_set, rmm::device_uvector& populated_keys, diff --git a/cpp/src/groupby/hash/create_sparse_results_table.hpp b/cpp/src/groupby/hash/create_sparse_results_table.hpp index 8155ce852e0..4e2fa81bdb7 100644 --- a/cpp/src/groupby/hash/create_sparse_results_table.hpp +++ 
b/cpp/src/groupby/hash/create_sparse_results_table.hpp @@ -20,12 +20,11 @@ #include #include #include +#include #include #include -#include - namespace cudf::groupby::detail::hash { /** * @brief Computes and returns a device vector containing all populated keys in @@ -47,7 +46,7 @@ void extract_populated_keys(SetType const& key_set, template cudf::table create_sparse_results_table(cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, - std::vector agg_kinds, + host_span agg_kinds, bool direct_aggregations, GlobalSetType const& global_set, rmm::device_uvector& populated_keys, diff --git a/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp b/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp index b2048a9fbb8..a533f7a6448 100644 --- a/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp +++ b/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -102,12 +103,15 @@ class groupby_simple_aggregations_collector final }; // flatten aggs to filter in single pass aggs -std::tuple, std::vector>> -flatten_single_pass_aggs(host_span requests) +std::tuple, + std::vector>> +flatten_single_pass_aggs(host_span requests, + rmm::cuda_stream_view stream) { std::vector columns; std::vector> aggs; - std::vector agg_kinds; + auto agg_kinds = cudf::detail::make_empty_host_vector(requests.size(), stream); for (auto const& request : requests) { auto const& agg_v = request.aggregations; diff --git a/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp b/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp index dfad51f27d4..e3c17ca972c 100644 --- a/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp +++ b/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp @@ -26,7 +26,10 @@ namespace cudf::groupby::detail::hash { // flatten aggs to filter in single pass aggs -std::tuple, std::vector>> -flatten_single_pass_aggs(host_span requests); +std::tuple, + std::vector>> +flatten_single_pass_aggs(host_span requests, + rmm::cuda_stream_view stream); } // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu index 37a61c1a22c..b71e20938d6 100644 --- a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu +++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu @@ -170,7 +170,8 @@ void hash_compound_agg_finalizer::visit(cudf::detail::var_aggregation c cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream); mutable_table_view var_table_view{{var_result->mutable_view()}}; - cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream); + cudf::detail::initialize_with_identity( + var_table_view, host_span(&agg.kind, 1), stream); thrust::for_each_n( rmm::exec_policy_nosync(stream), diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index 86835ea8a67..5082ad01327 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -107,7 +107,10 @@ struct group_scan_functor() if (values.is_empty()) { return result; } auto result_table = mutable_table_view({*result}); - cudf::detail::initialize_with_identity(result_table, {K}, stream); + // Need an address of the aggregation kind to pass to the span + auto const kind = K; + cudf::detail::initialize_with_identity( + result_table, host_span(&kind, 1), stream); auto result_view 
= mutable_column_device_view::create(result->mutable_view(), stream); auto values_view = column_device_view::create(values, stream); diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 7f0b5e07b09..e05353ee822 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -21,13 +21,13 @@ #include "csv_common.hpp" #include "csv_gpu.hpp" -#include "cudf/detail/utilities/cuda_memcpy.hpp" #include "io/comp/io_uncomp.hpp" #include "io/utilities/column_buffer.hpp" #include "io/utilities/hostdevice_vector.hpp" #include "io/utilities/parsing_utils.cuh" #include +#include #include #include #include diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 8e532b01788..6b9c19368dc 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -19,7 +19,6 @@ * @brief cuDF-IO ORC writer class implementation */ -#include "cudf/detail/utilities/cuda_memcpy.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/orc/orc_gpu.hpp" #include "io/statistics/column_statistics.cuh" @@ -30,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index 98fd9f679c8..21d8c95e199 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -218,9 +218,8 @@ struct minmax_functor { auto dev_result = reduce(col, stream); // copy the minmax_pair to the host; does not copy the strings using OutputType = minmax_pair; - OutputType host_result; - CUDF_CUDA_TRY(cudaMemcpyAsync( - &host_result, dev_result.data(), sizeof(OutputType), cudaMemcpyDefault, stream.value())); + + auto const host_result = dev_result.value(stream); // strings are copied to create the scalars here return {std::make_unique(host_result.min_val, true, stream, mr), std::make_unique(host_result.max_val, true, stream, mr)}; @@ -236,10 +235,8 @@ struct minmax_functor { // compute minimum and maximum values auto dev_result = reduce(col, stream); // copy the minmax_pair to the host to call get_element - using OutputType = minmax_pair; - OutputType host_result; - CUDF_CUDA_TRY(cudaMemcpyAsync( - &host_result, dev_result.data(), sizeof(OutputType), cudaMemcpyDefault, stream.value())); + using OutputType = minmax_pair; + OutputType host_result = dev_result.value(stream); // get the keys for those indexes auto const keys = dictionary_column_view(col).keys(); return {detail::get_element(keys, static_cast(host_result.min_val), stream, mr), diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 4ec2174a96f..4b0b08fe251 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -114,11 +114,10 @@ string_scalar::operator std::string() const { return this->to_string(cudf::get_d std::string string_scalar::to_string(rmm::cuda_stream_view stream) const { - std::string result; - result.resize(_data.size()); - CUDF_CUDA_TRY( - cudaMemcpyAsync(&result[0], _data.data(), _data.size(), cudaMemcpyDefault, stream.value())); - stream.synchronize(); + std::string result(size(), '\0'); + detail::cuda_memcpy(host_span{result.data(), result.size()}, + device_span{data(), _data.size()}, + stream); return result; } diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index 2df404048f7..d22fb04696c 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -186,7 +186,7 @@ class reprog_device { * Specify -1 to match any virtual positions past the end of the string. 
* @return If match found, returns character positions of the matches. */ - __device__ [[nodiscard]] inline match_result find(int32_t const thread_idx, + [[nodiscard]] __device__ inline match_result find(int32_t const thread_idx, string_view const d_str, string_view::const_iterator begin, cudf::size_type end = -1) const; @@ -205,7 +205,7 @@ class reprog_device { * @param group_id The specific group to return its matching position values. * @return If valid, returns the character position of the matched group in the given string, */ - __device__ [[nodiscard]] inline match_result extract(int32_t const thread_idx, + [[nodiscard]] __device__ inline match_result extract(int32_t const thread_idx, string_view const d_str, string_view::const_iterator begin, cudf::size_type end, @@ -225,17 +225,17 @@ class reprog_device { /** * @brief Returns the regex instruction object for a given id. */ - __device__ [[nodiscard]] inline reinst get_inst(int32_t id) const; + [[nodiscard]] __device__ inline reinst get_inst(int32_t id) const; /** * @brief Returns the regex class object for a given id. */ - __device__ [[nodiscard]] inline reclass_device get_class(int32_t id) const; + [[nodiscard]] __device__ inline reclass_device get_class(int32_t id) const; /** * @brief Executes the regex pattern on the given string. */ - __device__ [[nodiscard]] inline match_result regexec(string_view const d_str, + [[nodiscard]] __device__ inline match_result regexec(string_view const d_str, reljunk jnk, string_view::const_iterator begin, cudf::size_type end, @@ -244,7 +244,7 @@ class reprog_device { /** * @brief Utility wrapper to setup state memory structures for calling regexec */ - __device__ [[nodiscard]] inline match_result call_regexec( + [[nodiscard]] __device__ inline match_result call_regexec( int32_t const thread_idx, string_view const d_str, string_view::const_iterator begin, diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index e34a1e12015..906f09e4d82 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -81,11 +81,11 @@ struct alignas(8) relist { return true; } - __device__ [[nodiscard]] __forceinline__ restate get_state(int16_t idx) const + [[nodiscard]] __device__ __forceinline__ restate get_state(int16_t idx) const { return restate{ranges[idx * stride], inst_ids[idx * stride]}; } - __device__ [[nodiscard]] __forceinline__ int16_t get_size() const { return size; } + [[nodiscard]] __device__ __forceinline__ int16_t get_size() const { return size; } private: int16_t size{}; @@ -101,7 +101,7 @@ struct alignas(8) relist { mask[pos >> 3] |= uc; } - __device__ [[nodiscard]] __forceinline__ bool readMask(int32_t pos) const + [[nodiscard]] __device__ __forceinline__ bool readMask(int32_t pos) const { u_char const uc = mask[pos >> 3]; return static_cast((uc >> (pos & 7)) & 1); diff --git a/cpp/src/strings/regex/regexec.cpp b/cpp/src/strings/regex/regexec.cpp index 3d11b641b3f..902e13fe75e 100644 --- a/cpp/src/strings/regex/regexec.cpp +++ b/cpp/src/strings/regex/regexec.cpp @@ -17,7 +17,9 @@ #include "strings/regex/regcomp.h" #include "strings/regex/regex.cuh" +#include #include +#include #include #include @@ -66,10 +68,11 @@ std::unique_ptr> reprog_devic cudf::util::round_up_safe(classes_size, sizeof(char32_t)); // allocate memory to store all the prog data in a flat contiguous buffer - std::vector h_buffer(memsize); // copy everything into here; - auto h_ptr = h_buffer.data(); // this is our running host ptr; - auto d_buffer = new rmm::device_buffer(memsize, 
stream); // output device memory; - auto d_ptr = reinterpret_cast(d_buffer->data()); // running device pointer + auto h_buffer = + cudf::detail::make_host_vector(memsize, stream); // copy everything into here; + auto h_ptr = h_buffer.data(); // this is our running host ptr; + auto d_buffer = new rmm::device_uvector(memsize, stream); // output device memory; + auto d_ptr = d_buffer->data(); // running device pointer // create our device object; this is managed separately and returned to the caller auto* d_prog = new reprog_device(h_prog); @@ -113,8 +116,7 @@ std::unique_ptr> reprog_devic d_prog->_prog_size = memsize + sizeof(reprog_device); // copy flat prog to device memory - CUDF_CUDA_TRY( - cudaMemcpyAsync(d_buffer->data(), h_buffer.data(), memsize, cudaMemcpyDefault, stream.value())); + cudf::detail::cuda_memcpy_async(*d_buffer, h_buffer, stream); // build deleter to cleanup device memory auto deleter = [d_buffer](reprog_device* t) { diff --git a/cpp/src/text/subword/load_hash_file.cu b/cpp/src/text/subword/load_hash_file.cu index b13ad0a7de8..ee51a426eac 100644 --- a/cpp/src/text/subword/load_hash_file.cu +++ b/cpp/src/text/subword/load_hash_file.cu @@ -19,6 +19,8 @@ #include #include +#include +#include #include #include #include @@ -198,8 +200,8 @@ std::unique_ptr load_vocabulary_file( std::getline(hash_file, line); result.num_bins = str_to_uint32(line, line_no++); - std::vector bin_coefficients(result.num_bins); - std::vector bin_offsets(result.num_bins); + auto bin_coefficients = cudf::detail::make_host_vector(result.num_bins, stream); + auto bin_offsets = cudf::detail::make_host_vector(result.num_bins, stream); for (int i = 0; i < result.num_bins; ++i) { std::getline(hash_file, line); @@ -216,7 +218,7 @@ std::unique_ptr load_vocabulary_file( std::getline(hash_file, line); uint64_t hash_table_length = str_to_uint64(line, line_no++); - std::vector table(hash_table_length); + auto table = cudf::detail::make_host_vector(hash_table_length, stream); std::generate(table.begin(), table.end(), [&hash_file, &line_no]() { std::string line; @@ -239,33 +241,32 @@ std::unique_ptr load_vocabulary_file( cudf::mask_state::UNALLOCATED, stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(result.table->mutable_view().data(), - table.data(), - table.size() * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async( + cudf::device_span(result.table->mutable_view().data(), table.size()), + table, + stream); result.bin_coefficients = cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT64}, bin_coefficients.size(), cudf::mask_state::UNALLOCATED, stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(result.bin_coefficients->mutable_view().data(), - bin_coefficients.data(), - bin_coefficients.size() * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async( + cudf::device_span(result.bin_coefficients->mutable_view().data(), + bin_coefficients.size()), + bin_coefficients, + stream); result.bin_offsets = cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT16}, bin_offsets.size(), cudf::mask_state::UNALLOCATED, stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(result.bin_offsets->mutable_view().data(), - bin_offsets.data(), - bin_offsets.size() * sizeof(uint16_t), - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async( + cudf::device_span(result.bin_offsets->mutable_view().data(), + bin_offsets.size()), + bin_offsets, + stream); auto cp_metadata = detail::get_codepoint_metadata(stream); auto const cp_metadata_size = 
static_cast(cp_metadata.size());
diff --git a/cpp/tests/streams/replace_test.cpp b/cpp/tests/streams/replace_test.cpp
index 89f76237de6..e3fdc177b50 100644
--- a/cpp/tests/streams/replace_test.cpp
+++ b/cpp/tests/streams/replace_test.cpp
@@ -104,9 +104,9 @@ TEST_F(ReplaceTest, NormalizeNansAndZeros)

 TEST_F(ReplaceTest, NormalizeNansAndZerosMutable)
 {
-  auto nan = std::numeric_limits::quiet_NaN();
-  auto input_column = cudf::test::make_type_param_vector({-0.0, 0.0, -nan, nan, nan});
-  cudf::test::fixed_width_column_wrapper input(input_column.begin(), input_column.end());
-  cudf::mutable_column_view mutable_view = cudf::column(input, cudf::test::get_default_stream());
-  cudf::normalize_nans_and_zeros(mutable_view, cudf::test::get_default_stream());
+  auto nan = std::numeric_limits::quiet_NaN();
+  auto data = cudf::test::make_type_param_vector({-0.0, 0.0, -nan, nan, nan});
+  auto input = cudf::test::fixed_width_column_wrapper(data.begin(), data.end()).release();
+  auto view = input->mutable_view();
+  cudf::normalize_nans_and_zeros(view, cudf::test::get_default_stream());
 }
diff --git a/dependencies.yaml b/dependencies.yaml
index 44767f1e9d3..7a83efc6e3d 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -878,7 +878,7 @@ dependencies:
       - matrix: {dependencies: "oldest"}
         packages:
           - numpy==1.23.*
-          - pyarrow==14.0.0
+          - pyarrow==14.*
       - matrix:
         packages:
   - output_types: conda
@@ -903,7 +903,7 @@
       - matrix: {dependencies: "oldest"}
         packages:
           - numpy==1.24.*
-          - pyarrow==14.0.1
+          - pyarrow==14.*
       - matrix:
         packages:
  test_python_cudf_polars:
diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index 427ffcc8c12..410fd57691e 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -12,9 +12,7 @@
 # the License.
 # =============================================================================

-set(cython_sources column.pyx copying.pyx groupby.pyx interop.pyx scalar.pyx stream_compaction.pyx
-    string_casting.pyx strings_udf.pyx types.pyx utils.pyx
-)
+set(cython_sources column.pyx groupby.pyx scalar.pyx strings_udf.pyx types.pyx utils.pyx)
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
@@ -24,8 +22,3 @@ rapids_cython_create_modules(
 )

 target_link_libraries(strings_udf PUBLIC cudf_strings_udf)
-target_include_directories(interop PUBLIC "$")
-
-include(${rapids-cmake-dir}/export/find_package_root.cmake)
-include(../../../../cpp/cmake/thirdparty/get_nanoarrow.cmake)
-target_link_libraries(interop PUBLIC nanoarrow)
diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py
index 26afdd62caf..6b5a7814e48 100644
--- a/python/cudf/cudf/_lib/__init__.py
+++ b/python/cudf/cudf/_lib/__init__.py
@@ -2,11 +2,7 @@
 import numpy as np

 from . import (
-    copying,
     groupby,
-    interop,
-    stream_compaction,
-    string_casting,
     strings_udf,
 )
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
deleted file mode 100644
index ef544dc89eb..00000000000
--- a/python/cudf/cudf/_lib/copying.pyx
+++ /dev/null
@@ -1,451 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
- -from libcpp cimport bool -import pylibcudf - -import cudf -from cudf.core.buffer import acquire_spill_lock, as_buffer -from cudf.core.abc import Serializable -from cudf._lib.column cimport Column - -from cudf._lib.scalar import as_device_scalar - -from cudf._lib.scalar cimport DeviceScalar - -from pylibcudf.libcudf.types cimport size_type - -from cudf._lib.utils cimport columns_from_pylibcudf_table, data_from_pylibcudf_table -import pylibcudf as plc -from pylibcudf.contiguous_split cimport PackedColumns as PlcPackedColumns - - -def _gather_map_is_valid( - gather_map: "cudf.core.column.ColumnBase", - nrows: int, - check_bounds: bool, - nullify: bool, -) -> bool: - """Returns true if gather map is valid. - - A gather map is valid if empty or all indices are within the range - ``[-nrows, nrows)``, except when ``nullify`` is specified. - """ - if not check_bounds or nullify or len(gather_map) == 0: - return True - gm_min, gm_max = gather_map.minmax() - return gm_min >= -nrows and gm_max < nrows - - -@acquire_spill_lock() -def copy_column(Column input_column): - """ - Deep copies a column - - Parameters - ---------- - input_columns : column to be copied - - Returns - ------- - Deep copied column - """ - return Column.from_pylibcudf( - input_column.to_pylibcudf(mode="read").copy() - ) - - -@acquire_spill_lock() -def _copy_range_in_place(Column input_column, - Column target_column, - size_type input_begin, - size_type input_end, - size_type target_begin): - pylibcudf.copying.copy_range( - input_column.to_pylibcudf(mode="write"), - target_column.to_pylibcudf(mode="write"), - input_begin, - input_end, - target_begin - ) - - -def _copy_range(Column input_column, - Column target_column, - size_type input_begin, - size_type input_end, - size_type target_begin): - return Column.from_pylibcudf( - pylibcudf.copying.copy_range( - input_column.to_pylibcudf(mode="read"), - target_column.to_pylibcudf(mode="read"), - input_begin, - input_end, - target_begin - ) - ) - - -@acquire_spill_lock() -def copy_range(Column source_column, - Column target_column, - size_type source_begin, - size_type source_end, - size_type target_begin, - size_type target_end, - bool inplace): - """ - Copy a contiguous range from a source to a target column - - Notes - ----- - Expects the source and target ranges to have been sanitised to be - in-range for the source and target column respectively. For - example via ``slice.indices``. - """ - - msg = "Source and target ranges must be same length" - assert source_end - source_begin == target_end - target_begin, msg - if target_end >= target_begin and inplace: - # FIXME: Are we allowed to do this when inplace=False? - return target_column - - if inplace: - _copy_range_in_place(source_column, target_column, - source_begin, source_end, target_begin) - else: - return _copy_range(source_column, target_column, - source_begin, source_end, target_begin) - - -@acquire_spill_lock() -def gather( - list columns, - Column gather_map, - bool nullify=False -): - tbl = pylibcudf.copying.gather( - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in columns]), - gather_map.to_pylibcudf(mode="read"), - pylibcudf.copying.OutOfBoundsPolicy.NULLIFY if nullify - else pylibcudf.copying.OutOfBoundsPolicy.DONT_CHECK - ) - return columns_from_pylibcudf_table(tbl) - - -@acquire_spill_lock() -def scatter(list sources, Column scatter_map, list target_columns, - bool bounds_check=True): - """ - Scattering source into target as per the scatter map. - `source` can be a list of scalars, or a list of columns. 
The number of - items in `sources` must equal the number of `target_columns` to scatter. - """ - # TODO: Only single column scatter is used, we should explore multi-column - # scatter for frames for performance increase. - - if len(sources) != len(target_columns): - raise ValueError("Mismatched number of source and target columns.") - - if len(sources) == 0: - return [] - - if bounds_check: - n_rows = len(target_columns[0]) - if not ( - (scatter_map >= -n_rows).all() - and (scatter_map < n_rows).all() - ): - raise IndexError( - f"index out of bounds for column of size {n_rows}" - ) - - tbl = pylibcudf.copying.scatter( - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]) - if isinstance(sources[0], Column) - else [( as_device_scalar(slr)).c_value for slr in sources], - scatter_map.to_pylibcudf(mode="read"), - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), - ) - - return columns_from_pylibcudf_table(tbl) - - -@acquire_spill_lock() -def column_empty_like(Column input_column): - return Column.from_pylibcudf( - pylibcudf.copying.empty_like( - input_column.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def column_allocate_like(Column input_column, size=None): - return Column.from_pylibcudf( - pylibcudf.copying.allocate_like( - input_column.to_pylibcudf(mode="read"), - size, - ) - ) - - -@acquire_spill_lock() -def columns_empty_like(list input_columns): - return columns_from_pylibcudf_table( - pylibcudf.copying.empty_like( - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]) - ) - ) - - -@acquire_spill_lock() -def column_slice(Column input_column, object indices): - return [ - Column.from_pylibcudf(c) - for c in pylibcudf.copying.slice( - input_column.to_pylibcudf(mode="read"), - list(indices), - ) - ] - - -@acquire_spill_lock() -def columns_slice(list input_columns, object indices): - return [ - columns_from_pylibcudf_table(tbl) - for tbl in pylibcudf.copying.slice( - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]), - list(indices), - ) - ] - - -@acquire_spill_lock() -def column_split(Column input_column, object splits): - return [ - Column.from_pylibcudf(c) - for c in pylibcudf.copying.split( - input_column.to_pylibcudf(mode="read"), - list(splits), - ) - ] - - -@acquire_spill_lock() -def columns_split(list input_columns, object splits): - return [ - columns_from_pylibcudf_table(tbl) - for tbl in pylibcudf.copying.split( - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]), - list(splits), - ) - ] - - -@acquire_spill_lock() -def copy_if_else(object lhs, object rhs, Column boolean_mask): - return Column.from_pylibcudf( - pylibcudf.copying.copy_if_else( - lhs.to_pylibcudf(mode="read") if isinstance(lhs, Column) - else ( as_device_scalar(lhs)).c_value, - rhs.to_pylibcudf(mode="read") if isinstance(rhs, Column) - else ( as_device_scalar(rhs)).c_value, - boolean_mask.to_pylibcudf(mode="read"), - ) - ) - - -@acquire_spill_lock() -def boolean_mask_scatter(list input_, list target_columns, - Column boolean_mask): - """Copy the target columns, replacing masked rows with input data. - - The ``input_`` data can be a list of columns or as a list of scalars. - A list of input columns will be used to replace corresponding rows in the - target columns for which the boolean mask is ``True``. For the nth ``True`` - in the boolean mask, the nth row in ``input_`` is used to replace. 
A list - of input scalars will replace all rows in the target columns for which the - boolean mask is ``True``. - """ - if len(input_) != len(target_columns): - raise ValueError("Mismatched number of input and target columns.") - - if len(input_) == 0: - return [] - - tbl = pylibcudf.copying.boolean_mask_scatter( - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]) - if isinstance(input_[0], Column) - else [( as_device_scalar(i)).c_value for i in input_], - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), - boolean_mask.to_pylibcudf(mode="read"), - ) - - return columns_from_pylibcudf_table(tbl) - - -@acquire_spill_lock() -def shift(Column input, int offset, object fill_value=None): - cdef DeviceScalar fill - - if isinstance(fill_value, DeviceScalar): - fill = fill_value - else: - fill = as_device_scalar(fill_value, input.dtype) - - col = pylibcudf.copying.shift( - input.to_pylibcudf(mode="read"), - offset, - fill.c_value, - ) - return Column.from_pylibcudf(col) - - -@acquire_spill_lock() -def get_element(Column input_column, size_type index): - return DeviceScalar.from_pylibcudf( - pylibcudf.copying.get_element( - input_column.to_pylibcudf(mode="read"), - index, - ), - dtype=input_column.dtype, - ) - - -class PackedColumns(Serializable): - """ - A packed representation of a Frame, with all columns residing - in a single GPU memory buffer. - """ - - def __init__( - self, - PlcPackedColumns data, - object column_names = None, - object index_names = None, - object column_dtypes = None - ): - self._metadata, self._gpu_data = data.release() - self.column_names=column_names - self.index_names=index_names - self.column_dtypes=column_dtypes - - def __reduce__(self): - return self.deserialize, self.serialize() - - @property - def __cuda_array_interface__(self): - return self._gpu_data.__cuda_array_interface__ - - def serialize(self): - header = {} - frames = [] - gpu_data = as_buffer( - data = self._gpu_data.obj.ptr, - size = self._gpu_data.obj.size, - owner=self, - exposed=True - ) - data_header, data_frames = gpu_data.serialize() - header["data"] = data_header - frames.extend(data_frames) - - header["column-names"] = self.column_names - header["index-names"] = self.index_names - header["metadata"] = self._metadata.tobytes() - for name, dtype in self.column_dtypes.items(): - dtype_header, dtype_frames = dtype.device_serialize() - self.column_dtypes[name] = ( - dtype_header, - (len(frames), len(frames) + len(dtype_frames)), - ) - frames.extend(dtype_frames) - header["column-dtypes"] = self.column_dtypes - return header, frames - - @classmethod - def deserialize(cls, header, frames): - column_dtypes = {} - for name, dtype in header["column-dtypes"].items(): - dtype_header, (start, stop) = dtype - column_dtypes[name] = Serializable.device_deserialize( - dtype_header, frames[start:stop] - ) - return cls( - plc.contiguous_split.pack( - plc.contiguous_split.unpack_from_memoryviews( - memoryview(header["metadata"]), - plc.gpumemoryview(frames[0]), - ) - ), - header["column-names"], - header["index-names"], - column_dtypes, - ) - - @classmethod - def from_py_table(cls, input_table, keep_index=True): - if keep_index and ( - not isinstance(input_table.index, cudf.RangeIndex) - or input_table.index.start != 0 - or input_table.index.stop != len(input_table) - or input_table.index.step != 1 - ): - columns = input_table._index._columns + input_table._columns - index_names = input_table._index_names - else: - columns = input_table._columns - index_names = None - - 
column_names = input_table._column_names - column_dtypes = {} - for name, col in input_table._column_labels_and_values: - if isinstance( - col.dtype, - (cudf.core.dtypes._BaseDtype, cudf.core.dtypes.CategoricalDtype) - ): - column_dtypes[name] = col.dtype - - return cls( - plc.contiguous_split.pack( - plc.Table( - [ - col.to_pylibcudf(mode="read") for col in columns - ] - ) - ), - column_names, - index_names, - column_dtypes, - ) - - def unpack(self): - output_table = cudf.DataFrame._from_data(*data_from_pylibcudf_table( - plc.contiguous_split.unpack_from_memoryviews( - self._metadata, - self._gpu_data - ), - self.column_names, - self.index_names - )) - for name, dtype in self.column_dtypes.items(): - output_table._data[name] = ( - output_table._data[name]._with_type_metadata(dtype) - ) - - return output_table - - -def pack(input_table, keep_index=True): - """ - Pack the columns of a cudf Frame into a single GPU memory buffer. - """ - return PackedColumns.from_py_table(input_table, keep_index) - - -def unpack(packed): - """ - Unpack the results of packing a cudf Frame returning a new - cudf Frame in the process. - """ - return packed.unpack() diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx deleted file mode 100644 index 1c9d3a01b80..00000000000 --- a/python/cudf/cudf/_lib/interop.pyx +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -import pylibcudf - -from cudf._lib.utils cimport columns_from_pylibcudf_table - -from cudf.core.buffer import acquire_spill_lock -from cudf.core.dtypes import ListDtype, StructDtype - - -def from_dlpack(object dlpack_capsule): - """ - Converts a DLPack Tensor PyCapsule into a list of columns. - - DLPack Tensor PyCapsule is expected to have the name "dltensor". - """ - return columns_from_pylibcudf_table( - pylibcudf.interop.from_dlpack(dlpack_capsule) - ) - - -def to_dlpack(list source_columns): - """ - Converts a list of columns into a DLPack Tensor PyCapsule. - - DLPack Tensor PyCapsule will have the name "dltensor". - """ - return pylibcudf.interop.to_dlpack( - pylibcudf.Table( - [col.to_pylibcudf(mode="read") for col in source_columns] - ) - ) - - -def gather_metadata(object cols_dtypes): - """ - Generates a ColumnMetadata vector for each column. - - Parameters - ---------- - cols_dtypes : iterable - An iterable of ``(column_name, dtype)`` pairs. 
- """ - cpp_metadata = [] - if cols_dtypes is not None: - for idx, (col_name, col_dtype) in enumerate(cols_dtypes): - cpp_metadata.append(pylibcudf.interop.ColumnMetadata(col_name)) - if isinstance(col_dtype, (ListDtype, StructDtype)): - _set_col_children_metadata(col_dtype, cpp_metadata[idx]) - else: - raise TypeError( - "An iterable of (column_name, dtype) pairs is required to " - "construct column_metadata" - ) - return cpp_metadata - - -def _set_col_children_metadata(dtype, col_meta): - if isinstance(dtype, StructDtype): - for name, value in dtype.fields.items(): - element_metadata = pylibcudf.interop.ColumnMetadata(name) - _set_col_children_metadata(value, element_metadata) - col_meta.children_meta.append(element_metadata) - elif isinstance(dtype, ListDtype): - # Offsets - child 0 - col_meta.children_meta.append(pylibcudf.interop.ColumnMetadata()) - - # Element column - child 1 - element_metadata = pylibcudf.interop.ColumnMetadata() - _set_col_children_metadata(dtype.element_type, element_metadata) - col_meta.children_meta.append(element_metadata) - else: - col_meta.children_meta.append(pylibcudf.interop.ColumnMetadata()) - - -@acquire_spill_lock() -def to_arrow(list source_columns, object column_dtypes): - """Convert a list of columns from - cudf Frame to a PyArrow Table. - - Parameters - ---------- - source_columns : a list of columns to convert - column_dtypes : Iterable of ``(column_name, column_dtype)`` pairs - - Returns - ------- - pyarrow table - """ - cpp_metadata = gather_metadata(column_dtypes) - return pylibcudf.interop.to_arrow( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in source_columns]), - cpp_metadata, - ) - - -@acquire_spill_lock() -def from_arrow(object input_table): - """Convert from PyArrow Table to a list of columns. - - Parameters - ---------- - input_table : PyArrow table - - Returns - ------- - A list of columns to construct Frame object - """ - return columns_from_pylibcudf_table( - pylibcudf.interop.from_arrow(input_table) - ) diff --git a/python/cudf/cudf/_lib/scalar.pxd b/python/cudf/cudf/_lib/scalar.pxd index 0f9820ed1db..a3a8a14e70f 100644 --- a/python/cudf/cudf/_lib/scalar.pxd +++ b/python/cudf/cudf/_lib/scalar.pxd @@ -17,9 +17,6 @@ cdef class DeviceScalar: @staticmethod cdef DeviceScalar from_unique_ptr(unique_ptr[scalar] ptr, dtype=*) - @staticmethod - cdef DeviceScalar from_pylibcudf(pscalar, dtype=*) - cdef void _set_dtype(self, dtype=*) cpdef bool is_valid(DeviceScalar s) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 56712402919..3d3bdd730a8 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -218,7 +218,7 @@ cdef class DeviceScalar: return s @staticmethod - cdef DeviceScalar from_pylibcudf(pscalar, dtype=None): + def from_pylibcudf(pscalar, dtype=None): cdef DeviceScalar s = DeviceScalar.__new__(DeviceScalar) s.c_value = pscalar s._set_dtype(dtype) diff --git a/python/cudf/cudf/_lib/stream_compaction.pyx b/python/cudf/cudf/_lib/stream_compaction.pyx deleted file mode 100644 index 1b8831940e3..00000000000 --- a/python/cudf/cudf/_lib/stream_compaction.pyx +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -from cudf.core.buffer import acquire_spill_lock - -from libcpp cimport bool - -from cudf._lib.column cimport Column -from cudf._lib.utils cimport columns_from_pylibcudf_table - -import pylibcudf - - -@acquire_spill_lock() -def drop_nulls(list columns, how="any", keys=None, thresh=None): - """ - Drops null rows from cols depending on key columns. - - Parameters - ---------- - columns : list of columns - how : "any" or "all". If thresh is None, drops rows of cols that have any - nulls or all nulls (respectively) in subset (default: "any") - keys : List of column indices. If set, then these columns are checked for - nulls rather than all of columns (optional) - thresh : Minimum number of non-nulls required to keep a row (optional) - - Returns - ------- - columns with null rows dropped - """ - if how not in {"any", "all"}: - raise ValueError("how must be 'any' or 'all'") - - keys = list(keys if keys is not None else range(len(columns))) - - # Note: If how == "all" and thresh is specified this prioritizes thresh - if thresh is not None: - keep_threshold = thresh - elif how == "all": - keep_threshold = 1 - else: - keep_threshold = len(keys) - - return columns_from_pylibcudf_table( - pylibcudf.stream_compaction.drop_nulls( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in columns]), - keys, - keep_threshold, - ) - ) - - -@acquire_spill_lock() -def apply_boolean_mask(list columns, Column boolean_mask): - """ - Drops the rows which correspond to False in boolean_mask. - - Parameters - ---------- - columns : list of columns whose rows are dropped as per boolean_mask - boolean_mask : a boolean column of same size as source_table - - Returns - ------- - columns obtained from applying mask - """ - return columns_from_pylibcudf_table( - pylibcudf.stream_compaction.apply_boolean_mask( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in columns]), - boolean_mask.to_pylibcudf(mode="read"), - ) - ) - - -_keep_options = { - "first": pylibcudf.stream_compaction.DuplicateKeepOption.KEEP_FIRST, - "last": pylibcudf.stream_compaction.DuplicateKeepOption.KEEP_LAST, - False: pylibcudf.stream_compaction.DuplicateKeepOption.KEEP_NONE, -} - - -@acquire_spill_lock() -def drop_duplicates(list columns, - object keys=None, - object keep='first', - bool nulls_are_equal=True): - """ - Drops rows in source_table as per duplicate rows in keys. - - Parameters - ---------- - columns : List of columns - keys : List of column indices. If set, then these columns are checked for - duplicates rather than all of columns (optional) - keep : keep 'first' or 'last' or none of the duplicate rows - nulls_are_equal : if True, nulls are treated equal else not. - - Returns - ------- - columns with duplicate dropped - """ - if (keep_option := _keep_options.get(keep)) is None: - raise ValueError('keep must be either "first", "last" or False') - - return columns_from_pylibcudf_table( - pylibcudf.stream_compaction.stable_distinct( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in columns]), - list(keys if keys is not None else range(len(columns))), - keep_option, - pylibcudf.types.NullEquality.EQUAL - if nulls_are_equal else pylibcudf.types.NullEquality.UNEQUAL, - pylibcudf.types.NanEquality.ALL_EQUAL, - ) - ) - - -@acquire_spill_lock() -def distinct_indices( - list columns, - object keep="first", - bool nulls_equal=True, - bool nans_equal=True, -): - """ - Return indices of the distinct rows in a table. 
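# Illustration only, not part of the patch: how drop_nulls above folds
# (how, thresh) into libcudf's single keep_threshold argument -- the minimum
# number of non-null key values a row needs in order to survive.
def _keep_threshold(num_keys, how="any", thresh=None):
    if thresh is not None:  # thresh always takes priority, as noted above
        return thresh
    if how == "all":        # drop a row only when every key is null
        return 1
    return num_keys         # "any": drop a row when any key is null

assert _keep_threshold(3, how="any") == 3
assert _keep_threshold(3, how="all") == 1
assert _keep_threshold(3, how="all", thresh=2) == 2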
- - Parameters - ---------- - columns : list of columns to check for duplicates - keep : treat "first", "last", or (False) none of any duplicate - rows as distinct - nulls_equal : Should nulls compare equal - nans_equal: Should nans compare equal - - Returns - ------- - Column of indices - - See Also - -------- - drop_duplicates - """ - if (keep_option := _keep_options.get(keep)) is None: - raise ValueError('keep must be either "first", "last" or False') - - return Column.from_pylibcudf( - pylibcudf.stream_compaction.distinct_indices( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in columns]), - keep_option, - pylibcudf.types.NullEquality.EQUAL - if nulls_equal else pylibcudf.types.NullEquality.UNEQUAL, - pylibcudf.types.NanEquality.ALL_EQUAL - if nans_equal else pylibcudf.types.NanEquality.UNEQUAL, - ) - ) - - -@acquire_spill_lock() -def distinct_count(Column source_column, ignore_nulls=True, nan_as_null=False): - """ - Finds number of unique rows in `source_column` - - Parameters - ---------- - source_column : source table checked for unique rows - ignore_nulls : If True nulls are ignored, - else counted as one more distinct value - nan_as_null : If True, NAN is considered NULL, - else counted as one more distinct value - - Returns - ------- - Count of number of unique rows in `source_column` - """ - return pylibcudf.stream_compaction.distinct_count( - source_column.to_pylibcudf(mode="read"), - pylibcudf.types.NullPolicy.EXCLUDE - if ignore_nulls else pylibcudf.types.NullPolicy.INCLUDE, - pylibcudf.types.NanPolicy.NAN_IS_NULL - if nan_as_null else pylibcudf.types.NanPolicy.NAN_IS_VALID, - ) diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx deleted file mode 100644 index 06ee07d8e2b..00000000000 --- a/python/cudf/cudf/_lib/string_casting.pyx +++ /dev/null @@ -1,598 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
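# Illustration only, not part of the patch: the policy mapping used by
# distinct_count above, on an assumed small float column.
import pyarrow as pa
import pylibcudf

col = pylibcudf.interop.from_arrow(
    pa.table({"x": [1.0, 1.0, None, float("nan")]})
).columns()[0]
n = pylibcudf.stream_compaction.distinct_count(
    col,
    pylibcudf.types.NullPolicy.EXCLUDE,      # ignore_nulls=True
    pylibcudf.types.NanPolicy.NAN_IS_VALID,  # nan_as_null=False
)
# The null is excluded and NaN counts as its own value, so n == 2.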
- -from cudf._lib.column cimport Column - -import pylibcudf as plc -from pylibcudf.types cimport DataType - -from cudf._lib.scalar import as_device_scalar - -from cudf._lib.types cimport dtype_to_pylibcudf_type - - -def floating_to_string(Column input_col): - plc_column = plc.strings.convert.convert_floats.from_floats( - input_col.to_pylibcudf(mode="read"), - ) - return Column.from_pylibcudf(plc_column) - - -def string_to_floating(Column input_col, DataType out_type): - plc_column = plc.strings.convert.convert_floats.to_floats( - input_col.to_pylibcudf(mode="read"), - out_type - ) - return Column.from_pylibcudf(plc_column) - - -def dtos(Column input_col): - """ - Converting/Casting input column of type double to string column - - Parameters - ---------- - input_col : input column of type double - - Returns - ------- - A Column with double values cast to string - """ - - return floating_to_string(input_col) - - -def stod(Column input_col): - """ - Converting/Casting input column of type string to double - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to double - """ - - return string_to_floating(input_col, plc.DataType(plc.TypeId.FLOAT64)) - - -def ftos(Column input_col): - """ - Converting/Casting input column of type float to string column - - Parameters - ---------- - input_col : input column of type double - - Returns - ------- - A Column with float values cast to string - """ - - return floating_to_string(input_col) - - -def stof(Column input_col): - """ - Converting/Casting input column of type string to float - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to float - """ - - return string_to_floating(input_col, plc.DataType(plc.TypeId.FLOAT32)) - - -def integer_to_string(Column input_col): - plc_column = plc.strings.convert.convert_integers.from_integers( - input_col.to_pylibcudf(mode="read"), - ) - return Column.from_pylibcudf(plc_column) - - -def string_to_integer(Column input_col, DataType out_type): - plc_column = plc.strings.convert.convert_integers.to_integers( - input_col.to_pylibcudf(mode="read"), - out_type - ) - return Column.from_pylibcudf(plc_column) - - -def i8tos(Column input_col): - """ - Converting/Casting input column of type int8 to string column - - Parameters - ---------- - input_col : input column of type int8 - - Returns - ------- - A Column with int8 values cast to string - """ - - return integer_to_string(input_col) - - -def stoi8(Column input_col): - """ - Converting/Casting input column of type string to int8 - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to int8 - """ - - return string_to_integer(input_col, plc.DataType(plc.TypeId.INT8)) - - -def i16tos(Column input_col): - """ - Converting/Casting input column of type int16 to string column - - Parameters - ---------- - input_col : input column of type int16 - - Returns - ------- - A Column with int16 values cast to string - """ - - return integer_to_string(input_col) - - -def stoi16(Column input_col): - """ - Converting/Casting input column of type string to int16 - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to int16 - """ - - return string_to_integer(input_col, plc.DataType(plc.TypeId.INT16)) - - -def itos(Column input_col): - """ - Converting/Casting input column of type int32 to string column - - Parameters - 
---------- - input_col : input column of type int32 - - Returns - ------- - A Column with int32 values cast to string - """ - - return integer_to_string(input_col) - - -def stoi(Column input_col): - """ - Converting/Casting input column of type string to int32 - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to int32 - """ - - return string_to_integer(input_col, plc.DataType(plc.TypeId.INT32)) - - -def ltos(Column input_col): - """ - Converting/Casting input column of type int64 to string column - - Parameters - ---------- - input_col : input column of type int64 - - Returns - ------- - A Column with int64 values cast to string - """ - - return integer_to_string(input_col) - - -def stol(Column input_col): - """ - Converting/Casting input column of type string to int64 - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to int64 - """ - - return string_to_integer(input_col, plc.DataType(plc.TypeId.INT64)) - - -def ui8tos(Column input_col): - """ - Converting/Casting input column of type uint8 to string column - - Parameters - ---------- - input_col : input column of type uint8 - - Returns - ------- - A Column with uint8 values cast to string - """ - - return integer_to_string(input_col) - - -def stoui8(Column input_col): - """ - Converting/Casting input column of type string to uint8 - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to uint8 - """ - - return string_to_integer(input_col, plc.DataType(plc.TypeId.UINT8)) - - -def ui16tos(Column input_col): - """ - Converting/Casting input column of type uint16 to string column - - Parameters - ---------- - input_col : input column of type uint16 - - Returns - ------- - A Column with uint16 values cast to string - """ - - return integer_to_string(input_col) - - -def stoui16(Column input_col): - """ - Converting/Casting input column of type string to uint16 - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to uint16 - """ - - return string_to_integer(input_col, plc.DataType(plc.TypeId.UINT16)) - - -def uitos(Column input_col): - """ - Converting/Casting input column of type uint32 to string column - - Parameters - ---------- - input_col : input column of type uint32 - - Returns - ------- - A Column with uint32 values cast to string - """ - - return integer_to_string(input_col) - - -def stoui(Column input_col): - """ - Converting/Casting input column of type string to uint32 - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to uint32 - """ - - return string_to_integer(input_col, plc.DataType(plc.TypeId.UINT32)) - - -def ultos(Column input_col): - """ - Converting/Casting input column of type uint64 to string column - - Parameters - ---------- - input_col : input column of type uint64 - - Returns - ------- - A Column with uint64 values cast to string - """ - - return integer_to_string(input_col) - - -def stoul(Column input_col): - """ - Converting/Casting input column of type string to uint64 - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with strings cast to uint64 - """ - - return string_to_integer(input_col, plc.DataType(plc.TypeId.UINT64)) - - -def to_booleans(Column input_col): - plc_column = 
plc.strings.convert.convert_booleans.to_booleans( - input_col.to_pylibcudf(mode="read"), - as_device_scalar("True").c_value, - ) - return Column.from_pylibcudf(plc_column) - - -def from_booleans(Column input_col): - plc_column = plc.strings.convert.convert_booleans.from_booleans( - input_col.to_pylibcudf(mode="read"), - as_device_scalar("True").c_value, - as_device_scalar("False").c_value, - ) - return Column.from_pylibcudf(plc_column) - - -def int2timestamp( - Column input_col, - str format, - Column names): - """ - Converting/Casting input date-time column to string - column with specified format - - Parameters - ---------- - input_col : input column of type timestamp in integer format - format : The string specifying output format - names : The string names to use for weekdays ("%a", "%A") and - months ("%b", "%B") - - Returns - ------- - A Column with date-time represented in string format - - """ - return Column.from_pylibcudf( - plc.strings.convert.convert_datetime.from_timestamps( - input_col.to_pylibcudf(mode="read"), - format, - names.to_pylibcudf(mode="read") - ) - ) - - -def timestamp2int(Column input_col, dtype, format): - """ - Converting/Casting input string column to date-time column with specified - timestamp_format - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with string represented in date-time format - - """ - dtype = dtype_to_pylibcudf_type(dtype) - return Column.from_pylibcudf( - plc.strings.convert.convert_datetime.to_timestamps( - input_col.to_pylibcudf(mode="read"), - dtype, - format - ) - ) - - -def istimestamp(Column input_col, str format): - """ - Check input string column matches the specified timestamp format - - Parameters - ---------- - input_col : input column of type string - - format : format string of timestamp specifiers - - Returns - ------- - A Column of boolean values identifying strings that matched the format. 
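# Illustration only, not part of the patch: the round trip implemented by the
# int2timestamp/timestamp2int helpers above, assuming a seconds-resolution
# column and a format that needs no weekday/month names (so names may be empty).
import pyarrow as pa
import pylibcudf as plc

ts = plc.interop.from_arrow(
    pa.table({"t": pa.array([0, 86400], type=pa.timestamp("s"))})
).columns()[0]
names = plc.interop.from_arrow(
    pa.table({"n": pa.array([], type=pa.string())})
).columns()[0]

strs = plc.strings.convert.convert_datetime.from_timestamps(ts, "%Y-%m-%d", names)
back = plc.strings.convert.convert_datetime.to_timestamps(
    strs, plc.DataType(plc.TypeId.TIMESTAMP_SECONDS), "%Y-%m-%d"
)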
- - """ - plc_column = plc.strings.convert.convert_datetime.is_timestamp( - input_col.to_pylibcudf(mode="read"), - format - ) - return Column.from_pylibcudf(plc_column) - - -def timedelta2int(Column input_col, dtype, format): - """ - Converting/Casting input string column to TimeDelta column with specified - format - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column with string represented in TimeDelta format - - """ - dtype = dtype_to_pylibcudf_type(dtype) - return Column.from_pylibcudf( - plc.strings.convert.convert_durations.to_durations( - input_col.to_pylibcudf(mode="read"), - dtype, - format - ) - ) - - -def int2timedelta(Column input_col, str format): - """ - Converting/Casting input Timedelta column to string - column with specified format - - Parameters - ---------- - input_col : input column of type Timedelta in integer format - - Returns - ------- - A Column with Timedelta represented in string format - - """ - return Column.from_pylibcudf( - plc.strings.convert.convert_durations.from_durations( - input_col.to_pylibcudf(mode="read"), - format - ) - ) - - -def int2ip(Column input_col): - """ - Converting/Casting integer column to string column in ipv4 format - - Parameters - ---------- - input_col : input integer column - - Returns - ------- - A Column with integer represented in string ipv4 format - - """ - plc_column = plc.strings.convert.convert_ipv4.integers_to_ipv4( - input_col.to_pylibcudf(mode="read") - ) - return Column.from_pylibcudf(plc_column) - - -def ip2int(Column input_col): - """ - Converting string ipv4 column to integer column - - Parameters - ---------- - input_col : input string column - - Returns - ------- - A Column with ipv4 represented as integer - - """ - plc_column = plc.strings.convert.convert_ipv4.ipv4_to_integers( - input_col.to_pylibcudf(mode="read") - ) - return Column.from_pylibcudf(plc_column) - - -def is_ipv4(Column source_strings): - """ - Returns a Column of boolean values with True for `source_strings` - that have strings in IPv4 format. This format is nnn.nnn.nnn.nnn - where nnn is integer digits in [0,255]. - """ - plc_column = plc.strings.convert.convert_ipv4.is_ipv4( - source_strings.to_pylibcudf(mode="read") - ) - return Column.from_pylibcudf(plc_column) - - -def htoi(Column input_col): - """ - Converting input column of type string having hex values - to integer of out_type - - Parameters - ---------- - input_col : input column of type string - - Returns - ------- - A Column of integers parsed from hexadecimal string values. - """ - plc_column = plc.strings.convert.convert_integers.hex_to_integers( - input_col.to_pylibcudf(mode="read"), - plc.DataType(plc.TypeId.INT64) - ) - return Column.from_pylibcudf(plc_column) - - -def is_hex(Column source_strings): - """ - Returns a Column of boolean values with True for `source_strings` - that have hex characters. - """ - plc_column = plc.strings.convert.convert_integers.is_hex( - source_strings.to_pylibcudf(mode="read"), - ) - return Column.from_pylibcudf(plc_column) - - -def itoh(Column input_col): - """ - Converting input column of type integer to a string - column with hexadecimal character digits. - - Parameters - ---------- - input_col : input column of type integer - - Returns - ------- - A Column of strings with hexadecimal characters. 
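# Illustration only, not part of the patch: the htoi/itoh pair above on an
# assumed int64 column; the produced hex strings carry no "0x" prefix.
import pyarrow as pa
import pylibcudf as plc

ints = plc.interop.from_arrow(
    pa.table({"x": pa.array([255, 4096], type=pa.int64())})
).columns()[0]
hex_strs = plc.strings.convert.convert_integers.integers_to_hex(ints)
back = plc.strings.convert.convert_integers.hex_to_integers(
    hex_strs, plc.DataType(plc.TypeId.INT64)
)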
- """ - plc_column = plc.strings.convert.convert_integers.integers_to_hex( - input_col.to_pylibcudf(mode="read"), - ) - return Column.from_pylibcudf(plc_column) diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd index 6cc52d046af..900be721c9a 100644 --- a/python/cudf/cudf/_lib/utils.pxd +++ b/python/cudf/cudf/_lib/utils.pxd @@ -1,22 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string -from libcpp.vector cimport vector - -from pylibcudf.libcudf.column.column cimport column_view -from pylibcudf.libcudf.table.table cimport table, table_view - - -cdef data_from_unique_ptr( - unique_ptr[table] c_tbl, column_names, index_names=*) cpdef data_from_pylibcudf_table(tbl, column_names, index_names=*) cpdef data_from_pylibcudf_io(tbl_with_meta, column_names = *, index_names = *) -cdef data_from_table_view( - table_view tv, object owner, object column_names, object index_names=*) -cdef table_view table_view_from_columns(columns) except * -cdef table_view table_view_from_table(tbl, ignore_index=*) except* -cdef columns_from_unique_ptr(unique_ptr[table] c_tbl) -cdef columns_from_table_view(table_view tv, object owners) cpdef columns_from_pylibcudf_table(tbl) cpdef _data_from_columns(columns, column_names, index_names=*) diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index ff032656f80..975c9eb741c 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -1,233 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - -import numpy as np -import pyarrow as pa - import cudf -from cython.operator cimport dereference -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move -from libcpp.vector cimport vector - -from pylibcudf.libcudf.column.column cimport column, column_view -from pylibcudf.libcudf.table.table cimport table -from pylibcudf.libcudf.table.table_view cimport table_view -from pylibcudf.libcudf.types cimport size_type - from cudf._lib.column cimport Column -from pylibcudf cimport Column as plc_Column -try: - import ujson as json -except ImportError: - import json - -from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes, np_to_pa_dtype - -PARQUET_META_TYPE_MAP = { - str(cudf_dtype): str(pandas_dtype) - for cudf_dtype, pandas_dtype in np_dtypes_to_pandas_dtypes.items() -} - -cdef table_view table_view_from_columns(columns) except*: - """Create a cudf::table_view from an iterable of Columns.""" - cdef vector[column_view] column_views - - cdef Column col - for col in columns: - column_views.push_back(col.view()) - - return table_view(column_views) - - -cdef table_view table_view_from_table(tbl, ignore_index=False) except*: - """Create a cudf::table_view from a Table. - - Parameters - ---------- - ignore_index : bool, default False - If True, don't include the index in the columns. - """ - return table_view_from_columns( - tbl._index._columns + tbl._columns - if not ignore_index and tbl._index is not None - else tbl._columns - ) - - -cpdef generate_pandas_metadata(table, index): - col_names = [] - types = [] - index_levels = [] - index_descriptors = [] - columns_to_convert = list(table._columns) - # Columns - for name, col in table._column_labels_and_values: - if cudf.get_option("mode.pandas_compatible"): - # in pandas-compat mode, non-string column names are stringified. 
- col_names.append(str(name)) - else: - col_names.append(name) - - if isinstance(col.dtype, cudf.CategoricalDtype): - raise ValueError( - "'category' column dtypes are currently not " - + "supported by the gpu accelerated parquet writer" - ) - elif isinstance(col.dtype, ( - cudf.ListDtype, - cudf.StructDtype, - cudf.core.dtypes.DecimalDtype - )): - types.append(col.dtype.to_arrow()) - else: - # A boolean element takes 8 bits in cudf and 1 bit in - # pyarrow. To make sure the cudf format is interperable - # in arrow, we use `int8` type when converting from a - # cudf boolean array. - if col.dtype.type == np.bool_: - types.append(pa.int8()) - else: - types.append(np_to_pa_dtype(col.dtype)) - - # Indexes - materialize_index = False - if index is not False: - for level, name in enumerate(table._index.names): - if isinstance(table._index, cudf.MultiIndex): - idx = table.index.get_level_values(level) - else: - idx = table.index - - if isinstance(idx, cudf.RangeIndex): - if index is None: - descr = { - "kind": "range", - "name": table.index.name, - "start": table.index.start, - "stop": table.index.stop, - "step": table.index.step, - } - else: - materialize_index = True - # When `index=True`, RangeIndex needs to be materialized. - materialized_idx = idx._as_int_index() - descr = _index_level_name( - index_name=materialized_idx.name, - level=level, - column_names=col_names - ) - index_levels.append(materialized_idx) - columns_to_convert.append(materialized_idx._values) - col_names.append(descr) - types.append(np_to_pa_dtype(materialized_idx.dtype)) - else: - descr = _index_level_name( - index_name=idx.name, - level=level, - column_names=col_names - ) - columns_to_convert.append(idx._values) - col_names.append(descr) - if isinstance(idx.dtype, cudf.CategoricalDtype): - raise ValueError( - "'category' column dtypes are currently not " - + "supported by the gpu accelerated parquet writer" - ) - elif isinstance(idx.dtype, cudf.ListDtype): - types.append(col.dtype.to_arrow()) - else: - # A boolean element takes 8 bits in cudf and 1 bit in - # pyarrow. To make sure the cudf format is interperable - # in arrow, we use `int8` type when converting from a - # cudf boolean array. - if idx.dtype.type == np.bool_: - types.append(pa.int8()) - else: - types.append(np_to_pa_dtype(idx.dtype)) - - index_levels.append(idx) - index_descriptors.append(descr) - - df_meta = table.head(0) - if materialize_index: - df_meta.index = df_meta.index._as_int_index() - metadata = pa.pandas_compat.construct_metadata( - columns_to_convert=columns_to_convert, - # It is OKAY to do `.head(0).to_pandas()` because - # this method will extract `.columns` metadata only - df=df_meta.to_pandas(), - column_names=col_names, - index_levels=index_levels, - index_descriptors=index_descriptors, - preserve_index=index, - types=types, - ) - - md_dict = json.loads(metadata[b"pandas"]) - - # correct metadata for list and struct and nullable numeric types - for col_meta in md_dict["columns"]: - if ( - col_meta["name"] in table._column_names - and table._data[col_meta["name"]].nullable - and col_meta["numpy_type"] in PARQUET_META_TYPE_MAP - and col_meta["pandas_type"] != "decimal" - ): - col_meta["numpy_type"] = PARQUET_META_TYPE_MAP[ - col_meta["numpy_type"] - ] - if col_meta["numpy_type"] in ("list", "struct"): - col_meta["numpy_type"] = "object" - - return json.dumps(md_dict) - - -def _index_level_name(index_name, level, column_names): - """ - Return the name of an index level or a default name - if `index_name` is None or is already a column name. 
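# Illustration only, not part of the patch: the index descriptor that the
# generate_pandas_metadata helper above emits for a default (non-materialized)
# RangeIndex, assuming RangeIndex(start=0, stop=6, step=2, name="idx").
descr = {
    "kind": "range",
    "name": "idx",
    "start": 0,
    "stop": 6,
    "step": 2,
}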
- - Parameters - ---------- - index_name : name of an Index object - level : level of the Index object - - Returns - ------- - name : str - """ - if index_name is not None and index_name not in column_names: - return index_name - else: - return f"__index_level_{level}__" - - -cdef columns_from_unique_ptr( - unique_ptr[table] c_tbl -): - """Convert a libcudf table into list of columns. - - Parameters - ---------- - c_tbl : unique_ptr[cudf::table] - The libcudf table whose columns will be extracted - - Returns - ------- - list[Column] - A list of columns. - """ - cdef vector[unique_ptr[column]] c_columns = move(c_tbl.get().release()) - cdef vector[unique_ptr[column]].iterator it = c_columns.begin() - - cdef size_t i - - return [ - Column.from_pylibcudf( - plc_Column.from_libcudf(move(dereference(it+i))) - ) for i in range(c_columns.size()) - ] cpdef columns_from_pylibcudf_table(tbl): @@ -281,8 +55,7 @@ cpdef _data_from_columns(columns, column_names, index_names=None): # the data while actually constructing the Index object here (instead # of just returning a dict for that as well). As we clean up the # Frame factories we may want to look for a less dissonant approach - # that does not impose performance penalties. The same applies to - # data_from_table_view below. + # that does not impose performance penalties. cudf.core.index._index_from_data( { name: columns[i] @@ -300,16 +73,6 @@ cpdef _data_from_columns(columns, column_names, index_names=None): return data, index -cdef data_from_unique_ptr( - unique_ptr[table] c_tbl, column_names, index_names=None -): - return _data_from_columns( - columns_from_unique_ptr(move(c_tbl)), - column_names, - index_names - ) - - cpdef data_from_pylibcudf_table(tbl, column_names, index_names=None): return _data_from_columns( columns_from_pylibcudf_table(tbl), @@ -329,73 +92,3 @@ cpdef data_from_pylibcudf_io(tbl_with_meta, column_names=None, index_names=None) column_names=column_names, index_names=index_names ) - -cdef columns_from_table_view( - table_view tv, - object owners, -): - """ - Given a ``cudf::table_view``, constructs a list of columns from it, - along with referencing an owner Python object that owns the memory - lifetime. owner must be either None or a list of column. If owner - is a list of columns, the owner of the `i`th ``cudf::column_view`` - in the table view is ``owners[i]``. For more about memory ownership, - see ``Column.from_column_view``. - """ - - return [ - Column.from_column_view( - tv.column(i), owners[i] if isinstance(owners, list) else None - ) for i in range(tv.num_columns()) - ] - -cdef data_from_table_view( - table_view tv, - object owner, - object column_names, - object index_names=None -): - """ - Given a ``cudf::table_view``, constructs a Frame from it, - along with referencing an ``owner`` Python object that owns the memory - lifetime. 
If ``owner`` is a Frame we reach inside of it and - reach inside of each ``cudf.Column`` to make the owner of each newly - created ``Buffer`` underneath the ``cudf.Column`` objects of the - created Frame the respective ``Buffer`` from the relevant - ``cudf.Column`` of the ``owner`` Frame - """ - cdef size_type column_idx = 0 - table_owner = isinstance(owner, cudf.core.frame.Frame) - - # First construct the index, if any - index = None - if index_names is not None: - index_columns = [] - for _ in index_names: - column_owner = owner - if table_owner: - column_owner = owner._index._columns[column_idx] - index_columns.append( - Column.from_column_view( - tv.column(column_idx), - column_owner - ) - ) - column_idx += 1 - index = cudf.core.index._index_from_data( - dict(zip(index_names, index_columns))) - - # Construct the data dict - cdef size_type source_column_idx = 0 - data_columns = [] - for _ in column_names: - column_owner = owner - if table_owner: - column_owner = owner._columns[source_column_idx] - data_columns.append( - Column.from_column_view(tv.column(column_idx), column_owner) - ) - column_idx += 1 - source_column_idx += 1 - - return dict(zip(column_names, data_columns)), index diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index ed48fbf5c5a..c2f3c782d10 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -10,17 +10,18 @@ from typing_extensions import Self import cudf -from cudf._lib.copying import _gather_map_is_valid, gather -from cudf._lib.stream_compaction import ( +from cudf._lib.types import size_type_dtype +from cudf.api.extensions import no_default +from cudf.api.types import is_integer, is_list_like, is_scalar +from cudf.core._internals import copying +from cudf.core._internals.stream_compaction import ( apply_boolean_mask, drop_duplicates, drop_nulls, ) -from cudf._lib.types import size_type_dtype -from cudf.api.extensions import no_default -from cudf.api.types import is_integer, is_list_like, is_scalar from cudf.core.abc import Serializable from cudf.core.column import ColumnBase, column +from cudf.core.copy_types import GatherMap from cudf.errors import MixedTypeError from cudf.utils import ioutils from cudf.utils.dtypes import can_convert_to_column, is_mixed_with_object_dtype @@ -413,7 +414,7 @@ def hasnans(self): raise NotImplementedError @property - def nlevels(self): + def nlevels(self) -> int: """ Number of levels. 
""" @@ -1943,7 +1944,6 @@ def drop_duplicates( return self._from_columns_like_self( drop_duplicates( list(self._columns), - keys=range(len(self._columns)), keep=keep, nulls_are_equal=nulls_are_equal, ), @@ -2032,7 +2032,6 @@ def dropna(self, how="any"): drop_nulls( data_columns, how=how, - keys=range(len(data_columns)), ), self._column_names, ) @@ -2050,13 +2049,9 @@ def _gather(self, gather_map, nullify=False, check_bounds=True): if gather_map.dtype.kind not in "iu": gather_map = gather_map.astype(size_type_dtype) - if not _gather_map_is_valid( - gather_map, len(self), check_bounds, nullify - ): - raise IndexError("Gather map index is out of bounds.") - + GatherMap(gather_map, len(self), nullify=not check_bounds or nullify) return self._from_columns_like_self( - gather(list(self._columns), gather_map, nullify=nullify), + copying.gather(self._columns, gather_map, nullify=nullify), self._column_names, ) diff --git a/python/cudf/cudf/core/_internals/copying.py b/python/cudf/cudf/core/_internals/copying.py new file mode 100644 index 00000000000..34c1850cb72 --- /dev/null +++ b/python/cudf/cudf/core/_internals/copying.py @@ -0,0 +1,96 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pylibcudf as plc + +import cudf +from cudf.core.buffer import acquire_spill_lock + +if TYPE_CHECKING: + from collections.abc import Iterable + + from cudf.core.column import ColumnBase + from cudf.core.column.numerical import NumericalColumn + + +@acquire_spill_lock() +def gather( + columns: Iterable[ColumnBase], + gather_map: NumericalColumn, + nullify: bool = False, +) -> list[ColumnBase]: + plc_tbl = plc.copying.gather( + plc.Table([col.to_pylibcudf(mode="read") for col in columns]), + gather_map.to_pylibcudf(mode="read"), + plc.copying.OutOfBoundsPolicy.NULLIFY + if nullify + else plc.copying.OutOfBoundsPolicy.DONT_CHECK, + ) + return [ + cudf._lib.column.Column.from_pylibcudf(col) + for col in plc_tbl.columns() + ] + + +@acquire_spill_lock() +def scatter( + sources: list[ColumnBase | cudf.Scalar], + scatter_map: NumericalColumn, + target_columns: list[ColumnBase], + bounds_check: bool = True, +): + """ + Scattering source into target as per the scatter map. + `source` can be a list of scalars, or a list of columns. The number of + items in `sources` must equal the number of `target_columns` to scatter. + """ + # TODO: Only single column scatter is used, we should explore multi-column + # scatter for frames for performance increase. 
+
+    if len(sources) != len(target_columns):
+        raise ValueError("Mismatched number of source and target columns.")
+
+    if len(sources) == 0:
+        return []
+
+    if bounds_check:
+        n_rows = len(target_columns[0])
+        if not (
+            (scatter_map >= -n_rows).all() and (scatter_map < n_rows).all()
+        ):
+            raise IndexError(
+                f"index out of bounds for column of size {n_rows}"
+            )
+
+    plc_tbl = plc.copying.scatter(
+        plc.Table([col.to_pylibcudf(mode="read") for col in sources])  # type: ignore[union-attr]
+        if isinstance(sources[0], cudf._lib.column.Column)
+        else [slr.device_value.c_value for slr in sources],  # type: ignore[union-attr]
+        scatter_map.to_pylibcudf(mode="read"),
+        plc.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
+    )
+
+    return [
+        cudf._lib.column.Column.from_pylibcudf(col)
+        for col in plc_tbl.columns()
+    ]
+
+
+@acquire_spill_lock()
+def columns_split(
+    input_columns: Iterable[ColumnBase], splits: list[int]
+) -> list[list[ColumnBase]]:
+    return [
+        [
+            cudf._lib.column.Column.from_pylibcudf(col)
+            for col in plc_tbl.columns()
+        ]
+        for plc_tbl in plc.copying.split(
+            plc.Table(
+                [col.to_pylibcudf(mode="read") for col in input_columns]
+            ),
+            splits,
+        )
+    ]
diff --git a/python/cudf/cudf/core/_internals/stream_compaction.py b/python/cudf/cudf/core/_internals/stream_compaction.py
new file mode 100644
index 00000000000..4ccc26c2a1c
--- /dev/null
+++ b/python/cudf/cudf/core/_internals/stream_compaction.py
@@ -0,0 +1,121 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Literal
+
+import pylibcudf as plc
+
+from cudf._lib.column import Column
+from cudf.core.buffer import acquire_spill_lock
+
+if TYPE_CHECKING:
+    from cudf.core.column import ColumnBase
+
+
+@acquire_spill_lock()
+def drop_nulls(
+    columns: list[ColumnBase],
+    how: Literal["any", "all"] = "any",
+    keys: list[int] | None = None,
+    thresh: int | None = None,
+) -> list[ColumnBase]:
+    """
+    Drops null rows from the columns depending on the key columns.
+
+    Parameters
+    ----------
+    columns : list of columns
+    how : "any" or "all". If thresh is None, drops rows that have any
+        null or all nulls (respectively) in the key columns (default: "any")
+    keys : List of column indices. If set, then these columns are checked for
+        nulls rather than all of the columns (optional)
+    thresh : Minimum number of non-nulls required to keep a row (optional)
+
+    Returns
+    -------
+    columns with null rows dropped
+    """
+    if how not in {"any", "all"}:
+        raise ValueError("how must be 'any' or 'all'")
+
+    keys = keys if keys is not None else list(range(len(columns)))
+
+    # Note: If how == "all" and thresh is specified, thresh takes priority
+    if thresh is not None:
+        keep_threshold = thresh
+    elif how == "all":
+        keep_threshold = 1
+    else:
+        keep_threshold = len(keys)
+
+    plc_table = plc.stream_compaction.drop_nulls(
+        plc.Table([col.to_pylibcudf(mode="read") for col in columns]),
+        keys,
+        keep_threshold,
+    )
+    return [Column.from_pylibcudf(col) for col in plc_table.columns()]
+
+
+@acquire_spill_lock()
+def apply_boolean_mask(
+    columns: list[ColumnBase], boolean_mask: ColumnBase
+) -> list[ColumnBase]:
+    """
+    Drops the rows which correspond to False in boolean_mask.
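# Illustration only, not part of the patch: columns_split above cuts a table
# at the given row offsets, assuming a 6-row column and splits=[2, 4].
import pyarrow as pa
import pylibcudf as plc

tbl = plc.interop.from_arrow(pa.table({"x": [0, 1, 2, 3, 4, 5]}))
pieces = plc.copying.split(tbl, [2, 4])
assert [piece.num_rows() for piece in pieces] == [2, 2, 2]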
+
+    Parameters
+    ----------
+    columns : list of columns whose rows are dropped as per boolean_mask
+    boolean_mask : a boolean column of the same size as the source columns
+
+    Returns
+    -------
+    columns obtained from applying the mask
+    """
+    plc_table = plc.stream_compaction.apply_boolean_mask(
+        plc.Table([col.to_pylibcudf(mode="read") for col in columns]),
+        boolean_mask.to_pylibcudf(mode="read"),
+    )
+    return [Column.from_pylibcudf(col) for col in plc_table.columns()]
+
+
+@acquire_spill_lock()
+def drop_duplicates(
+    columns: list[ColumnBase],
+    keys: list[int] | None = None,
+    keep: Literal["first", "last", False] = "first",
+    nulls_are_equal: bool = True,
+) -> list[ColumnBase]:
+    """
+    Drops duplicate rows from the columns, as determined by the key columns.
+
+    Parameters
+    ----------
+    columns : List of columns
+    keys : List of column indices. If set, then these columns are checked for
+        duplicates rather than all of the columns (optional)
+    keep : keep the 'first' or 'last' of the duplicate rows, or (False) none
+        of them
+    nulls_are_equal : if True, nulls are treated as equal; otherwise they
+        are not
+
+    Returns
+    -------
+    columns with duplicates dropped
+    """
+    _keep_options = {
+        "first": plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST,
+        "last": plc.stream_compaction.DuplicateKeepOption.KEEP_LAST,
+        False: plc.stream_compaction.DuplicateKeepOption.KEEP_NONE,
+    }
+    if (keep_option := _keep_options.get(keep)) is None:
+        raise ValueError('keep must be either "first", "last" or False')
+
+    plc_table = plc.stream_compaction.stable_distinct(
+        plc.Table([col.to_pylibcudf(mode="read") for col in columns]),
+        keys if keys is not None else list(range(len(columns))),
+        keep_option,
+        plc.types.NullEquality.EQUAL
+        if nulls_are_equal
+        else plc.types.NullEquality.UNEQUAL,
+        plc.types.NanEquality.ALL_EQUAL,
+    )
+    return [Column.from_pylibcudf(col) for col in plc_table.columns()]
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index a0cf38c6f51..d9b54008e85 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -1095,17 +1095,22 @@ def as_categorical_column(self, dtype: Dtype) -> Self:
             raise ValueError("dtype must be CategoricalDtype")

         if not isinstance(self.categories, type(dtype.categories._column)):
-            # If both categories are of different Column types,
-            # return a column full of Nulls.
-            codes = cast(
-                cudf.core.column.numerical.NumericalColumn,
-                column.as_column(
-                    _DEFAULT_CATEGORICAL_VALUE,
-                    length=self.size,
-                    dtype=self.codes.dtype,
-                ),
-            )
-            codes = as_unsigned_codes(len(dtype.categories), codes)
+            if isinstance(
+                self.categories.dtype, cudf.StructDtype
+            ) and isinstance(dtype.categories.dtype, cudf.IntervalDtype):
+                codes = self.codes
+            else:
+                # Otherwise, if the two categories are of different Column
+                # types, return a column full of nulls.
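                # Example (illustration only, not part of the patch): casting
                # string categories to integer categories lands here, so every
                # code becomes the null sentinel. The struct -> interval branch
                # above instead keeps self.codes, because interval categories
                # round-trip through arrow as structs (see the as_column change
                # further below).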
+ codes = cast( + cudf.core.column.numerical.NumericalColumn, + column.as_column( + _DEFAULT_CATEGORICAL_VALUE, + length=self.size, + dtype=self.codes.dtype, + ), + ) + codes = as_unsigned_codes(len(dtype.categories), codes) return type(self)( data=self.data, # type: ignore[arg-type] size=self.size, diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index cc07af0f669..cccafaeba88 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -26,12 +26,6 @@ from cudf import _lib as libcudf from cudf._lib.column import Column from cudf._lib.scalar import as_device_scalar -from cudf._lib.stream_compaction import ( - apply_boolean_mask, - distinct_count as cpp_distinct_count, - drop_duplicates, - drop_nulls, -) from cudf._lib.types import dtype_to_pylibcudf_type, size_type_dtype from cudf.api.types import ( _is_non_decimal_numeric_dtype, @@ -42,7 +36,12 @@ is_string_dtype, ) from cudf.core._compat import PANDAS_GE_210 -from cudf.core._internals import aggregation, sorting, unary +from cudf.core._internals import aggregation, copying, sorting, unary +from cudf.core._internals.stream_compaction import ( + apply_boolean_mask, + drop_duplicates, + drop_nulls, +) from cudf.core._internals.timezones import get_compatible_timezone from cudf.core.abc import Serializable from cudf.core.buffer import ( @@ -51,6 +50,7 @@ as_buffer, cuda_array_interface_wrapper, ) +from cudf.core.copy_types import GatherMap from cudf.core.dtypes import ( CategoricalDtype, DecimalDtype, @@ -77,6 +77,7 @@ import builtins from cudf._typing import ColumnLike, Dtype, ScalarLike + from cudf.core.column.numerical import NumericalColumn if PANDAS_GE_210: NumpyExtensionArray = pd.arrays.NumpyExtensionArray @@ -274,10 +275,11 @@ def any(self, skipna: bool = True) -> bool: def dropna(self) -> Self: if self.has_nulls(): - return drop_nulls([self])[0]._with_type_metadata(self.dtype) + return drop_nulls([self])[0]._with_type_metadata(self.dtype) # type: ignore[return-value] else: return self.copy() + @acquire_spill_lock() def to_arrow(self) -> pa.Array: """Convert to PyArrow Array @@ -294,9 +296,7 @@ def to_arrow(self) -> pa.Array: 4 ] """ - return libcudf.interop.to_arrow([self], [("None", self.dtype)])[ - "None" - ].chunk(0) + return plc.interop.to_arrow(self.to_pylibcudf(mode="read")).chunk(0) @classmethod def from_arrow(cls, array: pa.Array) -> ColumnBase: @@ -333,26 +333,33 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: if isinstance(array.type, pa.DictionaryType): indices_table = pa.table( - { - "None": pa.chunked_array( - [chunk.indices for chunk in data["None"].chunks], + [ + pa.chunked_array( + [chunk.indices for chunk in data.column(0).chunks], type=array.type.index_type, ) - } + ], + [None], ) dictionaries_table = pa.table( - { - "None": pa.chunked_array( - [chunk.dictionary for chunk in data["None"].chunks], + [ + pa.chunked_array( + [chunk.dictionary for chunk in data.column(0).chunks], type=array.type.value_type, ) - } + ], + [None], ) - - codes = libcudf.interop.from_arrow(indices_table)[0] - categories = libcudf.interop.from_arrow(dictionaries_table)[0] + with acquire_spill_lock(): + codes = cls.from_pylibcudf( + plc.interop.from_arrow(indices_table).columns()[0] + ) + categories = cls.from_pylibcudf( + plc.interop.from_arrow(dictionaries_table).columns()[0] + ) codes = cudf.core.column.categorical.as_unsigned_codes( - len(categories), codes + len(categories), + codes, # type: ignore[arg-type] ) return 
cudf.core.column.CategoricalColumn( data=None, @@ -363,10 +370,14 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: mask=codes.base_mask, children=(codes,), ) - - result = libcudf.interop.from_arrow(data)[0] - - return result._with_type_metadata(cudf_dtype_from_pa_type(array.type)) + else: + result = cls.from_pylibcudf( + plc.interop.from_arrow(data).columns()[0] + ) + # TODO: cudf_dtype_from_pa_type may be less necessary for some types + return result._with_type_metadata( + cudf_dtype_from_pa_type(array.type) + ) @acquire_spill_lock() def _get_mask_as_column(self) -> ColumnBase: @@ -431,8 +442,16 @@ def _fill( ) return self - def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase: - return libcudf.copying.shift(self, offset, fill_value) + @acquire_spill_lock() + def shift(self, offset: int, fill_value: ScalarLike) -> Self: + if not isinstance(fill_value, cudf.Scalar): + fill_value = cudf.Scalar(fill_value, dtype=self.dtype) + plc_col = plc.copying.shift( + self.to_pylibcudf(mode="read"), + offset, + fill_value.device_value.c_value, + ) + return type(self).from_pylibcudf(plc_col) # type: ignore[return-value] @property def nullmask(self) -> Buffer: @@ -460,8 +479,11 @@ def copy(self, deep: bool = True) -> Self: them. """ if deep: - result = libcudf.copying.copy_column(self) - return result._with_type_metadata(self.dtype) + with acquire_spill_lock(): + result = type(self).from_pylibcudf( + self.to_pylibcudf(mode="read").copy() + ) + return result._with_type_metadata(self.dtype) # type: ignore[return-value] else: return cast( Self, @@ -542,7 +564,15 @@ def element_indexing(self, index: int): idx = len(self) + idx if idx > len(self) - 1 or idx < 0: raise IndexError("single positional indexer is out-of-bounds") - return libcudf.copying.get_element(self, idx).value + with acquire_spill_lock(): + dscalar = libcudf.scalar.DeviceScalar.from_pylibcudf( + plc.copying.get_element( + self.to_pylibcudf(mode="read"), + idx, + ), + dtype=self.dtype, + ) + return dscalar.value def slice(self, start: int, stop: int, stride: int | None = None) -> Self: stride = 1 if stride is None else stride @@ -554,9 +584,15 @@ def slice(self, start: int, stop: int, stride: int | None = None) -> Self: return cast(Self, column_empty(0, self.dtype)) # compute mask slice if stride == 1: - return libcudf.copying.column_slice(self, [start, stop])[ - 0 - ]._with_type_metadata(self.dtype) + with acquire_spill_lock(): + result = [ + type(self).from_pylibcudf(col) + for col in plc.copying.slice( + self.to_pylibcudf(mode="read"), + [start, stop], + ) + ] + return result[0]._with_type_metadata(self.dtype) # type: ignore[return-value] else: # Need to create a gather map for given slice with stride gather_map = as_column( @@ -625,9 +661,16 @@ def _scatter_by_slice( if isinstance(value, cudf.core.scalar.Scalar): return self._fill(value, start, stop, inplace=True) else: - return libcudf.copying.copy_range( - value, self, 0, num_keys, start, stop, False - ) + with acquire_spill_lock(): + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.copying.copy_range( + value.to_pylibcudf(mode="read"), + self.to_pylibcudf(mode="read"), + 0, + num_keys, + start, + ) + ) # step != 1, create a scatter map with arange scatter_map = cast( @@ -671,11 +714,21 @@ def _scatter_by_column( self._check_scatter_key_length(num_keys, value) if key.dtype.kind == "b": - return libcudf.copying.boolean_mask_scatter([value], [self], key)[ - 0 - ]._with_type_metadata(self.dtype) + with acquire_spill_lock(): + plc_table = 
plc.copying.boolean_mask_scatter( + plc.Table([value.to_pylibcudf(mode="read")]) + if isinstance(value, Column) + else [value.device_value.c_value], + plc.Table([self.to_pylibcudf(mode="read")]), + key.to_pylibcudf(mode="read"), + ) + return ( + type(self) # type: ignore[return-value] + .from_pylibcudf(plc_table.columns()[0]) + ._with_type_metadata(self.dtype) + ) else: - return libcudf.copying.scatter([value], key, [self])[ + return copying.scatter([value], key, [self])[ 0 ]._with_type_metadata(self.dtype) @@ -805,7 +858,7 @@ def indices_of( else: value = as_column(value, dtype=self.dtype, length=1) mask = value.contains(self) - return apply_boolean_mask( + return apply_boolean_mask( # type: ignore[return-value] [as_column(range(0, len(self)), dtype=size_type_dtype)], mask )[0] @@ -887,14 +940,9 @@ def take( # be done by the caller. This check will be removed in future release. if indices.dtype.kind not in {"u", "i"}: indices = indices.astype(libcudf.types.size_type_dtype) - if not libcudf.copying._gather_map_is_valid( - indices, len(self), check_bounds, nullify - ): - raise IndexError("Gather map index is out of bounds.") - - return libcudf.copying.gather([self], indices, nullify=nullify)[ - 0 - ]._with_type_metadata(self.dtype) + GatherMap(indices, len(self), nullify=not check_bounds or nullify) + gathered = copying.gather([self], indices, nullify=nullify) # type: ignore[arg-type] + return gathered[0]._with_type_metadata(self.dtype) # type: ignore[return-value] def isin(self, values: Sequence) -> ColumnBase: """Check whether values are contained in the Column. @@ -1045,9 +1093,15 @@ def distinct_count(self, dropna: bool = True) -> int: try: return self._distinct_count[dropna] except KeyError: - self._distinct_count[dropna] = cpp_distinct_count( - self, ignore_nulls=dropna - ) + with acquire_spill_lock(): + result = plc.stream_compaction.distinct_count( + self.to_pylibcudf(mode="read"), + plc.types.NullPolicy.EXCLUDE + if dropna + else plc.types.NullPolicy.INCLUDE, + plc.types.NanPolicy.NAN_IS_VALID, + ) + self._distinct_count[dropna] = result return self._distinct_count[dropna] def can_cast_safely(self, to_dtype: Dtype) -> bool: @@ -1276,7 +1330,7 @@ def unique(self) -> Self: if self.is_unique: return self.copy() else: - return drop_duplicates([self], keep="first")[ + return drop_duplicates([self], keep="first")[ # type: ignore[return-value] 0 ]._with_type_metadata(self.dtype) @@ -1507,20 +1561,33 @@ def _return_sentinel_column(): left_gather_map = type(self).from_pylibcudf(left_rows) right_gather_map = type(self).from_pylibcudf(right_rows) - codes = libcudf.copying.gather( - [as_column(range(len(cats)), dtype=dtype)], - right_gather_map, - nullify=True, + codes = as_column(range(len(cats)), dtype=dtype).take( + right_gather_map, nullify=True ) del right_gather_map del right_rows # reorder `codes` so that its values correspond to the # values of `self`: (codes,) = sorting.sort_by_key( - codes, [left_gather_map], [True], ["last"], stable=True + [codes], [left_gather_map], [True], ["last"], stable=True ) return codes.fillna(na_sentinel.value) + @acquire_spill_lock() + def copy_if_else( + self, other: Self | cudf.Scalar, boolean_mask: NumericalColumn + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.copying.copy_if_else( + self.to_pylibcudf(mode="read"), + other.device_value.c_value + if isinstance(other, cudf.Scalar) + else other.to_pylibcudf(mode="read"), + boolean_mask.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() def one_hot_encode( 
self, categories: ColumnBase ) -> abc.Generator[ColumnBase]: @@ -2024,18 +2091,26 @@ def as_column( if isinstance(arbitrary.dtype, pd.DatetimeTZDtype): new_tz = get_compatible_timezone(arbitrary.dtype) arbitrary = arbitrary.astype(new_tz) - if isinstance(arbitrary.dtype, pd.CategoricalDtype) and isinstance( - arbitrary.dtype.categories.dtype, pd.DatetimeTZDtype - ): - new_tz = get_compatible_timezone( - arbitrary.dtype.categories.dtype - ) - new_cats = arbitrary.dtype.categories.astype(new_tz) - new_dtype = pd.CategoricalDtype( - categories=new_cats, ordered=arbitrary.dtype.ordered - ) - arbitrary = arbitrary.astype(new_dtype) - + if isinstance(arbitrary.dtype, pd.CategoricalDtype): + if isinstance( + arbitrary.dtype.categories.dtype, pd.DatetimeTZDtype + ): + new_tz = get_compatible_timezone( + arbitrary.dtype.categories.dtype + ) + new_cats = arbitrary.dtype.categories.astype(new_tz) + new_dtype = pd.CategoricalDtype( + categories=new_cats, ordered=arbitrary.dtype.ordered + ) + arbitrary = arbitrary.astype(new_dtype) + elif ( + isinstance( + arbitrary.dtype.categories.dtype, pd.IntervalDtype + ) + and dtype is None + ): + # Conversion to arrow converts IntervalDtype to StructDtype + dtype = cudf.CategoricalDtype.from_pandas(arbitrary.dtype) return as_column( pa.array(arbitrary, from_pandas=True), nan_as_null=nan_as_null, diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 81b82040b8d..b6a4122ebb9 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -19,7 +19,6 @@ import cudf import cudf.core.column.column as column -import cudf.core.column.string as string from cudf import _lib as libcudf from cudf.core._compat import PANDAS_GE_220 from cudf.core._internals import binaryop, unary @@ -354,9 +353,7 @@ def is_year_end(self) -> ColumnBase: leap = day_of_year == cudf.Scalar(366) non_leap = day_of_year == cudf.Scalar(365) - return libcudf.copying.copy_if_else(leap, non_leap, leap_dates).fillna( - False - ) + return leap.copy_if_else(non_leap, leap_dates).fillna(False) @property def is_leap_year(self) -> ColumnBase: @@ -604,9 +601,14 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn: names = as_column(_DATETIME_NAMES) else: names = column.column_empty(0, dtype="object") - return string._datetime_to_str_typecast_functions[self.dtype]( - self, format, names - ) + with acquire_spill_lock(): + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.strings.convert.convert_datetime.from_timestamps( + self.to_pylibcudf(mode="read"), + format, + names.to_pylibcudf(mode="read"), + ) + ) def as_string_column(self) -> cudf.core.column.StringColumn: format = _dtype_to_format_conversion.get( @@ -1014,7 +1016,7 @@ def to_pandas( self.dtype.tz, ambiguous="NaT", nonexistent="NaT" ) - def to_arrow(self): + def to_arrow(self) -> pa.Array: return pa.compute.assume_timezone( self._local_time.to_arrow(), str(self.dtype.tz) ) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 9e6a73f1a9c..09941665ba2 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -269,8 +269,8 @@ def from_arrow(cls, data: pa.Array): mask=mask, ) - def to_arrow(self): - data_buf_32 = np.array(self.base_data.memoryview()).view("int32") + def to_arrow(self) -> pa.Array: + data_buf_32 = np.array(self.base_data.memoryview()).view("int32") # type: ignore[union-attr] data_buf_128 = np.empty(len(data_buf_32) * 4, 
dtype="int32") # use striding to set the first 32 bits of each 128-bit chunk: @@ -337,7 +337,7 @@ def from_arrow(cls, data: pa.Array): result.dtype.precision = data.type.precision return result - def to_arrow(self): + def to_arrow(self) -> pa.Array: return super().to_arrow().cast(self.dtype.to_arrow()) def _with_type_metadata( @@ -396,8 +396,8 @@ def from_arrow(cls, data: pa.Array): mask=mask, ) - def to_arrow(self): - data_buf_64 = np.array(self.base_data.memoryview()).view("int64") + def to_arrow(self) -> pa.Array: + data_buf_64 = np.array(self.base_data.memoryview()).view("int64") # type: ignore[union-attr] data_buf_128 = np.empty(len(data_buf_64) * 2, dtype="int64") # use striding to set the first 64 bits of each 128-bit chunk: diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index ba98e28f6a2..3d9440cdf21 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -150,7 +150,7 @@ def offsets(self) -> NumericalColumn: """ return cast(NumericalColumn, self.children[0]) - def to_arrow(self): + def to_arrow(self) -> pa.Array: offsets = self.offsets.to_arrow() elements = ( pa.nulls(len(self.elements)) @@ -160,7 +160,7 @@ def to_arrow(self): pa_type = pa.list_(elements.type) if self.nullable: - nbuf = pa.py_buffer(self.mask.memoryview()) + nbuf = pa.py_buffer(self.mask.memoryview()) # type: ignore[union-attr] buffers = (nbuf, offsets.buffers()[1]) else: buffers = offsets.buffers() diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index f099cef3331..4405e153b0c 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -14,8 +14,6 @@ import cudf import cudf.core.column.column as column -import cudf.core.column.string as string -from cudf import _lib as libcudf from cudf.api.types import is_integer, is_scalar from cudf.core._internals import binaryop, unary from cudf.core.buffer import acquire_spill_lock, as_buffer @@ -366,22 +364,42 @@ def normalize_binop_value(self, other: ScalarLike) -> Self | cudf.Scalar: else: return NotImplemented - def int2ip(self) -> "cudf.core.column.StringColumn": - if self.dtype != cudf.dtype("uint32"): + @acquire_spill_lock() + def int2ip(self) -> cudf.core.column.StringColumn: + if self.dtype != np.dtype(np.uint32): raise TypeError("Only uint32 type can be converted to ip") - - return libcudf.string_casting.int2ip(self) + plc_column = plc.strings.convert.convert_ipv4.integers_to_ipv4( + self.to_pylibcudf(mode="read") + ) + return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] def as_string_column(self) -> cudf.core.column.StringColumn: - if len(self) > 0: - return string._numeric_to_str_typecast_functions[ - cudf.dtype(self.dtype) - ](self) - else: + if len(self) == 0: return cast( cudf.core.column.StringColumn, column.column_empty(0, dtype="object"), ) + elif self.dtype.kind == "b": + conv_func = functools.partial( + plc.strings.convert.convert_booleans.from_booleans, + true_string=cudf.Scalar( + "True", dtype="str" + ).device_value.c_value, + false_string=cudf.Scalar( + "False", dtype="str" + ).device_value.c_value, + ) + elif self.dtype.kind in {"i", "u"}: + conv_func = plc.strings.convert.convert_integers.from_integers + elif self.dtype.kind == "f": + conv_func = plc.strings.convert.convert_floats.from_floats + else: + raise ValueError(f"No string conversion from type {self.dtype}") + + with acquire_spill_lock(): + return type(self).from_pylibcudf( # type: 
ignore[return-value] + conv_func(self.to_pylibcudf(mode="read")) + ) def as_datetime_column( self, dtype: Dtype diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index d76caa5c3b8..fcdcb789f23 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -20,9 +20,8 @@ import cudf.core.column.column as column import cudf.core.column.datetime as datetime from cudf import _lib as libcudf -from cudf._lib import string_casting as str_cast from cudf._lib.column import Column -from cudf._lib.types import size_type_dtype +from cudf._lib.types import dtype_to_pylibcudf_type, size_type_dtype from cudf.api.types import is_integer, is_scalar, is_string_dtype from cudf.core._internals import binaryop from cudf.core.buffer import acquire_spill_lock @@ -49,62 +48,7 @@ from cudf.core.column.numerical import NumericalColumn -def str_to_boolean(column: StringColumn): - """Takes in string column and returns boolean column""" - with acquire_spill_lock(): - plc_column = plc.strings.attributes.count_characters( - column.to_pylibcudf(mode="read") - ) - result = Column.from_pylibcudf(plc_column) - return (result > cudf.Scalar(0, dtype="int8")).fillna(False) - - -_str_to_numeric_typecast_functions = { - cudf.api.types.dtype("int8"): str_cast.stoi8, - cudf.api.types.dtype("int16"): str_cast.stoi16, - cudf.api.types.dtype("int32"): str_cast.stoi, - cudf.api.types.dtype("int64"): str_cast.stol, - cudf.api.types.dtype("uint8"): str_cast.stoui8, - cudf.api.types.dtype("uint16"): str_cast.stoui16, - cudf.api.types.dtype("uint32"): str_cast.stoui, - cudf.api.types.dtype("uint64"): str_cast.stoul, - cudf.api.types.dtype("float32"): str_cast.stof, - cudf.api.types.dtype("float64"): str_cast.stod, - cudf.api.types.dtype("bool"): str_to_boolean, -} - -_numeric_to_str_typecast_functions = { - cudf.api.types.dtype("int8"): str_cast.i8tos, - cudf.api.types.dtype("int16"): str_cast.i16tos, - cudf.api.types.dtype("int32"): str_cast.itos, - cudf.api.types.dtype("int64"): str_cast.ltos, - cudf.api.types.dtype("uint8"): str_cast.ui8tos, - cudf.api.types.dtype("uint16"): str_cast.ui16tos, - cudf.api.types.dtype("uint32"): str_cast.uitos, - cudf.api.types.dtype("uint64"): str_cast.ultos, - cudf.api.types.dtype("float32"): str_cast.ftos, - cudf.api.types.dtype("float64"): str_cast.dtos, - cudf.api.types.dtype("bool"): str_cast.from_booleans, -} - -_datetime_to_str_typecast_functions = { - # TODO: support Date32 UNIX days - # cudf.api.types.dtype("datetime64[D]"): str_cast.int2timestamp, - cudf.api.types.dtype("datetime64[s]"): str_cast.int2timestamp, - cudf.api.types.dtype("datetime64[ms]"): str_cast.int2timestamp, - cudf.api.types.dtype("datetime64[us]"): str_cast.int2timestamp, - cudf.api.types.dtype("datetime64[ns]"): str_cast.int2timestamp, -} - -_timedelta_to_str_typecast_functions = { - cudf.api.types.dtype("timedelta64[s]"): str_cast.int2timedelta, - cudf.api.types.dtype("timedelta64[ms]"): str_cast.int2timedelta, - cudf.api.types.dtype("timedelta64[us]"): str_cast.int2timedelta, - cudf.api.types.dtype("timedelta64[ns]"): str_cast.int2timedelta, -} - - -def _is_supported_regex_flags(flags): +def _is_supported_regex_flags(flags: int) -> bool: return flags == 0 or ( (flags & (re.MULTILINE | re.DOTALL) != 0) and (flags & ~(re.MULTILINE | re.DOTALL) == 0) @@ -155,10 +99,7 @@ def htoi(self) -> SeriesOrIndex: 3 51966 dtype: int64 """ - - out = str_cast.htoi(self._column) - - return self._return_or_inplace(out, inplace=False) + return 
self._return_or_inplace(self._column.hex_to_integers()) hex_to_int = htoi @@ -188,10 +129,7 @@ def ip2int(self) -> SeriesOrIndex: 2 0 dtype: int64 """ - - out = str_cast.ip2int(self._column) - - return self._return_or_inplace(out, inplace=False) + return self._return_or_inplace(self._column.ipv4_to_integers()) ip_to_int = ip2int @@ -1380,7 +1318,7 @@ def ishex(self) -> SeriesOrIndex: 4 True dtype: bool """ - return self._return_or_inplace(str_cast.is_hex(self._column)) + return self._return_or_inplace(self._column.is_hex()) def istimestamp(self, format: str) -> SeriesOrIndex: """ @@ -1404,9 +1342,7 @@ def istimestamp(self, format: str) -> SeriesOrIndex: 3 False dtype: bool """ - return self._return_or_inplace( - str_cast.istimestamp(self._column, format) - ) + return self._return_or_inplace(self._column.is_timestamp(format)) def isfloat(self) -> SeriesOrIndex: r""" @@ -1957,7 +1893,7 @@ def isipv4(self) -> SeriesOrIndex: 3 False dtype: bool """ - return self._return_or_inplace(str_cast.is_ipv4(self._column)) + return self._return_or_inplace(self._column.is_ipv4()) def lower(self) -> SeriesOrIndex: """ @@ -4125,9 +4061,7 @@ def removesuffix(self, suffix: str) -> SeriesOrIndex: ends_column = self.endswith(suffix)._column # type: ignore[union-attr] removed_column = self.slice(0, -len(suffix), None)._column # type: ignore[union-attr] - result = cudf._lib.copying.copy_if_else( - removed_column, self._column, ends_column - ) + result = removed_column.copy_if_else(self._column, ends_column) return self._return_or_inplace(result) def removeprefix(self, prefix: str) -> SeriesOrIndex: @@ -4165,9 +4099,7 @@ def removeprefix(self, prefix: str) -> SeriesOrIndex: return self._return_or_inplace(self._column) starts_column = self.startswith(prefix)._column # type: ignore[union-attr] removed_column = self.slice(len(prefix), None, None)._column # type: ignore[union-attr] - result = cudf._lib.copying.copy_if_else( - removed_column, self._column, starts_column - ) + result = removed_column.copy_if_else(self._column, starts_column) return self._return_or_inplace(result) def _find( @@ -5826,26 +5758,38 @@ def __contains__(self, item: ScalarLike) -> bool: other = [item] if is_scalar(item) else item return self.contains(column.as_column(other, dtype=self.dtype)).any() - def as_numerical_column( - self, dtype: Dtype - ) -> "cudf.core.column.NumericalColumn": + def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: out_dtype = cudf.api.types.dtype(dtype) - string_col = self - if out_dtype.kind in {"i", "u"}: - if not string_col.is_integer().all(): + if out_dtype.kind == "b": + with acquire_spill_lock(): + plc_column = plc.strings.attributes.count_characters( + self.to_pylibcudf(mode="read") + ) + result = Column.from_pylibcudf(plc_column) + return (result > cudf.Scalar(0, dtype="int8")).fillna(False) + elif out_dtype.kind in {"i", "u"}: + if not self.is_integer().all(): raise ValueError( "Could not convert strings to integer " "type due to presence of non-integer values." ) + cast_func = plc.strings.convert.convert_integers.to_integers elif out_dtype.kind == "f": - if not string_col.is_float().all(): + if not self.is_float().all(): raise ValueError( "Could not convert strings to float " "type due to presence of non-floating values." 
) - - result_col = _str_to_numeric_typecast_functions[out_dtype](string_col) - return result_col + cast_func = plc.strings.convert.convert_floats.to_floats + else: + raise ValueError( + f"dtype must be a numerical type, not {out_dtype}" + ) + plc_dtype = dtype_to_pylibcudf_type(out_dtype) + with acquire_spill_lock(): + return type(self).from_pylibcudf( # type: ignore[return-value] + cast_func(self.to_pylibcudf(mode="read"), plc_dtype) + ) def strptime( self, dtype: Dtype, format: str @@ -5880,23 +5824,27 @@ def strptime( raise NotImplementedError( "Cannot parse date-like strings with different formats" ) - valid_ts = str_cast.istimestamp(self, format) + valid_ts = self.is_timestamp(format) valid = valid_ts | is_nat if not valid.all(): raise ValueError(f"Column contains invalid data for {format=}") - casting_func = str_cast.timestamp2int + casting_func = plc.strings.convert.convert_datetime.to_timestamps add_back_nat = is_nat.any() elif dtype.kind == "m": # type: ignore[union-attr] - casting_func = str_cast.timedelta2int + casting_func = plc.strings.convert.convert_durations.to_durations add_back_nat = False - result_col = casting_func(self, dtype, format) + with acquire_spill_lock(): + plc_dtype = dtype_to_pylibcudf_type(dtype) + result_col = type(self).from_pylibcudf( + casting_func(self.to_pylibcudf(mode="read"), plc_dtype, format) + ) if add_back_nat: result_col[is_nat] = None - return result_col + return result_col # type: ignore[return-value] def as_datetime_column( self, dtype: Dtype @@ -6398,15 +6346,15 @@ def detokenize(self, indices: ColumnBase, separator: cudf.Scalar) -> Self: ) ) + @acquire_spill_lock() def _modify_characters( self, method: Callable[[plc.Column], plc.Column] ) -> Self: """ Helper function for methods that modify characters e.g. 
to_lower """ - with acquire_spill_lock(): - plc_column = method(self.to_pylibcudf(mode="read")) - return cast(Self, Column.from_pylibcudf(plc_column)) + plc_column = method(self.to_pylibcudf(mode="read")) + return cast(Self, Column.from_pylibcudf(plc_column)) def to_lower(self) -> Self: return self._modify_characters(plc.strings.case.to_lower) @@ -6435,6 +6383,46 @@ def replace_multiple(self, pattern: Self, replacements: Self) -> Self: ) return cast(Self, Column.from_pylibcudf(plc_result)) + @acquire_spill_lock() + def is_hex(self) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.strings.convert.convert_integers.is_hex( + self.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() + def hex_to_integers(self) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.strings.convert.convert_integers.hex_to_integers( + self.to_pylibcudf(mode="read"), plc.DataType(plc.TypeId.INT64) + ) + ) + + @acquire_spill_lock() + def is_ipv4(self) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.strings.convert.convert_ipv4.is_ipv4( + self.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() + def ipv4_to_integers(self) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.strings.convert.convert_ipv4.ipv4_to_integers( + self.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() + def is_timestamp(self, format: str) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.strings.convert.convert_datetime.is_timestamp( + self.to_pylibcudf(mode="read"), format + ) + ) + @acquire_spill_lock() def _split_record_re( self, diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 8b1515acae2..417fa99dac0 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -10,9 +10,10 @@ import pandas as pd import pyarrow as pa +import pylibcudf as plc + import cudf import cudf.core.column.column as column -import cudf.core.column.string as string from cudf.api.types import is_scalar from cudf.core._internals import binaryop, unary from cudf.core.buffer import Buffer, acquire_spill_lock @@ -297,9 +298,12 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn: column.column_empty(0, dtype="object"), ) else: - return string._timedelta_to_str_typecast_functions[self.dtype]( - self, format=format - ) + with acquire_spill_lock(): + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.strings.convert.convert_durations.from_durations( + self.to_pylibcudf(mode="read"), format + ) + ) def as_string_column(self) -> cudf.core.column.StringColumn: return self.strftime("%D days %H:%M:%S") diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 89649a7b750..3334b57ce1b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1802,13 +1802,37 @@ def _concat( ) for table in tables ] - - concatted = libcudf.utils.data_from_pylibcudf_table( - plc.concatenate.concatenate(plc_tables), - column_names=column_names, - index_names=index_names, - ) - out = cls._from_data(*concatted) + plc_result = plc.concatenate.concatenate(plc_tables) + if ignore: + index = None + data = { + col_name: ColumnBase.from_pylibcudf(col) + for col_name, col in zip( + column_names, plc_result.columns(), strict=True + ) + } + else: + result_columns = [ + 
ColumnBase.from_pylibcudf(col) + for col in plc_result.columns() + ] + index = _index_from_data( + dict( + zip( + index_names, + result_columns[: len(index_names)], + strict=True, + ) + ) + ) + data = dict( + zip( + column_names, + result_columns[len(index_names) :], + strict=True, + ) + ) + out = cls._from_data(data=data, index=index) # If ignore_index is True, all input frames are empty, and at # least one input frame has an index, assign a new RangeIndex @@ -3172,10 +3196,7 @@ def where(self, cond, other=None, inplace=False, axis=None, level=None): ) if cond_col := cond._data.get(name): - result = cudf._lib.copying.copy_if_else( - source_col, other_col, cond_col - ) - + result = source_col.copy_if_else(other_col, cond_col) out.append(result._with_type_metadata(col.dtype)) else: out_mask = as_buffer( @@ -7857,7 +7878,8 @@ def interleave_columns(self): return self._constructor_sliced._from_column(result_col) @acquire_spill_lock() - def _compute_columns(self, expr: str) -> ColumnBase: + def _compute_column(self, expr: str) -> ColumnBase: + """Helper function for eval""" plc_column = plc.transform.compute_column( plc.Table( [col.to_pylibcudf(mode="read") for col in self._columns] @@ -7993,7 +8015,7 @@ def eval(self, expr: str, inplace: bool = False, **kwargs): raise ValueError( "Cannot operate inplace if there is no assignment" ) - return Series._from_column(self._compute_columns(statements[0])) + return Series._from_column(self._compute_column(statements[0])) targets = [] exprs = [] @@ -8011,7 +8033,7 @@ def eval(self, expr: str, inplace: bool = False, **kwargs): ret = self if inplace else self.copy(deep=False) for name, expr in zip(targets, exprs): - ret._data[name] = self._compute_columns(expr) + ret._data[name] = self._compute_column(expr) if not inplace: return ret diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 4f40ba0bd92..9aadbf8f47a 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -22,7 +22,7 @@ from cudf import _lib as libcudf from cudf.api.types import is_dtype_equal, is_scalar from cudf.core._compat import PANDAS_LT_300 -from cudf.core._internals import sorting +from cudf.core._internals import copying, sorting from cudf.core._internals.search import search_sorted from cudf.core.abc import Serializable from cudf.core.buffer import acquire_spill_lock @@ -946,16 +946,17 @@ def from_arrow(cls, data: pa.Table) -> Self: if len(dict_indices): dict_indices_table = pa.table(dict_indices) data = data.drop(dict_indices_table.column_names) - indices_columns = libcudf.interop.from_arrow(dict_indices_table) + plc_indices = plc.interop.from_arrow(dict_indices_table) # as dictionary size can vary, it can't be a single table cudf_dictionaries_columns = { name: ColumnBase.from_arrow(dict_dictionaries[name]) for name in dict_dictionaries.keys() } - for name, codes in zip( - dict_indices_table.column_names, indices_columns + for name, plc_codes in zip( + dict_indices_table.column_names, plc_indices.columns() ): + codes = libcudf.column.Column.from_pylibcudf(plc_codes) categories = cudf_dictionaries_columns[name] codes = as_unsigned_codes(len(categories), codes) cudf_category_frame[name] = CategoricalColumn( @@ -971,9 +972,9 @@ def from_arrow(cls, data: pa.Table) -> Self: # Handle non-dict arrays cudf_non_category_frame = { - name: col - for name, col in zip( - data.column_names, libcudf.interop.from_arrow(data) + name: libcudf.column.Column.from_pylibcudf(plc_col) + for name, plc_col in zip( + data.column_names, 
plc.interop.from_arrow(data).columns() ) } @@ -1032,7 +1033,7 @@ def from_arrow(cls, data: pa.Table) -> Self: return cls._from_data({name: result[name] for name in column_names}) @_performance_tracking - def to_arrow(self): + def to_arrow(self) -> pa.Table: """ Convert to arrow Table @@ -1058,19 +1059,6 @@ def to_arrow(self): } ) - @_performance_tracking - def _positions_from_column_names(self, column_names) -> list[int]: - """Map each column name into their positions in the frame. - - The order of indices returned corresponds to the column order in this - Frame. - """ - return [ - i - for i, name in enumerate(self._column_names) - if name in set(column_names) - ] - @_performance_tracking def _copy_type_metadata(self: Self, other: Self) -> Self: """ @@ -1485,18 +1473,13 @@ def _get_sorted_inds( ) @_performance_tracking - def _split(self, splits): + def _split(self, splits: list[int]) -> list[Self]: """Split a frame with split points in ``splits``. Returns a list of Frames of length `len(splits) + 1`. """ return [ - self._from_columns_like_self( - libcudf.copying.columns_split(list(self._columns), splits)[ - split_idx - ], - self._column_names, - ) - for split_idx in range(len(splits) + 1) + self._from_columns_like_self(split, self._column_names) + for split in copying.columns_split(self._columns, splits) ] @_performance_tracking diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 965ba7face7..e8a9e599cb0 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -16,6 +16,7 @@ import pylibcudf as plc import cudf +import cudf.core._internals from cudf import _lib as libcudf from cudf._lib import groupby as libgroupby from cudf._lib.types import size_type_dtype @@ -430,7 +431,9 @@ def indices(self) -> dict[ScalarLike, cp.ndarray]: ] ) - group_keys = libcudf.stream_compaction.drop_duplicates(group_keys) + group_keys = cudf.core._internals.stream_compaction.drop_duplicates( + group_keys + ) if len(group_keys) > 1: index = cudf.MultiIndex.from_arrays(group_keys) else: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 8d3ef1036d1..eac5b9d71ae 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -31,6 +31,7 @@ ) from cudf.core._base_index import BaseIndex, _return_get_indexer_result from cudf.core._compat import PANDAS_LT_300 +from cudf.core._internals import copying from cudf.core._internals.search import search_sorted from cudf.core.buffer import acquire_spill_lock from cudf.core.column import ( @@ -1371,7 +1372,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): ) scatter_map = libcudf.column.Column.from_pylibcudf(left_plc) indices = libcudf.column.Column.from_pylibcudf(right_plc) - result = libcudf.copying.scatter([indices], scatter_map, [result])[0] + result = copying.scatter([indices], scatter_map, [result])[0] result_series = cudf.Series._from_column(result) if method in {"ffill", "bfill", "pad", "backfill"}: diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index e58a8bd179e..6854cb02aa5 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -27,6 +27,7 @@ import cudf import cudf._lib as libcudf import cudf.core +import cudf.core._internals import cudf.core.algorithms from cudf.api.extensions import no_default from cudf.api.types import ( @@ -37,6 +38,7 @@ ) from cudf.core._base_index import BaseIndex from 
cudf.core._compat import PANDAS_LT_300 +from cudf.core._internals import copying from cudf.core.buffer import acquire_spill_lock from cudf.core.column import ColumnBase, NumericalColumn, as_column from cudf.core.column_accessor import ColumnAccessor @@ -2950,10 +2952,10 @@ def _gather( if not gather_map.nullify and len(self) != gather_map.nrows: raise IndexError("Gather map is out of bounds") return self._from_columns_like_self( - libcudf.copying.gather( - list(self.index._columns + self._columns) + copying.gather( + itertools.chain(self.index._columns, self._columns) if keep_index - else list(self._columns), + else self._columns, gather_map.column, nullify=gather_map.nullify, ), @@ -3033,16 +3035,24 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self: keep_index=keep_index, ) - columns_to_slice = [ - *( - self.index._columns - if keep_index and not has_range_index - else [] - ), - *self._columns, - ] + columns_to_slice = ( + itertools.chain(self.index._columns, self._columns) + if keep_index and not has_range_index + else self._columns + ) + with acquire_spill_lock(): + plc_tables = plc.copying.slice( + plc.Table( + [col.to_pylibcudf(mode="read") for col in columns_to_slice] + ), + [start, stop], + ) + sliced = [ + libcudf.column.Column.from_pylibcudf(col) + for col in plc_tables[0].columns() + ] result = self._from_columns_like_self( - libcudf.copying.columns_slice(columns_to_slice, [start, stop])[0], + sliced, self._column_names, None if has_range_index or not keep_index else self.index.names, ) @@ -3052,21 +3062,21 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self: return result def _positions_from_column_names( - self, column_names, offset_by_index_columns=False - ): + self, + column_names: set[abc.Hashable], + offset_by_index_columns: bool = True, + ) -> list[int]: """Map each column name into their positions in the frame. Return positions of the provided column names, offset by the number of index columns if `offset_by_index_columns` is True. The order of indices returned corresponds to the column order in this Frame. """ - num_index_columns = ( - len(self.index._data) if offset_by_index_columns else 0 - ) + start = self.index.nlevels if offset_by_index_columns else 0 return [ - i + num_index_columns - for i, name in enumerate(self._column_names) - if name in set(column_names) + i + for i, name in enumerate(self._column_names, start=start) + if name in column_names ] def drop_duplicates( @@ -3103,7 +3113,7 @@ def drop_duplicates( subset, offset_by_index_columns=not ignore_index ) return self._from_columns_like_self( - libcudf.stream_compaction.drop_duplicates( + cudf.core._internals.stream_compaction.drop_duplicates( list(self._columns) if ignore_index else list(self.index._columns + self._columns), @@ -3116,7 +3126,9 @@ def drop_duplicates( ) @_performance_tracking - def duplicated(self, subset=None, keep="first"): + def duplicated( + self, subset=None, keep: Literal["first", "last", False] = "first" + ) -> cudf.Series: """ Return boolean Series denoting duplicate rows. 
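
For reference, the `_slice` hunk above now routes through pylibcudf directly rather than a libcudf wrapper. A minimal sketch of that call pattern (illustrative data and bounds; assumes cudf and pylibcudf are installed, and omits the `acquire_spill_lock()` context the library wraps around such calls):

    import cudf
    import pylibcudf as plc
    from cudf._lib.column import Column

    # Build a one-column pylibcudf table from a cudf column (illustrative data).
    col = cudf.Series(range(10))._column
    plc_tables = plc.copying.slice(
        plc.Table([col.to_pylibcudf(mode="read")]),
        [2, 7],  # paired (begin, end) offsets; one output table per pair
    )
    # Rewrap the sliced pylibcudf columns as cudf columns.
    sliced = [Column.from_pylibcudf(c) for c in plc_tables[0].columns()]
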
@@ -3216,10 +3228,25 @@ def duplicated(self, subset=None, keep="first"): name = self.name else: columns = [self._data[n] for n in subset] - distinct = libcudf.stream_compaction.distinct_indices( - columns, keep=keep - ) - result = libcudf.copying.scatter( + + _keep_options = { + "first": plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST, + "last": plc.stream_compaction.DuplicateKeepOption.KEEP_LAST, + False: plc.stream_compaction.DuplicateKeepOption.KEEP_NONE, + } + + if (keep_option := _keep_options.get(keep)) is None: + raise ValueError('keep must be either "first", "last" or False') + + with acquire_spill_lock(): + plc_column = plc.stream_compaction.distinct_indices( + plc.Table([col.to_pylibcudf(mode="read") for col in columns]), + keep_option, + plc.types.NullEquality.EQUAL, + plc.types.NanEquality.ALL_EQUAL, + ) + distinct = libcudf.column.Column.from_pylibcudf(plc_column) + result = copying.scatter( [cudf.Scalar(False, dtype=bool)], distinct, [as_column(True, length=len(self), dtype=bool)], @@ -3228,14 +3255,26 @@ def duplicated(self, subset=None, keep="first"): return cudf.Series._from_column(result, index=self.index, name=name) @_performance_tracking - def _empty_like(self, keep_index=True) -> Self: + def _empty_like(self, keep_index: bool = True) -> Self: + with acquire_spill_lock(): + plc_table = plc.copying.empty_like( + plc.Table( + [ + col.to_pylibcudf(mode="read") + for col in ( + itertools.chain(self.index._columns, self._columns) + if keep_index + else self._columns + ) + ] + ) + ) + columns = [ + libcudf.column.Column.from_pylibcudf(col) + for col in plc_table.columns() + ] result = self._from_columns_like_self( - libcudf.copying.columns_empty_like( - [ - *(self.index._columns if keep_index else ()), - *self._columns, - ] - ), + columns, self._column_names, self.index.names if keep_index else None, ) @@ -3243,25 +3282,24 @@ def _empty_like(self, keep_index=True) -> Self: result._data.rangeindex = self._data.rangeindex return result - def _split(self, splits, keep_index=True): + def _split(self, splits, keep_index: bool = True) -> list[Self]: if self._num_rows == 0: return [] - columns_split = libcudf.copying.columns_split( - [ - *(self.index._columns if keep_index else []), - *self._columns, - ], + columns_split = copying.columns_split( + itertools.chain(self.index._columns, self._columns) + if keep_index + else self._columns, splits, ) return [ self._from_columns_like_self( - columns_split[i], + split, self._column_names, self.index.names if keep_index else None, ) - for i in range(len(splits) + 1) + for split in columns_split ] @_performance_tracking @@ -4331,12 +4369,10 @@ def _drop_na_rows(self, how="any", subset=None, thresh=None): data_columns = [col.nans_to_nulls() for col in self._columns] return self._from_columns_like_self( - libcudf.stream_compaction.drop_nulls( + cudf.core._internals.stream_compaction.drop_nulls( [*self.index._columns, *data_columns], how=how, - keys=self._positions_from_column_names( - subset, offset_by_index_columns=True - ), + keys=self._positions_from_column_names(subset), thresh=thresh, ), self._column_names, @@ -4356,7 +4392,7 @@ def _apply_boolean_mask(self, boolean_mask: BooleanMask, keep_index=True): f"{len(boolean_mask.column)} not {len(self)}" ) return self._from_columns_like_self( - libcudf.stream_compaction.apply_boolean_mask( + cudf.core._internals.stream_compaction.apply_boolean_mask( list(self.index._columns + self._columns) if keep_index else list(self._columns), @@ -6267,17 +6303,16 @@ def ge(self, other, axis="columns", 
level=None, fill_value=None): other=other, op="__ge__", fill_value=fill_value, can_reindex=True ) - def _preprocess_subset(self, subset): + def _preprocess_subset(self, subset) -> set[abc.Hashable]: if subset is None: subset = self._column_names elif ( - not np.iterable(subset) - or isinstance(subset, str) + is_scalar(subset) or isinstance(subset, tuple) and subset in self._column_names ): subset = (subset,) - diff = set(subset) - set(self._data) + diff = set(subset) - set(self._column_names) if len(diff) != 0: raise KeyError(f"columns {diff} do not exist") return subset diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index e7ea91c1f21..6e965ceca66 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -1,7 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. from __future__ import annotations -import itertools from typing import Any import pylibcudf as plc @@ -243,20 +242,12 @@ def _gather_maps(self, left_cols, right_cols): # To reorder maps so that they are in order of the input # tables, we gather from iota on both right and left, and then # sort the gather maps with those two columns as key. - key_order = list( - itertools.chain.from_iterable( - libcudf.copying.gather( - [ - cudf.core.column.as_column( - range(n), dtype=size_type_dtype - ) - ], - map_, - nullify=null, - ) - for map_, n, null in zip(maps, lengths, nullify) + key_order = [ + cudf.core.column.as_column(range(n), dtype=size_type_dtype).take( + map_, nullify=null, check_bounds=False ) - ) + for map_, n, null in zip(maps, lengths, nullify) + ] return sorting.sort_by_key( list(maps), # If how is right, right map is primary sort key. diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index bc280f3d213..1e613e49ffc 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -22,7 +22,7 @@ from cudf.api.types import is_integer, is_list_like, is_object_dtype, is_scalar from cudf.core import column from cudf.core._base_index import _return_get_indexer_result -from cudf.core._internals import sorting +from cudf.core._internals import copying, sorting from cudf.core.algorithms import factorize from cudf.core.buffer import acquire_spill_lock from cudf.core.column_accessor import ColumnAccessor @@ -200,10 +200,8 @@ def __init__( if lo == -1: # Now we can gather and insert null automatically code[code == -1] = np.iinfo(size_type_dtype).min - result_col = libcudf.copying.gather( - [level._column], code, nullify=True - ) - source_data[i] = result_col[0]._with_type_metadata(level.dtype) + result_col = level._column.take(code, nullify=True) + source_data[i] = result_col._with_type_metadata(level.dtype) super().__init__(ColumnAccessor(source_data)) self._levels = new_levels @@ -1934,7 +1932,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): ) scatter_map = libcudf.column.Column.from_pylibcudf(left_plc) indices = libcudf.column.Column.from_pylibcudf(right_plc) - result = libcudf.copying.scatter([indices], scatter_map, [result])[0] + result = copying.scatter([indices], scatter_map, [result])[0] result_series = cudf.Series._from_column(result) if method in {"ffill", "bfill", "pad", "backfill"}: diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index ffcb5d273a0..0abd42d4d4e 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -1030,7 +1030,8 @@ def as_tuple(x): { name: idx._column for name, idx in zip( - 
names, target._split(range(nrows, new_size, nrows)) + names, + target._split(list(range(nrows, new_size, nrows))), ) } ) diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index f6d0664758f..9c8da020ddc 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -380,7 +380,7 @@ def where(self, cond, other=None, inplace=False): source_col=self._column, other=other, inplace=inplace ) - result = cudf._lib.copying.copy_if_else(input_col, other, cond) + result = input_col.copy_if_else(other, cond) return result._with_type_metadata(self.dtype) @_performance_tracking diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 40348461f8c..6d3dc2dc7d9 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -8,7 +8,6 @@ import pandas as pd import cudf -from cudf import _lib as libcudf from cudf.api.types import _is_non_decimal_numeric_dtype, is_string_dtype from cudf.core._internals import unary from cudf.core.column import as_column @@ -251,9 +250,9 @@ def _convert_str_col( return converted_col.astype(dtype=cudf.dtype("float64")) # type: ignore[return-value] else: if errors == "coerce": - converted_col = libcudf.string_casting.stod(converted_col) non_numerics = is_float.unary_operator("not") converted_col[non_numerics] = None + converted_col = converted_col.astype(np.dtype(np.float64)) # type: ignore[assignment] return converted_col # type: ignore[return-value] else: raise ValueError("Unable to convert some strings to numerics.") diff --git a/python/cudf/cudf/io/avro.py b/python/cudf/cudf/io/avro.py index 11730e98c95..4966cdb86e1 100644 --- a/python/cudf/cudf/io/avro.py +++ b/python/cudf/cudf/io/avro.py @@ -33,11 +33,18 @@ def read_avro( if not isinstance(skip_rows, int) or skip_rows < 0: raise TypeError("skip_rows must be an int >= 0") - plc_result = plc.io.avro.read_avro( - plc.io.types.SourceInfo([filepath_or_buffer]), - columns, - skip_rows, - num_rows, + options = ( + plc.io.avro.AvroReaderOptions.builder( + plc.io.types.SourceInfo([filepath_or_buffer]) + ) + .skip_rows(skip_rows) + .num_rows(num_rows) + .build() ) + if columns is not None and len(columns) > 0: + options.set_columns(columns) + + plc_result = plc.io.avro.read_avro(options) + return cudf.DataFrame._from_data(*data_from_pylibcudf_io(plc_result)) diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py index fe8e446f9c0..3b3fd5f7c56 100644 --- a/python/cudf/cudf/io/dlpack.py +++ b/python/cudf/cudf/io/dlpack.py @@ -1,13 +1,14 @@ # Copyright (c) 2019-2024, NVIDIA CORPORATION. +from __future__ import annotations +import pylibcudf as plc import cudf -from cudf._lib import interop as libdlpack from cudf.core.column import ColumnBase from cudf.utils import ioutils -def from_dlpack(pycapsule_obj): +def from_dlpack(pycapsule_obj) -> cudf.Series | cudf.DataFrame: """Converts from a DLPack tensor to a cuDF object. DLPack is an open-source memory tensor structure: @@ -33,18 +34,21 @@ def from_dlpack(pycapsule_obj): cuDF from_dlpack() assumes column-major (Fortran order) input. If the input tensor is row-major, transpose it before passing it to this function. 
""" + plc_table = plc.interop.from_dlpack(pycapsule_obj) + data = dict( + enumerate( + (ColumnBase.from_pylibcudf(col) for col in plc_table.columns()) + ) + ) - columns = libdlpack.from_dlpack(pycapsule_obj) - data = dict(enumerate(columns)) - - if len(columns) == 1: + if len(data) == 1: return cudf.Series._from_data(data) else: return cudf.DataFrame._from_data(data) @ioutils.doc_to_dlpack() -def to_dlpack(cudf_obj): +def to_dlpack(cudf_obj: cudf.Series | cudf.DataFrame | cudf.BaseIndex): """Converts a cuDF object to a DLPack tensor. DLPack is an open-source memory tensor structure: @@ -80,13 +84,14 @@ def to_dlpack(cudf_obj): if any( not cudf.api.types._is_non_decimal_numeric_dtype(dtype) - for _, dtype in gdf._dtypes + for _, dtype in gdf._dtypes # type: ignore[union-attr] ): raise TypeError("non-numeric data not yet supported") dtype = cudf.utils.dtypes.find_common_type( - [dtype for _, dtype in gdf._dtypes] + [dtype for _, dtype in gdf._dtypes] # type: ignore[union-attr] ) gdf = gdf.astype(dtype) - - return libdlpack.to_dlpack([*gdf._columns]) + return plc.interop.to_dlpack( + plc.Table([col.to_pylibcudf(mode="read") for col in gdf._columns]) + ) diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py index 89af00c713d..e0c9e535e6f 100644 --- a/python/cudf/cudf/io/json.py +++ b/python/cudf/cudf/io/json.py @@ -91,11 +91,6 @@ def read_json( if dtype is None: dtype = True - if kwargs: - raise ValueError( - "cudf engine doesn't support the " - f"following keyword arguments: {list(kwargs.keys())}" - ) if args: raise ValueError( "cudf engine doesn't support the " @@ -198,6 +193,7 @@ def read_json( mixed_types_as_string=mixed_types_as_string, prune_columns=prune_columns, recovery_mode=c_on_bad_lines, + extra_parameters=kwargs, ) df = cudf.DataFrame._from_data( @@ -291,21 +287,25 @@ def _plc_write_json( rows_per_chunk: int = 1024 * 64, # 64K rows ) -> None: try: - plc.io.json.write_json( - plc.io.SinkInfo([path_or_buf]), - plc.io.TableWithMetadata( - plc.Table( - [col.to_pylibcudf(mode="read") for col in table._columns] - ), - colnames, + tbl_w_meta = plc.io.TableWithMetadata( + plc.Table( + [col.to_pylibcudf(mode="read") for col in table._columns] ), - na_rep, - include_nulls, - lines, - rows_per_chunk, - true_value="true", - false_value="false", + colnames, + ) + options = ( + plc.io.json.JsonWriterOptions.builder( + plc.io.SinkInfo([path_or_buf]), tbl_w_meta.tbl + ) + .metadata(tbl_w_meta) + .na_rep(na_rep) + .include_nulls(include_nulls) + .lines(lines) + .build() ) + if rows_per_chunk != np.iinfo(np.int32).max: + options.set_rows_per_chunk(rows_per_chunk) + plc.io.json.write_json(options) except OverflowError as err: raise OverflowError( f"Writing JSON file with rows_per_chunk={rows_per_chunk} failed. 
" diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 153ee0fa01a..c13489630a3 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -25,9 +25,7 @@ from cudf._lib.column import Column from cudf._lib.utils import ( _data_from_columns, - _index_level_name, data_from_pylibcudf_io, - generate_pandas_metadata, ) from cudf.api.types import is_list_like from cudf.core.buffer import acquire_spill_lock @@ -128,7 +126,7 @@ def _plc_write_parquet( tbl_meta = plc.io.types.TableInputMetadata(plc_table) for level, idx_name in enumerate(table.index.names): tbl_meta.column_metadata[level].set_name( - _index_level_name(idx_name, level, table._column_names) + ioutils._index_level_name(idx_name, level, table._column_names) ) num_index_cols_meta = len(table.index.names) else: @@ -162,7 +160,7 @@ def _plc_write_parquet( if partitions_info is not None: user_data = [ { - "pandas": generate_pandas_metadata( + "pandas": ioutils.generate_pandas_metadata( table.iloc[start_row : start_row + num_row].copy( deep=False ), @@ -172,7 +170,9 @@ def _plc_write_parquet( for start_row, num_row in partitions_info ] else: - user_data = [{"pandas": generate_pandas_metadata(table, index)}] + user_data = [ + {"pandas": ioutils.generate_pandas_metadata(table, index)} + ] if header_version not in ("1.0", "2.0"): raise ValueError( @@ -1737,7 +1737,7 @@ def _initialize_chunked_state( False if isinstance(table.index, cudf.RangeIndex) else self.index ) user_data = [ - {"pandas": generate_pandas_metadata(table, index)} + {"pandas": ioutils.generate_pandas_metadata(table, index)} ] * num_partitions comp_type = _get_comp_type(self.compression) stat_freq = _get_stat_freq(self.statistics) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index db24fdd2a29..8e1dba858c3 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -950,3 +950,13 @@ def test_index_set_categories(ordered): expected = pd_ci.set_categories([1, 2, 3, 4], ordered=ordered) result = cudf_ci.set_categories([1, 2, 3, 4], ordered=ordered) assert_eq(result, expected) + + +def test_categorical_interval_pandas_roundtrip(): + expected = cudf.Series(cudf.interval_range(0, 5)).astype("category") + result = cudf.Series.from_pandas(expected.to_pandas()) + assert_eq(result, expected) + + expected = pd.Series(pd.interval_range(0, 5)).astype("category") + result = cudf.Series.from_pandas(expected).to_pandas() + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 260b481b933..da0aa5be6f5 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -10,7 +10,6 @@ import cudf from cudf import NA -from cudf._lib.copying import get_element from cudf.api.types import is_scalar from cudf.core.column.column import column_empty from cudf.testing import assert_eq @@ -715,9 +714,8 @@ def test_list_scalar_host_construction_null(elem_type, nesting_level): ], ) def test_list_scalar_device_construction(data): - col = cudf.Series([data])._column - slr = get_element(col, 0) - assert slr.value == data + res = cudf.Series([data])._column.element_indexing(0) + assert res == data @pytest.mark.parametrize("nesting_level", [1, 2, 3]) @@ -729,10 +727,8 @@ def test_list_scalar_device_construction_null(nesting_level): arrow_type = pa.infer_type(data) arrow_arr = pa.array([None], type=arrow_type) - col = cudf.Series(arrow_arr)._column - slr = 
get_element(col, 0) - - assert slr.value is cudf.NA + res = cudf.Series(arrow_arr)._column.element_indexing(0) + assert res is cudf.NA @pytest.mark.parametrize("input_obj", [[[1, NA, 3]], [[1, NA, 3], [4, 5, NA]]]) diff --git a/python/cudf/cudf/tests/test_pack.py b/python/cudf/cudf/tests/test_pack.py deleted file mode 100644 index b474bbe9bd8..00000000000 --- a/python/cudf/cudf/tests/test_pack.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pickle -import sys - -import numpy as np -import pandas as pd - -from cudf import DataFrame, Index, Series -from cudf._lib.copying import pack, unpack -from cudf.testing import assert_eq - - -def test_sizeof_packed_dataframe(): - rng = np.random.default_rng(seed=0) - df = DataFrame() - nelem = 1000 - df["keys"] = hkeys = np.arange(nelem, dtype=np.float64) - df["vals"] = hvals = rng.random(nelem) - packed = pack(df) - - nbytes = hkeys.nbytes + hvals.nbytes - sizeof = sys.getsizeof(packed) - assert sizeof < nbytes - - serialized_nbytes = len( - pickle.dumps(packed, protocol=pickle.HIGHEST_PROTOCOL) - ) - - # assert at least sizeof bytes were serialized - assert serialized_nbytes >= sizeof - - -def check_packed_equality(df): - # basic - assert_packed_frame_equality(df) - # sliced - assert_packed_frame_equality(df[:-1]) - assert_packed_frame_equality(df[1:]) - assert_packed_frame_equality(df[2:-2]) - # sorted - sortvaldf = df.sort_values("vals") - assert isinstance(sortvaldf.index, Index) - assert_packed_frame_equality(sortvaldf) - - -def assert_packed_frame_equality(df): - pdf = df.to_pandas() - - packed = pack(df) - del df - unpacked = unpack(packed) - - assert_eq(unpacked, pdf) - - -def test_packed_dataframe_equality_numeric(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - nelem = 10 - df["keys"] = np.arange(nelem, dtype=np.float64) - df["vals"] = rng.random(nelem) - - check_packed_equality(df) - - -def test_packed_dataframe_equality_categorical(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = pd.Categorical( - ["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"] - ) - df["vals"] = rng.random(len(df)) - - check_packed_equality(df) - - -def test_packed_dataframe_equality_list(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = Series(list([i, i + 1, i + 2] for i in range(10))) - df["vals"] = rng.random(len(df)) - - check_packed_equality(df) - - -def test_packed_dataframe_equality_struct(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = Series( - list({"0": i, "1": i + 1, "2": i + 2} for i in range(10)) - ) - df["vals"] = rng.random(len(df)) - - check_packed_equality(df) - - -def check_packed_unique_pointers(df): - # basic - assert_packed_frame_unique_pointers(df) - # sliced - assert_packed_frame_unique_pointers(df[:-1]) - assert_packed_frame_unique_pointers(df[1:]) - assert_packed_frame_unique_pointers(df[2:-2]) - # sorted - sortvaldf = df.sort_values("vals") - assert 
isinstance(sortvaldf.index, Index) - assert_packed_frame_unique_pointers(sortvaldf) - - -def assert_packed_frame_unique_pointers(df): - unpacked = unpack(pack(df)) - - for col in df: - if df._data[col].data: - assert df._data[col].data.get_ptr(mode="read") != unpacked._data[ - col - ].data.get_ptr(mode="read") - - -def test_packed_dataframe_unique_pointers_numeric(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - nelem = 10 - df["keys"] = np.arange(nelem, dtype=np.float64) - df["vals"] = rng.random(nelem) - - check_packed_unique_pointers(df) - - -def test_packed_dataframe_unique_pointers_categorical(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = pd.Categorical( - ["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"] - ) - df["vals"] = rng.random(len(df)) - - check_packed_unique_pointers(df) - - -def test_packed_dataframe_unique_pointers_list(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = Series(list([i, i + 1, i + 2] for i in range(10))) - df["vals"] = rng.random(len(df)) - - check_packed_unique_pointers(df) - - -def test_packed_dataframe_unique_pointers_struct(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = Series( - list({"0": i, "1": i + 1, "2": i + 2} for i in range(10)) - ) - df["vals"] = rng.random(len(df)) - - check_packed_unique_pointers(df) - - -def check_packed_pickled_equality(df): - # basic - assert_packed_frame_picklable(df) - # sliced - assert_packed_frame_picklable(df[:-1]) - assert_packed_frame_picklable(df[1:]) - assert_packed_frame_picklable(df[2:-2]) - # sorted - sortvaldf = df.sort_values("vals") - assert isinstance(sortvaldf.index, Index) - assert_packed_frame_picklable(sortvaldf) - # out-of-band - buffers = [] - serialbytes = pickle.dumps( - pack(df), protocol=5, buffer_callback=buffers.append - ) - for b in buffers: - assert isinstance(b, pickle.PickleBuffer) - loaded = unpack(pickle.loads(serialbytes, buffers=buffers)) - assert_eq(loaded, df) - - -def assert_packed_frame_picklable(df): - serialbytes = pickle.dumps(pack(df)) - loaded = unpack(pickle.loads(serialbytes)) - assert_eq(loaded, df) - - -def test_pickle_packed_dataframe_numeric(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - nelem = 10 - df["keys"] = np.arange(nelem, dtype=np.float64) - df["vals"] = rng.random(nelem) - - check_packed_pickled_equality(df) - - -def test_pickle_packed_dataframe_categorical(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = pd.Categorical( - ["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"] - ) - df["vals"] = rng.random(len(df)) - - check_packed_pickled_equality(df) - - -def test_pickle_packed_dataframe_list(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = Series(list([i, i + 1, i + 2] for i in range(10))) - df["vals"] = rng.random(len(df)) - - check_packed_pickled_equality(df) - - -def test_pickle_packed_dataframe_struct(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = Series( - list({"0": i, "1": i + 1, "2": i + 2} for i in range(10)) - ) - df["vals"] = rng.random(len(df)) - - check_packed_pickled_equality(df) - - -def check_packed_serialized_equality(df): - # basic - assert_packed_frame_serializable(df) - # sliced - assert_packed_frame_serializable(df[:-1]) - assert_packed_frame_serializable(df[1:]) - assert_packed_frame_serializable(df[2:-2]) - # sorted - sortvaldf = df.sort_values("vals") - assert isinstance(sortvaldf.index, Index) - assert_packed_frame_serializable(sortvaldf) 
- - -def assert_packed_frame_serializable(df): - packed = pack(df) - header, frames = packed.serialize() - loaded = unpack(packed.deserialize(header, frames)) - assert_eq(loaded, df) - - -def test_serialize_packed_dataframe_numeric(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - nelem = 10 - df["keys"] = np.arange(nelem, dtype=np.float64) - df["vals"] = rng.random(nelem) - - check_packed_serialized_equality(df) - - -def test_serialize_packed_dataframe_categorical(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = pd.Categorical( - ["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"] - ) - df["vals"] = rng.random(len(df)) - - check_packed_serialized_equality(df) - - -def test_serialize_packed_dataframe_list(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = Series(list([i, i + 1, i + 2] for i in range(10))) - df["vals"] = rng.random(len(df)) - - check_packed_serialized_equality(df) - - -def test_serialize_packed_dataframe_struct(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = Series( - list({"0": i, "1": i + 1, "2": i + 2} for i in range(10)) - ) - df["vals"] = rng.random(len(df)) - - check_packed_serialized_equality(df) diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py index fcd98831686..c14fab4040b 100644 --- a/python/cudf/cudf/tests/test_scalar.py +++ b/python/cudf/cudf/tests/test_scalar.py @@ -10,10 +10,11 @@ import pytest from packaging import version +import pylibcudf as plc import rmm import cudf -from cudf._lib.copying import get_element +from cudf.core.buffer import acquire_spill_lock from cudf.testing._utils import ( ALL_TYPES, DATETIME_TYPES, @@ -143,8 +144,14 @@ def test_scalar_host_initialization(value): @pytest.mark.parametrize("value", SCALAR_VALUES) def test_scalar_device_initialization(value): column = cudf.Series([value], nan_as_null=False)._column - dev_slr = get_element(column, 0) - + with acquire_spill_lock(): + dev_slr = cudf._lib.scalar.DeviceScalar.from_pylibcudf( + plc.copying.get_element( + column.to_pylibcudf(mode="read"), + 0, + ), + dtype=column.dtype, + ) s = cudf.Scalar.from_device_scalar(dev_slr) assert s._is_device_value_current @@ -164,8 +171,14 @@ def test_scalar_device_initialization(value): def test_scalar_device_initialization_decimal(value, decimal_type): dtype = decimal_type._from_decimal(value) column = cudf.Series([str(value)]).astype(dtype)._column - dev_slr = get_element(column, 0) - + with acquire_spill_lock(): + dev_slr = cudf._lib.scalar.DeviceScalar.from_pylibcudf( + plc.copying.get_element( + column.to_pylibcudf(mode="read"), + 0, + ), + dtype=column.dtype, + ) s = cudf.Scalar.from_device_scalar(dev_slr) assert s._is_device_value_current diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 962a229a839..fceaaf185e8 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -714,21 +714,35 @@ def read_csv( storage_options=None, **kwargs, ): - import dask_expr as dx - from fsspec.utils import stringify_path + try: + # TODO: Remove when cudf is pinned to dask>2024.12.0 + import dask_expr as dx + from dask_expr.io.csv import ReadCSV + from fsspec.utils import stringify_path + + if not isinstance(path, str): + path = stringify_path(path) + return dx.new_collection( + ReadCSV( + path, + dtype_backend=dtype_backend, + storage_options=storage_options, + kwargs=kwargs, + header=header, + dataframe_backend="cudf", + ) + ) + except 
ImportError: + # Requires dask>2024.12.0 + from dask_cudf.io.csv import read_csv - if not isinstance(path, str): - path = stringify_path(path) - return dx.new_collection( - dx.io.csv.ReadCSV( + return read_csv( path, - dtype_backend=dtype_backend, - storage_options=storage_options, - kwargs=kwargs, + *args, header=header, - dataframe_backend="cudf", + storage_options=storage_options, + **kwargs, ) - ) @staticmethod def read_json(*args, **kwargs): diff --git a/python/dask_cudf/dask_cudf/io/csv.py b/python/dask_cudf/dask_cudf/io/csv.py index b22b31a591f..29f98b14511 100644 --- a/python/dask_cudf/dask_cudf/io/csv.py +++ b/python/dask_cudf/dask_cudf/io/csv.py @@ -1,8 +1,193 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from dask_cudf import _deprecated_api +import os +from glob import glob +from warnings import warn -read_csv = _deprecated_api( - "dask_cudf.io.csv.read_csv", - new_api="dask_cudf.read_csv", -) +from fsspec.utils import infer_compression + +from dask import dataframe as dd +from dask.dataframe.io.csv import make_reader +from dask.utils import parse_bytes + +import cudf + + +def read_csv(path, blocksize="default", **kwargs): + """ + Read CSV files into a :class:`.DataFrame`. + + This API parallelizes the :func:`cudf:cudf.read_csv` function in + the following ways: + + It supports loading many files at once using globstrings: + + >>> import dask_cudf + >>> df = dask_cudf.read_csv("myfiles.*.csv") + + In some cases it can break up large files: + + >>> df = dask_cudf.read_csv("largefile.csv", blocksize="256 MiB") + + It can read CSV files from external resources (e.g. S3, HTTP, FTP) + + >>> df = dask_cudf.read_csv("s3://bucket/myfiles.*.csv") + >>> df = dask_cudf.read_csv("https://www.mycloud.com/sample.csv") + + Internally ``read_csv`` uses :func:`cudf:cudf.read_csv` and + supports many of the same keyword arguments with the same + performance guarantees. See the docstring for + :func:`cudf:cudf.read_csv` for more information on available + keyword arguments. + + Parameters + ---------- + path : str, path object, or file-like object + Either a path to a file (a str, :py:class:`pathlib.Path`, or + py._path.local.LocalPath), URL (including http, ftp, and S3 + locations), or any object with a read() method (such as + builtin :py:func:`open` file handler function or + :py:class:`~io.StringIO`). + blocksize : int or str, default "256 MiB" + The target task partition size. If ``None``, a single block + is used for each file. + **kwargs : dict + Passthrough key-word arguments that are sent to + :func:`cudf:cudf.read_csv`. + + Notes + ----- + If any of `skipfooter`/`skiprows`/`nrows` are passed, + `blocksize` will default to None. + + Examples + -------- + >>> import dask_cudf + >>> ddf = dask_cudf.read_csv("sample.csv", usecols=["a", "b"]) + >>> ddf.compute() + a b + 0 1 hi + 1 2 hello + 2 3 ai + + """ + # Set default `blocksize` + if blocksize == "default": + if ( + kwargs.get("skipfooter", 0) != 0 + or kwargs.get("skiprows", 0) != 0 + or kwargs.get("nrows", None) is not None + ): + # Cannot read in blocks if skipfooter, + # skiprows or nrows is passed. 
+ blocksize = None + else: + blocksize = "256 MiB" + + if "://" in str(path): + func = make_reader(cudf.read_csv, "read_csv", "CSV") + return func(path, blocksize=blocksize, **kwargs) + else: + return _internal_read_csv(path=path, blocksize=blocksize, **kwargs) + + +def _internal_read_csv(path, blocksize="256 MiB", **kwargs): + if isinstance(blocksize, str): + blocksize = parse_bytes(blocksize) + + if isinstance(path, list): + filenames = path + elif isinstance(path, str): + filenames = sorted(glob(path)) + elif hasattr(path, "__fspath__"): + filenames = sorted(glob(path.__fspath__())) + else: + raise TypeError(f"Path type not understood:{type(path)}") + + if not filenames: + msg = f"A file in: {filenames} does not exist." + raise FileNotFoundError(msg) + + compression = kwargs.get("compression", "infer") + + if compression == "infer": + # Infer compression from first path by default + compression = infer_compression(filenames[0]) + + if compression and blocksize: + # compressed CSVs reading must read the entire file + kwargs.pop("byte_range", None) + warn( + "Warning %s compression does not support breaking apart files\n" + "Please ensure that each individual file can fit in memory and\n" + "use the keyword ``blocksize=None to remove this message``\n" + "Setting ``blocksize=(size of file)``" % compression + ) + blocksize = None + + if blocksize is None: + return read_csv_without_blocksize(path, **kwargs) + + # Let dask.dataframe generate meta + dask_reader = make_reader(cudf.read_csv, "read_csv", "CSV") + kwargs1 = kwargs.copy() + usecols = kwargs1.pop("usecols", None) + dtype = kwargs1.pop("dtype", None) + meta = dask_reader(filenames[0], **kwargs1)._meta + names = meta.columns + if usecols or dtype: + # Regenerate meta with original kwargs if + # `usecols` or `dtype` was specified + meta = dask_reader(filenames[0], **kwargs)._meta + + i = 0 + path_list = [] + kwargs_list = [] + for fn in filenames: + size = os.path.getsize(fn) + for start in range(0, size, blocksize): + kwargs2 = kwargs.copy() + kwargs2["byte_range"] = ( + start, + blocksize, + ) # specify which chunk of the file we care about + if start != 0: + kwargs2["names"] = names # no header in the middle of the file + kwargs2["header"] = None + path_list.append(fn) + kwargs_list.append(kwargs2) + i += 1 + + return dd.from_map(_read_csv, path_list, kwargs_list, meta=meta) + + +def _read_csv(fn, kwargs): + return cudf.read_csv(fn, **kwargs) + + +def read_csv_without_blocksize(path, **kwargs): + """Read entire CSV with optional compression (gzip/zip) + + Parameters + ---------- + path : str + path to files (support for glob) + """ + if isinstance(path, list): + filenames = path + elif isinstance(path, str): + filenames = sorted(glob(path)) + elif hasattr(path, "__fspath__"): + filenames = sorted(glob(path.__fspath__())) + else: + raise TypeError(f"Path type not understood:{type(path)}") + + meta_kwargs = kwargs.copy() + if "skipfooter" in meta_kwargs: + meta_kwargs.pop("skipfooter") + if "nrows" in meta_kwargs: + meta_kwargs.pop("nrows") + # Read "head" of first file (first 5 rows). + # Convert to empty df for metadata. 
+    meta = cudf.read_csv(filenames[0], nrows=5, **meta_kwargs).iloc[:0]
+    return dd.from_map(cudf.read_csv, filenames, meta=meta, **kwargs)
diff --git a/python/dask_cudf/dask_cudf/io/tests/test_csv.py b/python/dask_cudf/dask_cudf/io/tests/test_csv.py
index a0acb86f5a9..ddfd1c1adac 100644
--- a/python/dask_cudf/dask_cudf/io/tests/test_csv.py
+++ b/python/dask_cudf/dask_cudf/io/tests/test_csv.py
@@ -185,11 +185,6 @@ def test_read_csv_blocksize_none(tmp_path, compression, size):
     df2 = dask_cudf.read_csv(path, blocksize=None, dtype=typ)
     dd.assert_eq(df, df2)

-    # Test chunksize deprecation
-    with pytest.warns(FutureWarning, match="deprecated"):
-        df3 = dask_cudf.read_csv(path, chunksize=None, dtype=typ)
-        dd.assert_eq(df, df3)
-

 @pytest.mark.parametrize("dtype", [{"b": str, "c": int}, None])
 def test_csv_reader_usecols(tmp_path, dtype):
@@ -275,7 +270,3 @@ def test_deprecated_api_paths(tmp_path):
     with pytest.warns(match="dask_cudf.io.read_csv is now deprecated"):
         df2 = dask_cudf.io.read_csv(csv_path)
         dd.assert_eq(df, df2, check_divisions=False)
-
-    with pytest.warns(match="dask_cudf.io.csv.read_csv is now deprecated"):
-        df2 = dask_cudf.io.csv.read_csv(csv_path)
-        dd.assert_eq(df, df2, check_divisions=False)
diff --git a/python/pylibcudf/pylibcudf/io/avro.pxd b/python/pylibcudf/pylibcudf/io/avro.pxd
index 8696fcb3c15..a0fca95d459 100644
--- a/python/pylibcudf/pylibcudf/io/avro.pxd
+++ b/python/pylibcudf/pylibcudf/io/avro.pxd
@@ -1,12 +1,23 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
-from pylibcudf.libcudf.io.avro cimport avro_reader_options
+from pylibcudf.libcudf.io.avro cimport avro_reader_options, avro_reader_options_builder
 from pylibcudf.libcudf.types cimport size_type

-cpdef TableWithMetadata read_avro(
-    SourceInfo source_info,
-    list columns = *,
-    size_type skip_rows = *,
-    size_type num_rows = *
-)
+
+cdef class AvroReaderOptions:
+    cdef avro_reader_options c_obj
+    cdef SourceInfo source
+    cpdef void set_columns(self, list col_names)
+
+
+cdef class AvroReaderOptionsBuilder:
+    cdef avro_reader_options_builder c_obj
+    cdef SourceInfo source
+    cpdef AvroReaderOptionsBuilder columns(self, list col_names)
+    cpdef AvroReaderOptionsBuilder skip_rows(self, size_type skip_rows)
+    cpdef AvroReaderOptionsBuilder num_rows(self, size_type num_rows)
+    cpdef AvroReaderOptions build(self)
+
+cpdef TableWithMetadata read_avro(AvroReaderOptions options)
diff --git a/python/pylibcudf/pylibcudf/io/avro.pyi b/python/pylibcudf/pylibcudf/io/avro.pyi
index 49c2f083702..8cafc9a6573 100644
--- a/python/pylibcudf/pylibcudf/io/avro.pyi
+++ b/python/pylibcudf/pylibcudf/io/avro.pyi
@@ -1,11 +1,16 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from pylibcudf.io.types import SourceInfo, TableWithMetadata

-__all__ = ["read_avro"]
-
-def read_avro(
-    source_info: SourceInfo,
-    columns: list[str] | None = None,
-    skip_rows: int = 0,
-    num_rows: int = -1,
-) -> TableWithMetadata: ...
+__all__ = ["AvroReaderOptions", "AvroReaderOptionsBuilder", "read_avro"]
+
+class AvroReaderOptions:
+    @staticmethod
+    def builder(source: SourceInfo) -> AvroReaderOptionsBuilder: ...
+
+class AvroReaderOptionsBuilder:
+    def columns(self, col_names: list[str]) -> AvroReaderOptionsBuilder: ...
+    def skip_rows(self, skip_rows: int) -> AvroReaderOptionsBuilder: ...
+    def num_rows(self, num_rows: int) -> AvroReaderOptionsBuilder: ...
+    def build(self) -> AvroReaderOptions: ...
+
+def read_avro(options: AvroReaderOptions) -> TableWithMetadata: ...
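For reviewers: a minimal sketch of how the new builder-style Avro read path composes, mirroring the updated test in test_avro.py below. The `avro_bytes` buffer and column names are illustrative placeholders, not part of this change:

    import io
    import pylibcudf as plc

    # Any SourceInfo-compatible source works; `avro_bytes` is a placeholder.
    buffer = io.BytesIO(avro_bytes)
    options = (
        plc.io.avro.AvroReaderOptions.builder(plc.io.types.SourceInfo([buffer]))
        .columns(["col1", "col2"])  # optional: read a subset of columns
        .skip_rows(0)
        .num_rows(-1)  # -1 reads all remaining rows
        .build()
    )
    tbl_w_meta = plc.io.avro.read_avro(options)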
diff --git a/python/pylibcudf/pylibcudf/io/avro.pyx b/python/pylibcudf/pylibcudf/io/avro.pyx
index 4271333511a..c378fca0415 100644
--- a/python/pylibcudf/pylibcudf/io/avro.pyx
+++ b/python/pylibcudf/pylibcudf/io/avro.pyx
@@ -10,52 +10,138 @@ from pylibcudf.libcudf.io.avro cimport (
 )
 from pylibcudf.libcudf.types cimport size_type

-__all__ = ["read_avro"]
+__all__ = ["read_avro", "AvroReaderOptions", "AvroReaderOptionsBuilder"]
+
+
+cdef class AvroReaderOptions:
+    """
+    The settings to use for ``read_avro``.
+
+    For details, see :cpp:class:`cudf::io::avro_reader_options`.
+    """
+    @staticmethod
+    def builder(SourceInfo source):
+        """
+        Create an AvroReaderOptionsBuilder object.
+
+        For details, see :cpp:func:`cudf::io::avro_reader_options::builder`
+
+        Parameters
+        ----------
+        source : SourceInfo
+            The source to read the Avro file from.
+
+        Returns
+        -------
+        AvroReaderOptionsBuilder
+            Builder to build AvroReaderOptions
+        """
+        cdef AvroReaderOptionsBuilder avro_builder = AvroReaderOptionsBuilder.__new__(
+            AvroReaderOptionsBuilder
+        )
+        avro_builder.c_obj = avro_reader_options.builder(source.c_obj)
+        avro_builder.source = source
+        return avro_builder
+
+    cpdef void set_columns(self, list col_names):
+        """
+        Set the names of the columns to be read.
+
+        Parameters
+        ----------
+        col_names : list[str]
+            List of column names
+
+        Returns
+        -------
+        None
+        """
+        cdef vector[string] vec
+        vec.reserve(len(col_names))
+        for name in col_names:
+            vec.push_back(str(name).encode())
+        self.c_obj.set_columns(vec)
+
+
+cdef class AvroReaderOptionsBuilder:
+    cpdef AvroReaderOptionsBuilder columns(self, list col_names):
+        """
+        Set the names of the columns to be read.
+
+        Parameters
+        ----------
+        col_names : list
+            List of column names
+
+        Returns
+        -------
+        AvroReaderOptionsBuilder
+        """
+        cdef vector[string] vec
+        vec.reserve(len(col_names))
+        for name in col_names:
+            vec.push_back(str(name).encode())
+        self.c_obj.columns(vec)
+        return self
+
+    cpdef AvroReaderOptionsBuilder skip_rows(self, size_type skip_rows):
+        """
+        Sets the number of rows to skip.
+
+        Parameters
+        ----------
+        skip_rows : size_type
+            Number of rows to skip from the start
+
+        Returns
+        -------
+        AvroReaderOptionsBuilder
+        """
+        self.c_obj.skip_rows(skip_rows)
+        return self
+
+    cpdef AvroReaderOptionsBuilder num_rows(self, size_type num_rows):
+        """
+        Sets the number of rows to read.
+
+        Parameters
+        ----------
+        num_rows : size_type
+            Number of rows to read after skipping
+
+        Returns
+        -------
+        AvroReaderOptionsBuilder
+        """
+        self.c_obj.num_rows(num_rows)
+        return self
+
+    cpdef AvroReaderOptions build(self):
+        """Create an AvroReaderOptions object"""
+        cdef AvroReaderOptions avro_options = AvroReaderOptions.__new__(
+            AvroReaderOptions
+        )
+        avro_options.c_obj = move(self.c_obj.build())
+        avro_options.source = self.source
+        return avro_options


 cpdef TableWithMetadata read_avro(
-    SourceInfo source_info,
-    list columns = None,
-    size_type skip_rows = 0,
-    size_type num_rows = -1
+    AvroReaderOptions options
 ):
     """
-    Reads an Avro dataset into a :py:class:`~.types.TableWithMetadata`.
+    Read from Avro format.
+
+    The source to read from and options are encapsulated
+    by the `options` object. For details, see :cpp:func:`read_avro`.

     Parameters
     ----------
-    source_info: SourceInfo
-        The SourceInfo object to read the avro dataset from.
-    columns: list, default None
-        Optional columns to read, if not provided, reads all columns in the file.
-    skip_rows: size_type, default 0
-        The number of rows to skip.
- num_rows: size_type, default -1 - The number of rows to read, after skipping rows. - If -1 is passed, all rows will be read. - - Returns - ------- - TableWithMetadata - The Table and its corresponding metadata (column names) that were read in. + options: AvroReaderOptions + Settings for controlling reading behavior """ - cdef vector[string] c_columns - if columns is not None and len(columns) > 0: - c_columns.reserve(len(columns)) - for col in columns: - c_columns.push_back(str(col).encode()) - - cdef avro_reader_options avro_opts = ( - avro_reader_options.builder(source_info.c_obj) - .columns(c_columns) - .skip_rows(skip_rows) - .num_rows(num_rows) - .build() - ) - with nogil: - c_result = move(cpp_read_avro(avro_opts)) + c_result = move(cpp_read_avro(options.c_obj)) return TableWithMetadata.from_libcudf(c_result) diff --git a/python/pylibcudf/pylibcudf/io/json.pxd b/python/pylibcudf/pylibcudf/io/json.pxd index f65c1034598..4894ca3bd6e 100644 --- a/python/pylibcudf/pylibcudf/io/json.pxd +++ b/python/pylibcudf/pylibcudf/io/json.pxd @@ -6,8 +6,13 @@ from pylibcudf.io.types cimport ( TableWithMetadata, compression_type, ) -from pylibcudf.libcudf.io.json cimport json_recovery_mode_t +from pylibcudf.libcudf.io.json cimport ( + json_recovery_mode_t, + json_writer_options, + json_writer_options_builder, +) from pylibcudf.libcudf.types cimport size_type +from pylibcudf.table cimport Table cpdef TableWithMetadata read_json( @@ -21,19 +26,28 @@ cpdef TableWithMetadata read_json( bool mixed_types_as_string = *, bool prune_columns = *, json_recovery_mode_t recovery_mode = *, + dict extra_parameters = *, ) +cdef class JsonWriterOptions: + cdef json_writer_options c_obj + cdef SinkInfo sink + cdef Table table + cpdef void set_rows_per_chunk(self, size_type val) + cpdef void set_true_value(self, str val) + cpdef void set_false_value(self, str val) -cpdef void write_json( - SinkInfo sink_info, - TableWithMetadata tbl, - str na_rep = *, - bool include_nulls = *, - bool lines = *, - size_type rows_per_chunk = *, - str true_value = *, - str false_value = * -) +cdef class JsonWriterOptionsBuilder: + cdef json_writer_options_builder c_obj + cdef SinkInfo sink + cdef Table table + cpdef JsonWriterOptionsBuilder metadata(self, TableWithMetadata tbl_w_meta) + cpdef JsonWriterOptionsBuilder na_rep(self, str val) + cpdef JsonWriterOptionsBuilder include_nulls(self, bool val) + cpdef JsonWriterOptionsBuilder lines(self, bool val) + cpdef JsonWriterOptions build(self) + +cpdef void write_json(JsonWriterOptions options) cpdef tuple chunked_read_json( SourceInfo source_info, diff --git a/python/pylibcudf/pylibcudf/io/json.pyi b/python/pylibcudf/pylibcudf/io/json.pyi index b2bc6a43700..e0489742cd0 100644 --- a/python/pylibcudf/pylibcudf/io/json.pyi +++ b/python/pylibcudf/pylibcudf/io/json.pyi @@ -2,6 +2,8 @@ from collections.abc import Mapping from typing import TypeAlias +from typing_extensions import Self + from pylibcudf.column import Column from pylibcudf.io.types import ( CompressionType, @@ -10,6 +12,7 @@ from pylibcudf.io.types import ( SourceInfo, TableWithMetadata, ) +from pylibcudf.table import Table from pylibcudf.types import DataType ChildNameToTypeMap: TypeAlias = Mapping[str, ChildNameToTypeMap] @@ -28,16 +31,22 @@ def read_json( prune_columns: bool = False, recovery_mode: JSONRecoveryMode = JSONRecoveryMode.FAIL, ) -> TableWithMetadata: ... 
-def write_json(
-    sink_info: SinkInfo,
-    table_w_meta: TableWithMetadata,
-    na_rep: str = "",
-    include_nulls: bool = False,
-    lines: bool = False,
-    rows_per_chunk: int = 2**32 - 1,
-    true_value: str = "true",
-    false_value: str = "false",
-) -> None: ...
+
+class JsonWriterOptions:
+    @staticmethod
+    def builder(sink: SinkInfo, table: Table) -> JsonWriterOptionsBuilder: ...
+    def set_rows_per_chunk(self, val: int) -> None: ...
+    def set_true_value(self, val: str) -> None: ...
+    def set_false_value(self, val: str) -> None: ...
+
+class JsonWriterOptionsBuilder:
+    def metadata(self, tbl_w_meta: TableWithMetadata) -> Self: ...
+    def na_rep(self, val: str) -> Self: ...
+    def include_nulls(self, val: bool) -> Self: ...
+    def lines(self, val: bool) -> Self: ...
+    def build(self) -> JsonWriterOptions: ...
+
+def write_json(options: JsonWriterOptions) -> None: ...

 def chunked_read_json(
     source_info: SourceInfo,
     dtypes: list[NameAndType] | None = None,
diff --git a/python/pylibcudf/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx
index ad2989925c9..16078b31566 100644
--- a/python/pylibcudf/pylibcudf/io/json.pyx
+++ b/python/pylibcudf/pylibcudf/io/json.pyx
@@ -1,6 +1,5 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from libcpp cimport bool
-from libcpp.limits cimport numeric_limits
 from libcpp.map cimport map
 from libcpp.string cimport string
 from libcpp.utility cimport move
@@ -17,13 +16,18 @@ from pylibcudf.libcudf.io.json cimport (
 )
 from pylibcudf.libcudf.io.types cimport (
     compression_type,
-    table_metadata,
     table_with_metadata,
 )
 from pylibcudf.libcudf.types cimport data_type, size_type
 from pylibcudf.types cimport DataType

-__all__ = ["chunked_read_json", "read_json", "write_json"]
+__all__ = [
+    "chunked_read_json",
+    "read_json",
+    "write_json",
+    "JsonWriterOptions",
+    "JsonWriterOptionsBuilder"
+]

 cdef map[string, schema_element] _generate_schema_map(list dtypes):
     cdef map[string, schema_element] schema_map
@@ -57,8 +61,10 @@ cdef json_reader_options _setup_json_reader_options(
     bool keep_quotes,
     bool mixed_types_as_string,
     bool prune_columns,
-    json_recovery_mode_t recovery_mode):
+    json_recovery_mode_t recovery_mode,
+    dict extra_parameters=None):

+    cdef vector[string] na_vec
     cdef vector[data_type] types_vec
     cdef json_reader_options opts = (
         json_reader_options.builder(source_info.c_obj)
@@ -81,6 +87,39 @@ cdef json_reader_options _setup_json_reader_options(
     opts.enable_keep_quotes(keep_quotes)
     opts.enable_mixed_types_as_string(mixed_types_as_string)
     opts.enable_prune_columns(prune_columns)
+
+    # These hidden options are subject to change without a deprecation cycle.
+    # They are used to test libcudf JSON reader features and are not used in cuDF.
+    if extra_parameters is not None:
+        for key, value in extra_parameters.items():
+            if key == 'delimiter':
+                opts.set_delimiter(ord(value))
+            elif key == 'dayfirst':
+                opts.enable_dayfirst(value)
+            elif key == 'experimental':
+                opts.enable_experimental(value)
+            elif key == 'normalize_single_quotes':
+                opts.enable_normalize_single_quotes(value)
+            elif key == 'normalize_whitespace':
+                opts.enable_normalize_whitespace(value)
+            elif key == 'strict_validation':
+                opts.set_strict_validation(value)
+            elif key == 'allow_unquoted_control_chars':
+                opts.allow_unquoted_control_chars(value)
+            elif key == 'allow_numeric_leading_zeros':
+                opts.allow_numeric_leading_zeros(value)
+            elif key == 'allow_nonnumeric_numbers':
+                opts.allow_nonnumeric_numbers(value)
+            elif key == 'na_values':
+                for na_val in value:
+                    if isinstance(na_val, str):
+                        na_vec.push_back(na_val.encode())
+                opts.set_na_values(na_vec)
+            else:
+                raise ValueError(
+                    "cudf engine doesn't support the "
+                    f"'{key}' keyword argument for read_json"
+                )
     return opts

@@ -196,6 +235,7 @@ cpdef TableWithMetadata read_json(
     bool mixed_types_as_string = False,
     bool prune_columns = False,
     json_recovery_mode_t recovery_mode = json_recovery_mode_t.FAIL,
+    dict extra_parameters = None,
 ):
     """Reads an JSON file into a :py:class:`~.types.TableWithMetadata`.

@@ -227,6 +267,8 @@ cpdef TableWithMetadata read_json(
     recovery_mode : JSONRecoveryMode, default JSONRecoveryMode.FAIL
         Whether to raise an error or set corresponding values to null
        when encountering an invalid JSON line.
+    extra_parameters : dict, default None
+        Additional hidden parameters to pass to the JSON reader.

     Returns
     -------
@@ -244,6 +286,7 @@ cpdef TableWithMetadata read_json(
         mixed_types_as_string=mixed_types_as_string,
         prune_columns=prune_columns,
         recovery_mode=recovery_mode,
+        extra_parameters=extra_parameters,
     )

     # Read JSON
@@ -255,56 +298,171 @@ cpdef TableWithMetadata read_json(
     return TableWithMetadata.from_libcudf(c_result)


-cpdef void write_json(
-    SinkInfo sink_info,
-    TableWithMetadata table_w_meta,
-    str na_rep = "",
-    bool include_nulls = False,
-    bool lines = False,
-    size_type rows_per_chunk = numeric_limits[size_type].max(),
-    str true_value = "true",
-    str false_value = "false"
-):
+cdef class JsonWriterOptions:
     """
-    Writes a :py:class:`~pylibcudf.table.Table` to JSON format.
+    The settings to use for ``write_json``.

-    Parameters
-    ----------
-    sink_info: SinkInfo
-        The SinkInfo object to write the JSON to.
-    table_w_meta: TableWithMetadata
-        The TableWithMetadata object containing the Table to write
-    na_rep: str, default ""
-        The string representation for null values.
-    include_nulls: bool, default False
+    For details, see :cpp:class:`cudf::io::json_writer_options`.
+    """
+    @staticmethod
+    def builder(SinkInfo sink, Table table):
+        """
+        Create a JsonWriterOptionsBuilder object.
+
+        Parameters
+        ----------
+        sink : SinkInfo
+            The sink used for writer output
+        table : Table
+            Table to be written to output
+
+        Returns
+        -------
+        JsonWriterOptionsBuilder
+            Builder to build JsonWriterOptions
+        """
+        cdef JsonWriterOptionsBuilder json_builder = (
+            JsonWriterOptionsBuilder.__new__(JsonWriterOptionsBuilder)
+        )
+        json_builder.c_obj = json_writer_options.builder(sink.c_obj, table.view())
+        json_builder.sink = sink
+        json_builder.table = table
+        return json_builder
+
+    cpdef void set_rows_per_chunk(self, size_type val):
+        """
+        Sets the maximum number of rows to write per chunk.
+
+        Parameters
+        ----------
+        val : size_type
+            Maximum number of rows to write per chunk
+
+        Returns
+        -------
+        None
+        """
+        self.c_obj.set_rows_per_chunk(val)
+
+    cpdef void set_true_value(self, str val):
+        """
+        Sets the string used for values != 0.
+
+        Parameters
+        ----------
+        val : str
+            String to represent values != 0
+
+        Returns
+        -------
+        None
+        """
+        self.c_obj.set_true_value(val.encode())
+
+    cpdef void set_false_value(self, str val):
+        """
+        Sets the string used for values == 0.
+
+        Parameters
+        ----------
+        val : str
+            String to represent values == 0
+
+        Returns
+        -------
+        None
+        """
+        self.c_obj.set_false_value(val.encode())
+
+
+cdef class JsonWriterOptionsBuilder:
+    cpdef JsonWriterOptionsBuilder metadata(self, TableWithMetadata tbl_w_meta):
+        """
+        Sets optional metadata (with column names).
+
+        Parameters
+        ----------
+        tbl_w_meta : TableWithMetadata
+            Associated metadata
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.metadata(tbl_w_meta.metadata)
+        return self
+
+    cpdef JsonWriterOptionsBuilder na_rep(self, str val):
+        """
+        Sets the string used for null entries.
+
+        Parameters
+        ----------
+        val : str
+            String to represent null value
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.na_rep(val.encode())
+        return self
+
+    cpdef JsonWriterOptionsBuilder include_nulls(self, bool val):
+        """
         Enables/Disables output of nulls as 'null'.
-    lines: bool, default False
-        If `True`, write output in the JSON lines format.
-    rows_per_chunk: size_type, defaults to length of the input table
-        The maximum number of rows to write at a time.
-    true_value: str, default "true"
-        The string representation for values != 0 in INT8 types.
-    false_value: str, default "false"
-        The string representation for values == 0 in INT8 types.
+
+        Parameters
+        ----------
+        val : bool
+            Boolean value to enable/disable
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.include_nulls(val)
+        return self
+
+    cpdef JsonWriterOptionsBuilder lines(self, bool val):
+        """
+        Enables/Disables JSON lines for records format.
+
+        Parameters
+        ----------
+        val : bool
+            Boolean value to enable/disable
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.lines(val)
+        return self
+
+    cpdef JsonWriterOptions build(self):
+        """Create a JsonWriterOptions object"""
+        cdef JsonWriterOptions json_options = JsonWriterOptions.__new__(
+            JsonWriterOptions
+        )
+        json_options.c_obj = move(self.c_obj.build())
+        json_options.sink = self.sink
+        json_options.table = self.table
+        return json_options
+
+
+cpdef void write_json(JsonWriterOptions options):
     """
-    cdef table_metadata tbl_meta = table_w_meta.metadata
-    cdef string na_rep_c = na_rep.encode()
-
-    cdef json_writer_options options = (
-        json_writer_options.builder(sink_info.c_obj, table_w_meta.tbl.view())
-        .metadata(tbl_meta)
-        .na_rep(na_rep_c)
-        .include_nulls(include_nulls)
-        .lines(lines)
-        .build()
-    )
+    Writes a set of columns to JSON format.
- if rows_per_chunk != numeric_limits[size_type].max(): - options.set_rows_per_chunk(rows_per_chunk) - if true_value != "true": - options.set_true_value(true_value.encode()) - if false_value != "false": - options.set_false_value(false_value.encode()) + Parameters + ---------- + options : JsonWriterOptions + Settings for controlling writing behavior + Returns + ------- + None + """ with nogil: - cpp_write_json(options) + cpp_write_json(options.c_obj) diff --git a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd index a7ca6978621..c241c478f25 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd @@ -5,6 +5,7 @@ from libc.stdint cimport int32_t, uint8_t from libcpp cimport bool from libcpp.map cimport map from libcpp.memory cimport shared_ptr, unique_ptr +from libcpp.optional cimport optional from libcpp.string cimport string from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler @@ -17,6 +18,7 @@ cdef extern from "cudf/io/json.hpp" \ cdef struct schema_element: data_type type map[string, schema_element] child_types + optional[vector[string]] column_order cpdef enum class json_recovery_mode_t(int32_t): FAIL @@ -30,30 +32,51 @@ cdef extern from "cudf/io/json.hpp" \ except +libcudf_exception_handler size_t get_byte_range_offset() except +libcudf_exception_handler size_t get_byte_range_size() except +libcudf_exception_handler + size_t get_byte_range_size_with_padding() except +libcudf_exception_handler + size_t get_byte_range_padding() except +libcudf_exception_handler + char get_delimiter() except +libcudf_exception_handler bool is_enabled_lines() except +libcudf_exception_handler bool is_enabled_mixed_types_as_string() except +libcudf_exception_handler bool is_enabled_prune_columns() except +libcudf_exception_handler - bool is_enabled_dayfirst() except +libcudf_exception_handler bool is_enabled_experimental() except +libcudf_exception_handler + bool is_enabled_dayfirst() except +libcudf_exception_handler + bool is_enabled_keep_quotes() except +libcudf_exception_handler + bool is_enabled_normalize_single_quotes() except +libcudf_exception_handler + bool is_enabled_normalize_whitespace() except +libcudf_exception_handler + json_recovery_mode_t recovery_mode() except +libcudf_exception_handler + bool is_strict_validation() except +libcudf_exception_handler + bool is_allowed_numeric_leading_zeros() except +libcudf_exception_handler + bool is_allowed_nonnumeric_numbers() except +libcudf_exception_handler + bool is_allowed_unquoted_control_chars() except +libcudf_exception_handler + vector[string] get_na_values() except +libcudf_exception_handler # setter - void set_dtypes( - vector[data_type] types - ) except +libcudf_exception_handler - void set_dtypes( - map[string, schema_element] types - ) except +libcudf_exception_handler - void set_compression( - cudf_io_types.compression_type compression - ) except +libcudf_exception_handler + void set_dtypes(vector[data_type] types) except +libcudf_exception_handler + void set_dtypes(map[string, data_type] types) except +libcudf_exception_handler + void set_dtypes(map[string, schema_element] types)\ + except +libcudf_exception_handler + void set_dtypes(schema_element types) except +libcudf_exception_handler + void set_compression(cudf_io_types.compression_type comp_type)\ + except +libcudf_exception_handler void set_byte_range_offset(size_t offset) except +libcudf_exception_handler void 
set_byte_range_size(size_t size) except +libcudf_exception_handler + void set_delimiter(char delimiter) except +libcudf_exception_handler void enable_lines(bool val) except +libcudf_exception_handler void enable_mixed_types_as_string(bool val) except +libcudf_exception_handler void enable_prune_columns(bool val) except +libcudf_exception_handler - void enable_dayfirst(bool val) except +libcudf_exception_handler void enable_experimental(bool val) except +libcudf_exception_handler + void enable_dayfirst(bool val) except +libcudf_exception_handler void enable_keep_quotes(bool val) except +libcudf_exception_handler + void enable_normalize_single_quotes(bool val) except +libcudf_exception_handler + + void enable_normalize_whitespace(bool val) except +libcudf_exception_handler + void set_recovery_mode(json_recovery_mode_t val)\ + except +libcudf_exception_handler + void set_strict_validation(bool val) except +libcudf_exception_handler + void allow_numeric_leading_zeros(bool val) except +libcudf_exception_handler + void allow_nonnumeric_numbers(bool val) except +libcudf_exception_handler + void allow_unquoted_control_chars(bool val) except +libcudf_exception_handler + void set_na_values(vector[string] vals) except +libcudf_exception_handler @staticmethod json_reader_options_builder builder( @@ -74,6 +97,9 @@ cdef extern from "cudf/io/json.hpp" \ json_reader_options_builder& dtypes( map[string, schema_element] types ) except +libcudf_exception_handler + json_reader_options_builder& dtypes( + schema_element types + ) except +libcudf_exception_handler json_reader_options_builder& compression( cudf_io_types.compression_type compression ) except +libcudf_exception_handler @@ -83,6 +109,9 @@ cdef extern from "cudf/io/json.hpp" \ json_reader_options_builder& byte_range_size( size_t size ) except +libcudf_exception_handler + json_reader_options_builder& delimiter( + char delimiter + ) except +libcudf_exception_handler json_reader_options_builder& lines( bool val ) except +libcudf_exception_handler @@ -92,16 +121,36 @@ cdef extern from "cudf/io/json.hpp" \ json_reader_options_builder& prune_columns( bool val ) except +libcudf_exception_handler + json_reader_options_builder& experimental( + bool val + ) except +libcudf_exception_handler json_reader_options_builder& dayfirst( bool val ) except +libcudf_exception_handler json_reader_options_builder& keep_quotes( bool val ) except +libcudf_exception_handler + json_reader_options_builder& normalize_single_quotes( + bool val + ) except +libcudf_exception_handler + json_reader_options_builder& normalize_whitespace( + bool val + ) except +libcudf_exception_handler json_reader_options_builder& recovery_mode( json_recovery_mode_t val ) except +libcudf_exception_handler + json_reader_options_builder& strict_validation(bool val)\ + except +libcudf_exception_handler + json_reader_options_builder& numeric_leading_zeros(bool val)\ + except +libcudf_exception_handler + json_reader_options_builder& nonnumeric_numbers(bool val)\ + except +libcudf_exception_handler + json_reader_options_builder& unquoted_control_chars(bool val)\ + except +libcudf_exception_handler + json_reader_options_builder& na_values(vector[string] vals)\ + except +libcudf_exception_handler + json_reader_options build() except +libcudf_exception_handler cdef cudf_io_types.table_with_metadata read_json( diff --git a/python/pylibcudf/pylibcudf/tests/io/test_avro.py b/python/pylibcudf/pylibcudf/tests/io/test_avro.py index 3d9d99ffa61..bda8921b62a 100644 --- 
a/python/pylibcudf/pylibcudf/tests/io/test_avro.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_avro.py @@ -98,10 +98,15 @@ def test_read_avro(avro_dtypes, avro_dtype_data, row_opts, columns, nullable): buffer.seek(0) res = plc.io.avro.read_avro( - plc.io.types.SourceInfo([buffer]), - columns=columns, - skip_rows=skip_rows, - num_rows=num_rows, + ( + plc.io.avro.AvroReaderOptions.builder( + plc.io.types.SourceInfo([buffer]) + ) + .columns(columns) + .skip_rows(skip_rows) + .num_rows(num_rows) + .build() + ) ) expected = pa.Table.from_arrays( diff --git a/python/pylibcudf/pylibcudf/tests/io/test_json.py b/python/pylibcudf/pylibcudf/tests/io/test_json.py index 453e5ce32a8..9b0c5a29fe8 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_json.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_json.py @@ -24,13 +24,19 @@ def test_write_json_basic(table_data, source_or_sink, lines, rows_per_chunk): plc_table_w_meta, pa_table = table_data sink = source_or_sink - plc.io.json.write_json( - plc.io.SinkInfo([sink]), - plc_table_w_meta, - lines=lines, - rows_per_chunk=rows_per_chunk, + options = ( + plc.io.json.JsonWriterOptions.builder( + plc.io.SinkInfo([sink]), plc_table_w_meta.tbl + ) + .metadata(plc_table_w_meta) + .lines(lines) + .build() ) + options.set_rows_per_chunk(rows_per_chunk) + + plc.io.json.write_json(options) + exp = pa_table.to_pandas() # Convert everything to string to make @@ -57,13 +63,18 @@ def test_write_json_nulls(na_rep, include_nulls): sink = io.StringIO() - plc.io.json.write_json( - plc.io.SinkInfo([sink]), - plc_tbl_w_meta, - na_rep=na_rep, - include_nulls=include_nulls, + options = ( + plc.io.json.JsonWriterOptions.builder( + plc.io.SinkInfo([sink]), plc_tbl_w_meta.tbl + ) + .metadata(plc_tbl_w_meta) + .na_rep(na_rep) + .include_nulls(include_nulls) + .build() ) + plc.io.json.write_json(options) + exp = pa_tbl.to_pandas() # Convert everything to string to make @@ -100,15 +111,21 @@ def test_write_json_bool_opts(true_value, false_value): sink = io.StringIO() - plc.io.json.write_json( - plc.io.SinkInfo([sink]), - plc_tbl_w_meta, - include_nulls=True, - na_rep="null", - true_value=true_value, - false_value=false_value, + options = ( + plc.io.json.JsonWriterOptions.builder( + plc.io.SinkInfo([sink]), plc_tbl_w_meta.tbl + ) + .metadata(plc_tbl_w_meta) + .na_rep("null") + .include_nulls(True) + .build() ) + options.set_true_value(true_value) + options.set_false_value(false_value) + + plc.io.json.write_json(options) + exp = pa_tbl.to_pandas() # Convert everything to string to make
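
For reviewers: outside the test suite, the new JSON writer API composes the same way; a minimal sketch based on the test changes above (the `plc_table`/`plc_table_w_meta` names and the chunk size are illustrative placeholders):

    import io
    import pylibcudf as plc

    sink = io.StringIO()
    options = (
        plc.io.json.JsonWriterOptions.builder(plc.io.SinkInfo([sink]), plc_table)
        .metadata(plc_table_w_meta)  # column names for the output
        .na_rep("null")
        .include_nulls(True)
        .lines(False)
        .build()
    )
    options.set_rows_per_chunk(8)  # optional post-build tweak
    plc.io.json.write_json(options)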