diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 900e9eed98e..a84f7bd5224 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -413,11 +413,13 @@ add_library(
   src/io/utilities/arrow_io_source.cpp
   src/io/utilities/column_buffer.cpp
   src/io/utilities/config_utils.cpp
+  src/io/utilities/data_casting.cu
   src/io/utilities/data_sink.cpp
   src/io/utilities/datasource.cpp
   src/io/utilities/file_io_utilities.cpp
   src/io/utilities/parsing_utils.cu
   src/io/utilities/row_selection.cpp
+  src/io/utilities/type_inference.cu
   src/io/utilities/trie.cu
   src/jit/cache.cpp
   src/jit/parser.cpp
diff --git a/cpp/include/cudf/io/detail/data_casting.cuh b/cpp/include/cudf/io/detail/data_casting.cuh
deleted file mode 100644
index b7ee5e05e96..00000000000
--- a/cpp/include/cudf/io/detail/data_casting.cuh
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <io/utilities/parsing_utils.cuh>
-
-#include <cudf/column/column.hpp>
-#include <cudf/column/column_device_view.cuh>
-#include <cudf/column/column_factories.hpp>
-#include <cudf/detail/null_mask.hpp>
-#include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/strings/detail/strings_children.cuh>
-#include <cudf/strings/detail/utf8.hpp>
-#include <cudf/types.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <cub/cub.cuh>
-
-#include <memory>
-
-namespace cudf::io::json::detail {
-
-// Unicode code point escape sequence
-static constexpr char UNICODE_SEQ = 0x7F;
-
-// Invalid escape sequence
-static constexpr char NON_ESCAPE_CHAR = 0x7E;
-
-// Unicode code point escape sequence prefix comprises '\' and 'u' characters
-static constexpr size_type UNICODE_ESC_PREFIX = 2;
-
-// Unicode code point escape sequence comprises four hex characters
-static constexpr size_type UNICODE_HEX_DIGIT_COUNT = 4;
-
-// A unicode code point escape sequence is \uXXXX
-static auto constexpr NUM_UNICODE_ESC_SEQ_CHARS = UNICODE_ESC_PREFIX + UNICODE_HEX_DIGIT_COUNT;
-
-static constexpr auto UTF16_HIGH_SURROGATE_BEGIN = 0xD800;
-static constexpr auto UTF16_HIGH_SURROGATE_END   = 0xDC00;
-static constexpr auto UTF16_LOW_SURROGATE_BEGIN  = 0xDC00;
-static constexpr auto UTF16_LOW_SURROGATE_END    = 0xE000;
-
-/**
- * @brief Describing whether data casting of a certain item succeed, the item was parsed to null, or
- * whether type casting failed.
- */
-enum class data_casting_result { PARSING_SUCCESS, PARSED_TO_NULL, PARSING_FAILURE };
-
-/**
- * @brief Providing additional information about the type casting result.
- */
-struct data_casting_result_info {
-  // Number of bytes written to output
-  size_type bytes;
-  // Whether parsing succeeded, item was parsed to null, or failed
-  data_casting_result result;
-};
-
-/**
- * @brief Returns the character to output for a given escaped character that's following a
- * backslash.
- *
- * @param escaped_char The character following the backslash.
- * @return The character to output for a given character that's following a backslash
- */
-__device__ __forceinline__ char get_escape_char(char escaped_char)
-{
-  switch (escaped_char) {
-    case '"': return '"';
-    case '\\': return '\\';
-    case '/': return '/';
-    case 'b': return '\b';
-    case 'f': return '\f';
-    case 'n': return '\n';
-    case 'r': return '\r';
-    case 't': return '\t';
-    case 'u': return UNICODE_SEQ;
-    default: return NON_ESCAPE_CHAR;
-  }
-}
-
-/**
- * @brief Returns the escaped characters for a given character.
- *
- * @param escaped_char The character to escape.
- * @return The escaped characters for a given character.
- */
-__device__ __forceinline__ thrust::pair<char, char> get_escaped_char(char escaped_char)
-{
-  switch (escaped_char) {
-    case '"': return {'\\', '"'};
-    case '\\': return {'\\', '\\'};
-    case '/': return {'\\', '/'};
-    case '\b': return {'\\', 'b'};
-    case '\f': return {'\\', 'f'};
-    case '\n': return {'\\', 'n'};
-    case '\r': return {'\\', 'r'};
-    case '\t': return {'\\', 't'};
-    // case 'u': return UNICODE_SEQ;
-    default: return {'\0', escaped_char};
-  }
-}
-/**
- * @brief Parses the hex value from the four hex digits of a unicode code point escape sequence
- * \uXXXX.
- *
- * @param str Pointer to the first (most-significant) hex digit
- * @return The parsed hex value if successful, -1 otherwise.
- */
-__device__ __forceinline__ int32_t parse_unicode_hex(char const* str)
-{
-  // Prepare result
-  int32_t result = 0, base = 1;
-  constexpr int32_t hex_radix = 16;
-
-  // Iterate over hex digits right-to-left
-  size_type index = UNICODE_HEX_DIGIT_COUNT;
-  while (index-- > 0) {
-    char const ch = str[index];
-    if (ch >= '0' && ch <= '9') {
-      result += static_cast<int32_t>((ch - '0') + 0) * base;
-      base *= hex_radix;
-    } else if (ch >= 'A' && ch <= 'F') {
-      result += static_cast<int32_t>((ch - 'A') + 10) * base;
-      base *= hex_radix;
-    } else if (ch >= 'a' && ch <= 'f') {
-      result += static_cast<int32_t>((ch - 'a') + 10) * base;
-      base *= hex_radix;
-    } else {
-      return -1;
-    }
-  }
-  return result;
-}
-
-/**
- * @brief Writes the UTF-8 byte sequence to \p out_it and returns the number of bytes written to
- * \p out_it
- */
-constexpr size_type write_utf8_char(char_utf8 character, char*& out_it)
-{
-  auto const bytes = (out_it == nullptr) ? strings::detail::bytes_in_char_utf8(character)
-                                         : strings::detail::from_char_utf8(character, out_it);
-  if (out_it) out_it += bytes;
-  return bytes;
-}
-
-/**
- * @brief Processes a string, replaces escape sequences and optionally strips off the quote
- * characters.
- *
- * @tparam in_iterator_t A bidirectional input iterator type whose value_type is convertible to
- * char
- * @param in_begin Iterator to the first item to process
- * @param in_end Iterator to one past the last item to process
- * @param d_buffer Output character buffer to the first item to write
- * @param options Settings for controlling string processing behavior
- * @return A struct of (num_bytes_written, parsing_success_result), where num_bytes_written is
- * the number of bytes written to d_buffer, parsing_success_result is enum value indicating whether
- * parsing succeeded, item was parsed to null, or failed.
- */
-template <typename in_iterator_t>
-__device__ __forceinline__ data_casting_result_info
-process_string(in_iterator_t in_begin,
-               in_iterator_t in_end,
-               char* d_buffer,
-               cudf::io::parse_options_view const& options)
-{
-  int32_t bytes           = 0;
-  auto const num_in_chars = thrust::distance(in_begin, in_end);
-  // String values are indicated by keeping the quote character
-  bool const is_string_value =
-    num_in_chars >= 2LL &&
-    (options.quotechar == '\0' ||
-     (*in_begin == options.quotechar) && (*thrust::prev(in_end) == options.quotechar));
-
-  // Copy literal/numeric value
-  if (not is_string_value) {
-    while (in_begin != in_end) {
-      if (d_buffer) *d_buffer++ = *in_begin;
-      ++in_begin;
-      ++bytes;
-    }
-    return {bytes, data_casting_result::PARSING_SUCCESS};
-  }
-  // Whether in the original JSON this was a string value enclosed in quotes
-  // ({"a":"foo"} vs. {"a":1.23})
-  char const backslash_char = '\\';
-
-  // Escape-flag, set after encountering a backslash character
-  bool escape = false;
-
-  // Exclude beginning and ending quote chars from string range
-  if (!options.keepquotes) {
-    ++in_begin;
-    --in_end;
-  }
-
-  // Iterate over the input
-  while (in_begin != in_end) {
-    // Copy single character to output
-    if (!escape) {
-      escape = (*in_begin == backslash_char);
-      if (!escape) {
-        if (d_buffer) *d_buffer++ = *in_begin;
-        ++bytes;
-      }
-      ++in_begin;
-      continue;
-    }
-
-    // Previous char indicated beginning of escape sequence
-    // Reset escape flag for next loop iteration
-    escape = false;
-
-    // Check the character that is supposed to be escaped
-    auto escaped_char = get_escape_char(*in_begin);
-
-    // We escaped an invalid escape character -> "fail"/null for this item
-    if (escaped_char == NON_ESCAPE_CHAR) { return {bytes, data_casting_result::PARSING_FAILURE}; }
-
-    // Regular, single-character escape
-    if (escaped_char != UNICODE_SEQ) {
-      if (d_buffer) *d_buffer++ = escaped_char;
-      ++bytes;
-      ++in_begin;
-      continue;
-    }
-
-    // This is an escape sequence of a unicode code point: \uXXXX,
-    // where each X in XXXX represents a hex digit
-    // Skip over the 'u' char from \uXXXX to the first hex digit
-    ++in_begin;
-
-    // Make sure that there's at least 4 characters left from the
-    // input, which are expected to be hex digits
-    if (thrust::distance(in_begin, in_end) < UNICODE_HEX_DIGIT_COUNT) {
-      return {bytes, data_casting_result::PARSING_FAILURE};
-    }
-
-    auto hex_val = parse_unicode_hex(in_begin);
-
-    // Couldn't parse hex values from the four-character sequence -> "fail"/null for this item
-    if (hex_val < 0) { return {bytes, data_casting_result::PARSING_FAILURE}; }
-
-    // Skip over the four hex digits
-    thrust::advance(in_begin, UNICODE_HEX_DIGIT_COUNT);
-
-    // If this may be a UTF-16 encoded surrogate pair:
-    // we expect another \uXXXX sequence
-    int32_t hex_low_val = 0;
-    if (thrust::distance(in_begin, in_end) >= NUM_UNICODE_ESC_SEQ_CHARS &&
-        *in_begin == backslash_char && *thrust::next(in_begin) == 'u') {
-      // Try to parse hex value following the '\' and 'u' characters from what may be a UTF16 low
-      // surrogate
-      hex_low_val = parse_unicode_hex(thrust::next(in_begin, 2));
-    }
-
-    // This is indeed a UTF16 surrogate pair
-    if (hex_val >= UTF16_HIGH_SURROGATE_BEGIN && hex_val < UTF16_HIGH_SURROGATE_END &&
-        hex_low_val >= UTF16_LOW_SURROGATE_BEGIN && hex_low_val < UTF16_LOW_SURROGATE_END) {
-      // Skip over the second \uXXXX sequence
-      thrust::advance(in_begin, NUM_UNICODE_ESC_SEQ_CHARS);
-
-      // Compute UTF16-encoded code point
-      uint32_t unicode_code_point = 0x10000 + ((hex_val - UTF16_HIGH_SURROGATE_BEGIN) << 10) +
-                                    (hex_low_val - UTF16_LOW_SURROGATE_BEGIN);
-      auto utf8_chars = strings::detail::codepoint_to_utf8(unicode_code_point);
-      bytes += write_utf8_char(utf8_chars, d_buffer);
-    }
-
-    // Just a single \uXXXX sequence
-    else {
-      auto utf8_chars = strings::detail::codepoint_to_utf8(hex_val);
-      bytes += write_utf8_char(utf8_chars, d_buffer);
-    }
-  }
-
-  // The last character of the input is a backslash -> "fail"/null for this item
-  if (escape) { return {bytes, data_casting_result::PARSING_FAILURE}; }
-  return {bytes, data_casting_result::PARSING_SUCCESS};
-}
-
-template <typename str_tuple_it>
-struct string_parse {
-  str_tuple_it str_tuples;
-  bitmask_type* null_mask;
-  size_type* null_count_data;
-  cudf::io::parse_options_view const options;
-  size_type* d_offsets{};
-  char* d_chars{};
-
-  __device__ void operator()(size_type idx)
-  {
-    if (null_mask != nullptr && not bit_is_set(null_mask, idx)) {
-      if (!d_chars) d_offsets[idx] = 0;
-      return;
-    }
-    auto const in_begin     = str_tuples[idx].first;
-    auto const in_end       = in_begin + str_tuples[idx].second;
-    auto const num_in_chars = str_tuples[idx].second;
-
-    // Check if the value corresponds to the null literal
-    auto const is_null_literal =
-      (!d_chars) &&
-      serialized_trie_contains(options.trie_na, {in_begin, static_cast<std::size_t>(num_in_chars)});
-    if (is_null_literal && null_mask != nullptr) {
-      clear_bit(null_mask, idx);
-      atomicAdd(null_count_data, 1);
-      if (!d_chars) d_offsets[idx] = 0;
-      return;
-    }
-
-    char* d_buffer        = d_chars ? d_chars + d_offsets[idx] : nullptr;
-    auto str_process_info = process_string(in_begin, in_end, d_buffer, options);
-    if (str_process_info.result != data_casting_result::PARSING_SUCCESS) {
-      if (null_mask != nullptr) {
-        clear_bit(null_mask, idx);
-        atomicAdd(null_count_data, 1);
-      }
-      if (!d_chars) d_offsets[idx] = 0;
-    } else {
-      if (!d_chars) d_offsets[idx] = str_process_info.bytes;
-    }
-  }
-};
-/**
- * @brief Parses the data from an iterator of string views, casting it to the given target data type
- *
- * @param str_tuples Iterator returning a string view, i.e., a (ptr, length) pair
- * @param col_size The total number of items of this column
- * @param col_type The column's target data type
- * @param null_mask A null mask that renders certain items from the input invalid
- * @param options Settings for controlling the processing behavior
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr The resource to be used for device memory allocation
- * @return The column that contains the parsed data
- */
-template <typename str_tuple_it, typename B>
-std::unique_ptr<column> parse_data(str_tuple_it str_tuples,
-                                   size_type col_size,
-                                   data_type col_type,
-                                   B&& null_mask,
-                                   size_type null_count,
-                                   cudf::io::parse_options_view const& options,
-                                   rmm::cuda_stream_view stream,
-                                   rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-
-  auto d_null_count    = rmm::device_scalar<size_type>(null_count, stream);
-  auto null_count_data = d_null_count.data();
-
-  if (col_type == cudf::data_type{cudf::type_id::STRING}) {
-    // this utility calls the functor to build the offsets and chars columns;
-    // the bitmask and null count may be updated by parse failures
-    auto [offsets, chars] = cudf::strings::detail::make_strings_children(
-      string_parse<decltype(str_tuples)>{
-        str_tuples, static_cast<bitmask_type*>(null_mask.data()), null_count_data, options},
-      col_size,
-      stream,
-      mr);
-
-    return make_strings_column(col_size,
-                               std::move(offsets),
-                               std::move(chars),
-                               d_null_count.value(stream),
-                               std::move(null_mask));
-  }
-
-  auto out_col =
-    make_fixed_width_column(col_type, col_size, std::move(null_mask), null_count, stream, mr);
-  auto output_dv_ptr = mutable_column_device_view::create(*out_col, stream);
-
-  // use existing code (`ConvertFunctor`) to convert values
-  thrust::for_each_n(
-    rmm::exec_policy(stream),
-    thrust::make_counting_iterator<size_type>(0),
-    col_size,
-    [str_tuples, col = *output_dv_ptr, options, col_type, null_count_data] __device__(
-      size_type row) {
-      if (col.is_null(row)) { return; }
-      auto const in = str_tuples[row];
-
-      auto const is_null_literal =
-        serialized_trie_contains(options.trie_na, {in.first, static_cast<size_t>(in.second)});
-
-      if (is_null_literal) {
-        col.set_null(row);
-        atomicAdd(null_count_data, 1);
-        return;
-      }
-
-      // If this is a string value, remove quotes
-      auto [in_begin, in_end] = trim_quotes(in.first, in.first + in.second, options.quotechar);
-
-      auto const is_parsed = cudf::type_dispatcher(col_type,
-                                                   ConvertFunctor{},
-                                                   in_begin,
-                                                   in_end,
-                                                   col.data<char>(),
-                                                   row,
-                                                   col_type,
-                                                   options,
-                                                   false);
-      if (not is_parsed) {
-        col.set_null(row);
-        atomicAdd(null_count_data, 1);
-      }
-    });
-
-  out_col->set_null_count(d_null_count.value(stream));
-
-  return out_col;
-}
-
-}  // namespace cudf::io::json::detail
diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index cabf904f020..5d7fb9d6b43 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -16,14 +16,13 @@
 
 #include "nested_json.hpp"
 #include <io/utilities/parsing_utils.cuh>
-#include <io/utilities/type_inference.cuh>
+#include <io/utilities/string_parsing.hpp>
 
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/detail/utilities/visitor_overload.hpp>
-#include <cudf/io/detail/data_casting.cuh>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/error.hpp>
@@ -331,23 +330,27 @@ std::vector<std::string> copy_strings_to_host(device_span<SymbolT const> input,
 {
   CUDF_FUNC_RANGE();
   auto const num_strings = node_range_begin.size();
-  rmm::device_uvector<thrust::pair<char const*, size_type>> string_views(num_strings, stream);
+  rmm::device_uvector<size_type> string_offsets(num_strings, stream);
+  rmm::device_uvector<size_type> string_lengths(num_strings, stream);
   auto d_offset_pairs = thrust::make_zip_iterator(node_range_begin.begin(), node_range_end.begin());
   thrust::transform(rmm::exec_policy(stream),
                     d_offset_pairs,
                     d_offset_pairs + num_strings,
-                    string_views.begin(),
-                    [data = input.data()] __device__(auto const& offsets) {
+                    thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()),
+                    [] __device__(auto const& offsets) {
                       // Note: first character for non-field columns
-                      return thrust::make_pair(
-                        data + thrust::get<0>(offsets),
+                      return thrust::make_tuple(
+                        static_cast<size_type>(thrust::get<0>(offsets)),
                         static_cast<size_type>(thrust::get<1>(offsets) - thrust::get<0>(offsets)));
                     });
 
   cudf::io::parse_options_view options_view{};
   options_view.quotechar  = '\0';  // no quotes
   options_view.keepquotes = true;
-  auto d_column_names     = parse_data(string_views.begin(),
+  auto d_offset_length_it =
+    thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin());
+  auto d_column_names = parse_data(input.data(),
+                                   d_offset_length_it,
                                    num_strings,
                                    data_type{type_id::STRING},
                                    rmm::device_buffer{},
@@ -355,7 +358,7 @@ std::vector<std::string> copy_strings_to_host(device_span<SymbolT const> input,
                                    options_view,
                                    stream,
                                    rmm::mr::get_current_device_resource());
-  auto to_host            = [stream](auto const& col) {
+  auto to_host        = [stream](auto const& col) {
     if (col.is_empty()) return std::vector<std::string>{};
     auto const scv     = cudf::strings_column_view(col);
     auto const h_chars = cudf::detail::make_std_vector_sync<char>(
@@ -763,19 +766,6 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co
       // TODO how about directly storing pair<char*, size_t> in json_column?
       auto offset_length_it =
         thrust::make_zip_iterator(json_col.string_offsets.begin(), json_col.string_lengths.begin());
-      // Prepare iterator that returns (string_offset, string_length)-pairs needed by inference
-      auto string_ranges_it =
-        thrust::make_transform_iterator(offset_length_it, [] __device__(auto ip) {
-          return thrust::pair<json_column::row_offset_t, std::size_t>{
-            thrust::get<0>(ip), static_cast<std::size_t>(thrust::get<1>(ip))};
-        });
-
-      // Prepare iterator that returns (string_ptr, string_length)-pairs needed by type conversion
-      auto string_spans_it = thrust::make_transform_iterator(
-        offset_length_it, [data = d_input.data()] __device__(auto ip) {
-          return thrust::pair<char const*, std::size_t>{
-            data + thrust::get<0>(ip), static_cast<std::size_t>(thrust::get<1>(ip))};
-        });
 
       data_type target_type{};
 
@@ -790,12 +780,13 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co
       // Infer column type, if we don't have an explicit type for it
       else {
         target_type = cudf::io::detail::infer_data_type(
-          options.json_view(), d_input, string_ranges_it, col_size, stream);
+          options.json_view(), d_input, offset_length_it, col_size, stream);
       }
 
       auto [result_bitmask, null_count] = make_validity(json_col);
       // Convert strings to the inferred data type
-      auto col = parse_data(string_spans_it,
+      auto col = parse_data(d_input.data(),
+                            offset_length_it,
                             col_size,
                             target_type,
                             std::move(result_bitmask),
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 0b49f97597d..06ac11485cb 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -19,14 +19,13 @@
 #include <io/fst/logical_stack.cuh>
 #include <io/fst/lookup_tables.cuh>
 #include <io/utilities/parsing_utils.cuh>
-#include <io/utilities/type_inference.cuh>
+#include <io/utilities/string_parsing.hpp>
 
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/detail/utilities/visitor_overload.hpp>
 #include <cudf/detail/valid_if.cuh>
-#include <cudf/io/detail/data_casting.cuh>
 #include <cudf/io/detail/tokenize_json.hpp>
 #include <cudf/io/json.hpp>
 #include <cudf/table/table.hpp>
@@ -1949,20 +1948,6 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> json_column_to
       auto offset_length_it =
         thrust::make_zip_iterator(d_string_offsets.begin(), d_string_lengths.begin());
 
-      // Prepare iterator that returns (string_offset, string_length)-pairs needed by inference
-      auto string_ranges_it =
-        thrust::make_transform_iterator(offset_length_it, [] __device__(auto ip) {
-          return thrust::pair<json_column::row_offset_t, std::size_t>{
-            thrust::get<0>(ip), static_cast<std::size_t>(thrust::get<1>(ip))};
-        });
-
-      // Prepare iterator that returns (string_ptr, string_length)-pairs needed by type conversion
-      auto string_spans_it = thrust::make_transform_iterator(
-        offset_length_it, [data = d_input.data()] __device__(auto ip) {
-          return thrust::pair<char const*, std::size_t>{
-            data + thrust::get<0>(ip), static_cast<std::size_t>(thrust::get<1>(ip))};
-        });
-
       data_type target_type{};
 
       if (schema.has_value()) {
@@ -1978,7 +1963,7 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> json_column_to
         target_type =
           cudf::io::detail::infer_data_type(parsing_options(options, stream).json_view(),
                                             d_input,
-                                            string_ranges_it,
+                                            offset_length_it,
                                             col_size,
                                             stream);
       }
@@ -1986,7 +1971,8 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> json_column_to
       auto [result_bitmask, null_count] = make_validity(json_col);
 
       // Convert strings to the inferred data type
-      auto col = parse_data(string_spans_it,
+      auto col = parse_data(d_input.data(),
+                            offset_length_it,
                             col_size,
                             target_type,
                             std::move(result_bitmask),
diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu
index 1e44522ed33..2d363c51fce 100644
--- a/cpp/src/io/json/write_json.cu
+++ b/cpp/src/io/json/write_json.cu
@@ -20,6 +20,7 @@
  */
 
 #include <io/csv/durations.hpp>
+#include <io/utilities/parsing_utils.cuh>
 #include <lists/utilities.hpp>
 
 #include <cudf/column/column_device_view.cuh>
@@ -27,9 +28,9 @@
 #include <cudf/detail/copy.hpp>
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/null_mask.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/io/data_sink.hpp>
-#include <cudf/io/detail/data_casting.cuh>
 #include <cudf/io/detail/json.hpp>
 #include <cudf/lists/lists_column_view.hpp>
 #include <cudf/scalar/scalar.hpp>
diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu
new file mode 100644
index 00000000000..1772e5e43fa
--- /dev/null
+++ b/cpp/src/io/utilities/data_casting.cu
@@ -0,0 +1,987 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <io/utilities/parsing_utils.cuh>
+#include <io/utilities/string_parsing.hpp>
+
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/integer_utils.hpp>
+#include <cudf/strings/detail/strings_children.cuh>
+#include <cudf/strings/detail/utf8.hpp>
+#include <cudf/types.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_buffer.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/copy.h>
+#include <thrust/functional.h>
+#include <thrust/transform_reduce.h>
+
+#include <cub/cub.cuh>
+
+#include <memory>
+#include <type_traits>
+
+namespace cudf::io::json::detail {
+
+constexpr auto SINGLE_THREAD_THRESHOLD = 128;
+constexpr auto WARP_THRESHOLD          = 128 * 128;  // 16K
+
+// Unicode code point escape sequence
+static constexpr char UNICODE_SEQ = 0x7F;
+
+// Invalid escape sequence
+static constexpr char NON_ESCAPE_CHAR = 0x7E;
+
+// Unicode code point escape sequence prefix comprises '\' and 'u' characters
+static constexpr size_type UNICODE_ESC_PREFIX = 2;
+
+// Unicode code point escape sequence comprises four hex characters
+static constexpr size_type UNICODE_HEX_DIGIT_COUNT = 4;
+
+// A unicode code point escape sequence is \uXXXX
+static auto constexpr NUM_UNICODE_ESC_SEQ_CHARS = UNICODE_ESC_PREFIX + UNICODE_HEX_DIGIT_COUNT;
+
+static constexpr auto UTF16_HIGH_SURROGATE_BEGIN = 0xD800;
+static constexpr auto UTF16_HIGH_SURROGATE_END   = 0xDC00;
+static constexpr auto UTF16_LOW_SURROGATE_BEGIN  = 0xDC00;
+static constexpr auto UTF16_LOW_SURROGATE_END    = 0xE000;
+
+/**
+ * @brief Describing whether data casting of a certain item succeed, the item was parsed to null, or
+ * whether type casting failed.
+ */
+enum class data_casting_result { PARSING_SUCCESS, PARSED_TO_NULL, PARSING_FAILURE };
+
+/**
+ * @brief Providing additional information about the type casting result.
+ */
+struct data_casting_result_info {
+  // Number of bytes written to output
+  size_type bytes;
+  // Whether parsing succeeded, item was parsed to null, or failed
+  data_casting_result result;
+};
+
+/**
+ * @brief Returns the character to output for a given escaped character that's following a
+ * backslash.
+ *
+ * @param escaped_char The character following the backslash.
+ * @return The character to output for a given character that's following a backslash
+ */
+__device__ __forceinline__ char get_escape_char(char escaped_char)
+{
+  switch (escaped_char) {
+    case '"': return '"';
+    case '\\': return '\\';
+    case '/': return '/';
+    case 'b': return '\b';
+    case 'f': return '\f';
+    case 'n': return '\n';
+    case 'r': return '\r';
+    case 't': return '\t';
+    case 'u': return UNICODE_SEQ;
+    default: return NON_ESCAPE_CHAR;
+  }
+}
+
+/**
+ * @brief Parses the hex value from the four hex digits of a unicode code point escape sequence
+ * \uXXXX.
+ *
+ * @param str Pointer to the first (most-significant) hex digit
+ * @return The parsed hex value if successful, -1 otherwise.
+ */
+__device__ __forceinline__ int32_t parse_unicode_hex(char const* str)
+{
+  // Prepare result
+  int32_t result = 0, base = 1;
+  constexpr int32_t hex_radix = 16;
+
+  // Iterate over hex digits right-to-left
+  size_type index = UNICODE_HEX_DIGIT_COUNT;
+  while (index-- > 0) {
+    char const ch = str[index];
+    if (ch >= '0' && ch <= '9') {
+      result += static_cast<int32_t>((ch - '0') + 0) * base;
+      base *= hex_radix;
+    } else if (ch >= 'A' && ch <= 'F') {
+      result += static_cast<int32_t>((ch - 'A') + 10) * base;
+      base *= hex_radix;
+    } else if (ch >= 'a' && ch <= 'f') {
+      result += static_cast<int32_t>((ch - 'a') + 10) * base;
+      base *= hex_radix;
+    } else {
+      return -1;
+    }
+  }
+  return result;
+}
+
+/**
+ * @brief Writes the UTF-8 byte sequence to \p out_it and returns the number of bytes written to
+ * \p out_it
+ */
+constexpr size_type write_utf8_char(char_utf8 character, char*& out_it)
+{
+  auto const bytes = (out_it == nullptr) ? strings::detail::bytes_in_char_utf8(character)
+                                         : strings::detail::from_char_utf8(character, out_it);
+  if (out_it) out_it += bytes;
+  return bytes;
+}
+
+/**
+ * @brief Processes a string, replaces escape sequences and optionally strips off the quote
+ * characters.
+ *
+ * @tparam in_iterator_t A bidirectional input iterator type whose value_type is convertible to
+ * char
+ * @param in_begin Iterator to the first item to process
+ * @param in_end Iterator to one past the last item to process
+ * @param d_buffer Output character buffer to the first item to write
+ * @param options Settings for controlling string processing behavior
+ * @return A struct of (num_bytes_written, parsing_success_result), where num_bytes_written is
+ * the number of bytes written to d_buffer, parsing_success_result is enum value indicating whether
+ * parsing succeeded, item was parsed to null, or failed.
+ */
+template <typename in_iterator_t>
+__device__ __forceinline__ data_casting_result_info
+process_string(in_iterator_t in_begin,
+               in_iterator_t in_end,
+               char* d_buffer,
+               cudf::io::parse_options_view const& options)
+{
+  int32_t bytes           = 0;
+  auto const num_in_chars = thrust::distance(in_begin, in_end);
+  // String values are indicated by keeping the quote character
+  bool const is_string_value =
+    num_in_chars >= 2LL &&
+    (options.quotechar == '\0' ||
+     (*in_begin == options.quotechar) && (*thrust::prev(in_end) == options.quotechar));
+
+  // Copy literal/numeric value
+  if (not is_string_value) {
+    bytes += (in_end - in_begin);
+    if (d_buffer) d_buffer = thrust::copy(thrust::seq, in_begin, in_end, d_buffer);
+    return {bytes, data_casting_result::PARSING_SUCCESS};
+  }
+  char constexpr backslash_char = '\\';
+
+  // Escape-flag, set after encountering a backslash character
+  bool is_prev_char_escape = false;
+
+  // Exclude beginning and ending quote chars from string range
+  if (!options.keepquotes) {
+    ++in_begin;
+    --in_end;
+  }
+
+  // Iterate over the input
+  while (in_begin != in_end) {
+    // Copy single character to output
+    if (!is_prev_char_escape) {
+      is_prev_char_escape = (*in_begin == backslash_char);
+      if (!is_prev_char_escape) {
+        if (d_buffer) *d_buffer++ = *in_begin;
+        ++bytes;
+      }
+      ++in_begin;
+      continue;
+    }
+
+    // Previous char indicated beginning of escape sequence
+    // Reset escape flag for next loop iteration
+    is_prev_char_escape = false;
+
+    // Check the character that is supposed to be escaped
+    auto escaped_char = get_escape_char(*in_begin);
+
+    // We escaped an invalid escape character -> "fail"/null for this item
+    if (escaped_char == NON_ESCAPE_CHAR) { return {bytes, data_casting_result::PARSING_FAILURE}; }
+
+    // Regular, single-character escape
+    if (escaped_char != UNICODE_SEQ) {
+      if (d_buffer) *d_buffer++ = escaped_char;
+      ++bytes;
+      ++in_begin;
+      continue;
+    }
+
+    // This is an escape sequence of a unicode code point: \uXXXX,
+    // where each X in XXXX represents a hex digit
+    // Skip over the 'u' char from \uXXXX to the first hex digit
+    ++in_begin;
+
+    // Make sure that there's at least 4 characters left from the
+    // input, which are expected to be hex digits
+    if (thrust::distance(in_begin, in_end) < UNICODE_HEX_DIGIT_COUNT) {
+      return {bytes, data_casting_result::PARSING_FAILURE};
+    }
+
+    auto hex_val = parse_unicode_hex(in_begin);
+
+    // Couldn't parse hex values from the four-character sequence -> "fail"/null for this item
+    if (hex_val < 0) { return {bytes, data_casting_result::PARSING_FAILURE}; }
+
+    // Skip over the four hex digits
+    thrust::advance(in_begin, UNICODE_HEX_DIGIT_COUNT);
+
+    // If this may be a UTF-16 encoded surrogate pair:
+    // we expect another \uXXXX sequence
+    int32_t hex_low_val = 0;
+    if (hex_val >= UTF16_HIGH_SURROGATE_BEGIN && hex_val < UTF16_HIGH_SURROGATE_END &&
+        thrust::distance(in_begin, in_end) >= NUM_UNICODE_ESC_SEQ_CHARS &&
+        *in_begin == backslash_char && *thrust::next(in_begin) == 'u') {
+      // Try to parse hex value following the '\' and 'u' characters from what may be a UTF16 low
+      // surrogate
+      hex_low_val = parse_unicode_hex(thrust::next(in_begin, 2));
+    }
+
+    // This is indeed a UTF16 surrogate pair
+    if (hex_val >= UTF16_HIGH_SURROGATE_BEGIN && hex_val < UTF16_HIGH_SURROGATE_END &&
+        hex_low_val >= UTF16_LOW_SURROGATE_BEGIN && hex_low_val < UTF16_LOW_SURROGATE_END) {
+      // Skip over the second \uXXXX sequence
+      thrust::advance(in_begin, NUM_UNICODE_ESC_SEQ_CHARS);
+
+      // Compute UTF16-encoded code point
+      uint32_t unicode_code_point = 0x10000 + ((hex_val - UTF16_HIGH_SURROGATE_BEGIN) << 10) +
+                                    (hex_low_val - UTF16_LOW_SURROGATE_BEGIN);
+      auto utf8_chars = strings::detail::codepoint_to_utf8(unicode_code_point);
+      bytes += write_utf8_char(utf8_chars, d_buffer);
+    } else {
+      // Just a single \uXXXX sequence
+      auto utf8_chars = strings::detail::codepoint_to_utf8(hex_val);
+      bytes += write_utf8_char(utf8_chars, d_buffer);
+    }
+  }
+
+  // The last character of the input is a backslash -> "fail"/null for this item
+  if (is_prev_char_escape) { return {bytes, data_casting_result::PARSING_FAILURE}; }
+  return {bytes, data_casting_result::PARSING_SUCCESS};
+}
+
+/**
+ * @brief Data structure to hold 1 bit per thread with previous `UNICODE_LOOK_BACK` bits stored in a
+ * warp.
+ *
+ * @tparam num_warps number of warps in the block
+ */
+template <unsigned num_warps>
+struct bitfield_warp {
+  static constexpr auto UNICODE_LOOK_BACK{5};
+  // 5 because for skipping unicode hex chars, look back up to 5 chars are needed.
+  // 5+32 for each warp.
+  bool is_slash[num_warps][UNICODE_LOOK_BACK + cudf::detail::warp_size];
+
+  /// Sets all bits to 0
+  __device__ void reset(unsigned warp_id)
+  {
+    if (threadIdx.x % cudf::detail::warp_size < UNICODE_LOOK_BACK) {
+      is_slash[warp_id][threadIdx.x % cudf::detail::warp_size] = 0;
+    }
+    is_slash[warp_id][threadIdx.x % cudf::detail::warp_size + UNICODE_LOOK_BACK] = 0;
+  }
+
+  /// Shifts UNICODE_LOOK_BACK bits to the left to hold the previous UNICODE_LOOK_BACK bits
+  __device__ void shift(unsigned warp_id)
+  {
+    if (threadIdx.x % cudf::detail::warp_size < UNICODE_LOOK_BACK)
+      is_slash[warp_id][threadIdx.x % cudf::detail::warp_size] =
+        is_slash[warp_id][cudf::detail::warp_size + threadIdx.x % cudf::detail::warp_size];
+    __syncwarp();
+  }
+
+  /// Each thread in a warp sets its own bit.
+  __device__ void set_bits(unsigned warp_id, bool is_escaping_backslash)
+  {
+    is_slash[warp_id][UNICODE_LOOK_BACK + threadIdx.x % cudf::detail::warp_size] =
+      is_escaping_backslash;
+    __syncwarp();
+  }
+
+  /// Each thread in a warp gets the requested bit.
+  __device__ bool get_bit(unsigned warp_id, int bit_index)
+  {
+    return is_slash[warp_id][UNICODE_LOOK_BACK + bit_index];
+  }
+};
+
+/**
+ * @brief Data structure to hold 1 bit per thread with previous `UNICODE_LOOK_BACK` bits stored in a
+ * block.
+ *
+ * @tparam num_warps number of warps in the block
+ */
+template <unsigned num_warps>
+struct bitfield_block {
+  static constexpr auto UNICODE_LOOK_BACK{5};
+  // 5 because for skipping unicode hex chars, look back up to 5 chars are needed.
+  // 5 + num_warps*32 for entire block
+  bool is_slash[UNICODE_LOOK_BACK + num_warps * cudf::detail::warp_size];
+
+  /// Sets all bits to 0
+  __device__ void reset(unsigned warp_id)
+  {
+    if (threadIdx.x < UNICODE_LOOK_BACK) { is_slash[threadIdx.x] = 0; }
+    is_slash[threadIdx.x + UNICODE_LOOK_BACK] = 0;
+  }
+
+  /// Shifts UNICODE_LOOK_BACK bits to the left to hold the previous UNICODE_LOOK_BACK bits
+  __device__ void shift(unsigned warp_id)
+  {
+    if (threadIdx.x < UNICODE_LOOK_BACK)
+      is_slash[threadIdx.x] = is_slash[num_warps * cudf::detail::warp_size + threadIdx.x];
+    __syncthreads();
+  }
+
+  /// Each thread in a block sets its own bit.
+  __device__ void set_bits(unsigned warp_id, bool is_escaping_backslash)
+  {
+    is_slash[UNICODE_LOOK_BACK + threadIdx.x] = is_escaping_backslash;
+    __syncthreads();
+  }
+
+  /// Each thread in a block gets the requested bit.
+  __device__ bool get_bit(unsigned warp_id, int bit_index)
+  {
+    return is_slash[UNICODE_LOOK_BACK + bit_index];
+  }
+};
+
+// Algorithm: warp/block parallel version of string_parse and process_string()
+// Decoding character classes (u8, u16, \*, *):
+// character      count: input->output
+// \uXXXX         6->2/3/4
+// \uXXXX\uXXXX  12->2/3/4
+// \"             2->1
+// *              1->1
+//
+// ERROR conditions. (all collaborating threads quit)
+// c=='\' & curr_idx == end_idx-1;
+// [c-1]=='\' &  get_escape[c]==NEC
+// [c-1]=='\' &  [c]=='u' & end_idx-curr_idx < UNICODE_HEX_DIGIT_COUNT
+// [c-1]=='\' &  [c]=='u' & end_idx-curr_idx >= UNICODE_HEX_DIGIT_COUNT && non-hex
+//
+// skip conditions. (current thread skips this char, no output)
+// c=='\' skip. (Escaping char only)
+// [c-2]=='\' && [c-1]=='u' for [2,1], [3,2] [4,5], [5, 6], skip.
+//
+// write conditions. (write to d_buffer)
+// [c-1]!='\' &  [c]!='\' write [c]
+// [c-1]!='\' &  [c]=='\' skip (already covered in skip conditions)
+// [c-1]=='\' &  [c]!=NEC && [c]!=UNICODE_SEQ, write [c]
+// [c-1]=='\' &  [c]=='u' & end_idx-curr_idx >= UNICODE_HEX_DIGIT_COUNT && hex, DECODE
+// [c+1:4]=curr_hex_val
+//        // if [c+5]=='\' & [c+6]=='u' & end_idx-curr_idx >= UNICODE_HEX_DIGIT_COUNT &&
+//        hex,DECODE [c+7:4]=next_hex_val
+//        // if [c-7]=='\' & [c-6]=='u' & end_idx-curr_idx >= UNICODE_HEX_DIGIT_COUNT &&
+//        hex,DECODE [c-5:4]=prev_hex_val prev_hex_val, curr_hex_val, next_hex_val
+//        // if prev_hex_val in high, curr_hex_val in low, skip.
+//        // if curr_hex_val in high, next_hex_val in low, write [u16]
+// if curr_hex_val not in high, write [u8]
+// before writing, find num of output characters per threads,
+// then do intra-warp/intra-block scan for out_idx
+// propagate offset from next iteration to carry forward.
+// Uses 1 warp per string or 1 block per string
+
+/**
+ * @brief Warp/Block parallel version of string_parse functor
+ *
+ * @tparam is_warp True if 1 warp per string, False if 1 block per string
+ * @tparam num_warps Number of warps per block
+ * @tparam str_tuple_it Iterator type for tuple with string pointer and its length
+ * @param str_tuples iterator of tuple with string pointer and its length
+ * @param total_out_strings Number of string rows to be processed
+ * @param str_counter Counter to keep track of processed number of strings
+ * @param null_mask Null mask
+ * @param null_count_data pointer to store null count
+ * @param options Settings for controlling string processing behavior
+ * @param d_offsets Offsets to identify where to store the results for each string
+ * @param d_chars Character array to store the characters of strings
+ */
+template <bool is_warp, size_type num_warps, typename str_tuple_it>
+__global__ void parse_fn_string_parallel(str_tuple_it str_tuples,
+                                         size_type total_out_strings,
+                                         size_type* str_counter,
+                                         bitmask_type* null_mask,
+                                         size_type* null_count_data,
+                                         cudf::io::parse_options_view const options,
+                                         size_type* d_offsets,
+                                         char* d_chars)
+{
+  constexpr auto BLOCK_SIZE =
+    is_warp ? cudf::detail::warp_size : cudf::detail::warp_size * num_warps;
+  size_type lane = is_warp ? (threadIdx.x % BLOCK_SIZE) : threadIdx.x;
+
+  // get 1-string index per warp/block
+  auto get_next_string = [&]() {
+    if constexpr (is_warp) {
+      size_type istring;
+      if (lane == 0) { istring = atomicAdd(str_counter, 1); }
+      return __shfl_sync(0xffffffff, istring, 0);
+    } else {
+      // Ensure lane 0 doesn't update istring before all threads have read the previous iteration's
+      // istring value
+      __syncthreads();
+      __shared__ size_type istring;
+      if (lane == 0) { istring = atomicAdd(str_counter, 1); }
+      __syncthreads();
+      return istring;
+    }
+  };
+  // grid-stride loop.
+  for (size_type istring = get_next_string(); istring < total_out_strings;
+       istring           = get_next_string()) {
+    // skip nulls
+    if (null_mask != nullptr && not bit_is_set(null_mask, istring)) {
+      if (!d_chars && lane == 0) d_offsets[istring] = 0;
+      continue;  // gride-stride return;
+    }
+
+    auto in_begin           = str_tuples[istring].first;
+    auto in_end             = in_begin + str_tuples[istring].second;
+    auto const num_in_chars = str_tuples[istring].second;
+    if constexpr (is_warp) {
+      if (num_in_chars <= SINGLE_THREAD_THRESHOLD or num_in_chars > WARP_THRESHOLD) continue;
+    } else {
+      if (num_in_chars <= WARP_THRESHOLD) continue;
+    }
+
+    // Check if the value corresponds to the null literal
+    if (!d_chars) {
+      auto const is_null_literal = serialized_trie_contains(
+        options.trie_na, {in_begin, static_cast<std::size_t>(num_in_chars)});
+      if (is_null_literal && null_mask != nullptr) {
+        if (lane == 0) {
+          clear_bit(null_mask, istring);
+          atomicAdd(null_count_data, 1);
+          if (!d_chars) d_offsets[istring] = 0;
+        }
+        continue;  // gride-stride return;
+      }
+    }
+    // String values are indicated by keeping the quote character
+    bool const is_string_value =
+      num_in_chars >= 2LL &&
+      (options.quotechar == '\0' ||
+       (*in_begin == options.quotechar) && (*thrust::prev(in_end) == options.quotechar));
+    char* d_buffer = d_chars ? d_chars + d_offsets[istring] : nullptr;
+
+    // Copy literal/numeric value
+    if (not is_string_value) {
+      if (!d_chars) {
+        if (lane == 0) { d_offsets[istring] = in_end - in_begin; }
+      } else {
+        for (thread_index_type char_index = lane; char_index < (in_end - in_begin);
+             char_index += BLOCK_SIZE) {
+          d_buffer[char_index] = in_begin[char_index];
+        }
+      }
+      continue;  // gride-stride return;
+    }
+
+    // Exclude beginning and ending quote chars from string range
+    if (!options.keepquotes) {
+      ++in_begin;
+      --in_end;
+    }
+    // warp-parallelized or block-parallelized process_string()
+
+    auto is_hex = [](auto ch) {
+      return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
+    };
+
+    // for backslash scan calculation: is_previous_escaping_backslash
+    [[maybe_unused]] auto warp_id = threadIdx.x / cudf::detail::warp_size;
+    bool init_state_reg;
+    __shared__ bool init_state_shared;
+    size_type last_offset_reg;
+    __shared__ size_type last_offset_shared;
+    bool& init_state(is_warp ? init_state_reg : init_state_shared);
+    size_type& last_offset(is_warp ? last_offset_reg : last_offset_shared);
+    if (is_warp || lane == 0) {
+      init_state  = false;
+      last_offset = 0;
+    }
+    using bitfield =
+      std::conditional_t<is_warp, bitfield_warp<num_warps>, bitfield_block<num_warps>>;
+    __shared__ bitfield is_slash;
+    is_slash.reset(warp_id);
+    __syncthreads();
+    // 0-31, 32-63, ... i*32-n.
+    // entire warp executes but with mask.
+    for (thread_index_type char_index = lane;
+         char_index < cudf::util::round_up_safe(in_end - in_begin, static_cast<long>(BLOCK_SIZE));
+         char_index += BLOCK_SIZE) {
+      bool const is_within_bounds = char_index < (in_end - in_begin);
+      auto const MASK   = is_warp ? __ballot_sync(0xffffffff, is_within_bounds) : 0xffffffff;
+      auto const c      = is_within_bounds ? in_begin[char_index] : '\0';
+      auto const prev_c = (char_index > 0 and is_within_bounds) ? in_begin[char_index - 1] : '\0';
+      auto const escaped_char = get_escape_char(c);
+
+      bool is_escaping_backslash{false};
+      [[maybe_unused]] bool is_prev_escaping_backslash{false};
+      // To check current is backslash by checking if previous is backslash.
+      // curr = !prev & c=='\\'
+      // So, scan is required from beginning of string.
+      // State table approach (intra-warp FST) (intra-block FST)
+      // 2 states: Not-Slash(NS), Slash(S).
+      // prev  /   *
+      // NS    S  NS
+      //  S   NS  NS
+      // After inclusive scan, all current S states translate to escaping backslash.
+      // All escaping backslash should be skipped.
+
+      struct state_table {
+        // using bit fields instead of state[2]
+        bool state0 : 1;
+        bool state1 : 1;
+        bool inline __device__ get(bool init_state) const { return init_state ? state1 : state0; }
+      };
+      state_table curr{is_within_bounds && c == '\\', false};  // state transition vector.
+      auto composite_op = [](state_table op1, state_table op2) {
+        // equivalent of state_table{op2.state[op1.state[0]], op2.state[op1.state[1]]};
+        return state_table{op1.state0 ? op2.state1 : op2.state0,
+                           op1.state1 ? op2.state1 : op2.state0};
+      };
+      state_table scanned;
+      // inclusive scan of escaping backslashes
+      if constexpr (is_warp) {
+        using SlashScan = cub::WarpScan<state_table>;
+        __shared__ typename SlashScan::TempStorage temp_slash[num_warps];
+        SlashScan(temp_slash[warp_id]).InclusiveScan(curr, scanned, composite_op);
+        is_escaping_backslash = scanned.get(init_state);
+        init_state            = __shfl_sync(MASK, is_escaping_backslash, BLOCK_SIZE - 1);
+        __syncwarp();
+        is_slash.shift(warp_id);
+        is_slash.set_bits(warp_id, is_escaping_backslash);
+        is_prev_escaping_backslash = is_slash.get_bit(warp_id, lane - 1);
+      } else {
+        using SlashScan = cub::BlockScan<state_table, BLOCK_SIZE>;
+        __shared__ typename SlashScan::TempStorage temp_slash;
+        SlashScan(temp_slash).InclusiveScan(curr, scanned, composite_op);
+        is_escaping_backslash = scanned.get(init_state);
+        __syncthreads();
+        if (threadIdx.x == BLOCK_SIZE - 1) init_state = is_escaping_backslash;
+        __syncthreads();
+        is_slash.shift(warp_id);
+        is_slash.set_bits(warp_id, is_escaping_backslash);
+        is_prev_escaping_backslash = is_slash.get_bit(warp_id, lane - 1);
+        // There is another __syncthreads() at the end of for-loop.
+      }
+
+      // String with parsing errors are made as null
+      bool error = false;
+      if (is_within_bounds) {
+        // curr=='\' and end, or prev=='\' and curr=='u' and end-curr < UNICODE_HEX_DIGIT_COUNT
+        // or prev=='\' and curr=='u' and end-curr >= UNICODE_HEX_DIGIT_COUNT and any non-hex
+        error |= (is_escaping_backslash /*c == '\\'*/ && char_index == (in_end - in_begin) - 1);
+        error |= (is_prev_escaping_backslash && escaped_char == NON_ESCAPE_CHAR);
+        error |= (is_prev_escaping_backslash && c == 'u' &&
+                  ((in_begin + char_index + UNICODE_HEX_DIGIT_COUNT >= in_end) |
+                   !is_hex(in_begin[char_index + 1]) | !is_hex(in_begin[char_index + 2]) |
+                   !is_hex(in_begin[char_index + 3]) | !is_hex(in_begin[char_index + 4])));
+      }
+      // Make sure all threads have no errors before continuing
+      if constexpr (is_warp) {
+        error = __any_sync(MASK, error);
+      } else {
+        using ErrorReduce = cub::BlockReduce<bool, BLOCK_SIZE>;
+        __shared__ typename ErrorReduce::TempStorage temp_storage_error;
+        __shared__ bool error_reduced;
+        error_reduced = ErrorReduce(temp_storage_error).Sum(error);  // TODO use cub::LogicalOR.
+        // only valid in thread0, so shared memory is used for broadcast.
+        __syncthreads();
+        error = error_reduced;
+      }
+      // If any thread has an error, skip the rest of the string and make this string as null
+      if (error) {
+        if (!d_chars && lane == 0) {
+          if (null_mask != nullptr) {
+            clear_bit(null_mask, istring);
+            atomicAdd(null_count_data, 1);
+          }
+          last_offset        = 0;
+          d_offsets[istring] = 0;
+        }
+        if constexpr (!is_warp) { __syncthreads(); }
+        break;  // gride-stride return;
+      }
+
+      // Skipping non-copied escaped characters
+      bool skip = !is_within_bounds;  // false;
+      // skip \ for \" \\ \/ \b \f \n \r \t \uXXXX
+      skip |= is_escaping_backslash;
+      if (is_within_bounds) {
+        // skip X for each X in \uXXXX
+        skip |=
+          char_index >= 2 && is_slash.get_bit(warp_id, lane - 2) && in_begin[char_index - 1] == 'u';
+        skip |=
+          char_index >= 3 && is_slash.get_bit(warp_id, lane - 3) && in_begin[char_index - 2] == 'u';
+        skip |=
+          char_index >= 4 && is_slash.get_bit(warp_id, lane - 4) && in_begin[char_index - 3] == 'u';
+        skip |=
+          char_index >= 5 && is_slash.get_bit(warp_id, lane - 5) && in_begin[char_index - 4] == 'u';
+      }
+      int this_num_out = 0;
+      cudf::char_utf8 write_char{};
+
+      if (!skip) {
+        // 1. Unescaped character
+        if (!is_prev_escaping_backslash) {
+          this_num_out = 1;
+          // writes char directly for non-unicode
+        } else {
+          // 2. Escaped character
+          if (escaped_char != UNICODE_SEQ) {
+            this_num_out = 1;
+            // writes char directly for non-unicode
+          } else {
+            // 3. Unicode
+            // UTF8 \uXXXX
+            auto hex_val     = parse_unicode_hex(in_begin + char_index + 1);
+            auto hex_low_val = 0;
+            // UTF16 \uXXXX\uXXXX
+            // Note: no need for scanned_backslash below because we already know that
+            // only '\u' check is enough.
+            if (hex_val >= UTF16_HIGH_SURROGATE_BEGIN && hex_val < UTF16_HIGH_SURROGATE_END &&
+                (in_begin + char_index + UNICODE_HEX_DIGIT_COUNT + NUM_UNICODE_ESC_SEQ_CHARS) <
+                  in_end &&
+                in_begin[char_index + NUM_UNICODE_ESC_SEQ_CHARS - 1] == '\\' &&
+                in_begin[char_index + NUM_UNICODE_ESC_SEQ_CHARS] == 'u') {
+              hex_low_val = parse_unicode_hex(in_begin + char_index + 1 + 6);
+            }
+            if (hex_val >= UTF16_HIGH_SURROGATE_BEGIN && hex_val < UTF16_HIGH_SURROGATE_END &&
+                hex_low_val >= UTF16_LOW_SURROGATE_BEGIN && hex_low_val < UTF16_LOW_SURROGATE_END) {
+              // Compute UTF16-encoded code point
+              uint32_t unicode_code_point = 0x10000 +
+                                            ((hex_val - UTF16_HIGH_SURROGATE_BEGIN) << 10) +
+                                            (hex_low_val - UTF16_LOW_SURROGATE_BEGIN);
+              write_char   = strings::detail::codepoint_to_utf8(unicode_code_point);
+              this_num_out = strings::detail::bytes_in_char_utf8(write_char);
+            } else {
+              // if hex_val is high surrogate, ideally it should be parsing failure.
+              // but skipping it as other parsers do this too.
+              if (hex_val >= UTF16_LOW_SURROGATE_BEGIN && hex_val < UTF16_LOW_SURROGATE_END) {
+                // Ideally this should be skipped if previous char is high surrogate.
+                skip         = true;
+                this_num_out = 0;
+                write_char   = 0;
+              } else {
+                // if UTF8
+                write_char   = strings::detail::codepoint_to_utf8(hex_val);
+                this_num_out = strings::detail::bytes_in_char_utf8(write_char);
+              }
+            }
+          }
+        }
+      }  // !skip end.
+      {
+        // compute offset to write output for each thread
+        size_type offset;
+        if constexpr (is_warp) {
+          using OffsetScan = cub::WarpScan<size_type>;
+          __shared__ typename OffsetScan::TempStorage temp_storage[num_warps];
+          OffsetScan(temp_storage[warp_id]).ExclusiveSum(this_num_out, offset);
+        } else {
+          using OffsetScan = cub::BlockScan<size_type, BLOCK_SIZE>;
+          __shared__ typename OffsetScan::TempStorage temp_storage;
+          OffsetScan(temp_storage).ExclusiveSum(this_num_out, offset);
+          __syncthreads();
+        }
+        offset += last_offset;
+        // Write output
+        if (d_chars && !skip) {
+          auto const is_not_unicode = (!is_prev_escaping_backslash) || escaped_char != UNICODE_SEQ;
+          if (is_not_unicode) {
+            *(d_buffer + offset) = (!is_prev_escaping_backslash) ? c : escaped_char;
+          } else {
+            strings::detail::from_char_utf8(write_char, d_buffer + offset);
+          }
+        }
+        offset += this_num_out;
+        if constexpr (is_warp) {
+          last_offset = __shfl_sync(0xffffffff, offset, BLOCK_SIZE - 1);
+        } else {
+          __syncthreads();
+          if (threadIdx.x == BLOCK_SIZE - 1) last_offset = offset;
+          __syncthreads();
+        }
+      }
+    }  // char for-loop
+    if (!d_chars && lane == 0) { d_offsets[istring] = last_offset; }
+  }  // grid-stride for-loop
+}
+
+template <typename str_tuple_it>
+struct string_parse {
+  str_tuple_it str_tuples;
+  bitmask_type* null_mask;
+  size_type* null_count_data;
+  cudf::io::parse_options_view const options;
+  size_type* d_offsets{};
+  char* d_chars{};
+
+  __device__ void operator()(size_type idx)
+  {
+    if (null_mask != nullptr && not bit_is_set(null_mask, idx)) {
+      if (!d_chars) d_offsets[idx] = 0;
+      return;
+    }
+    auto const in_begin     = str_tuples[idx].first;
+    auto const in_end       = in_begin + str_tuples[idx].second;
+    auto const num_in_chars = str_tuples[idx].second;
+
+    if (num_in_chars > SINGLE_THREAD_THRESHOLD) return;
+
+    // Check if the value corresponds to the null literal
+    if (!d_chars) {
+      auto const is_null_literal = serialized_trie_contains(
+        options.trie_na, {in_begin, static_cast<std::size_t>(num_in_chars)});
+      if (is_null_literal && null_mask != nullptr) {
+        clear_bit(null_mask, idx);
+        atomicAdd(null_count_data, 1);
+        if (!d_chars) d_offsets[idx] = 0;
+        return;
+      }
+    }
+
+    char* d_buffer        = d_chars ? d_chars + d_offsets[idx] : nullptr;
+    auto str_process_info = process_string(in_begin, in_end, d_buffer, options);
+    if (str_process_info.result != data_casting_result::PARSING_SUCCESS) {
+      if (null_mask != nullptr) {
+        clear_bit(null_mask, idx);
+        atomicAdd(null_count_data, 1);
+      }
+      if (!d_chars) d_offsets[idx] = 0;
+    } else {
+      if (!d_chars) d_offsets[idx] = str_process_info.bytes;
+    }
+  }
+};
+
+template <typename SymbolT>
+struct to_string_view_pair {
+  SymbolT const* data;
+  to_string_view_pair(SymbolT const* _data) : data(_data) {}
+  __device__ auto operator()(thrust::tuple<size_type, size_type> ip)
+  {
+    return thrust::pair<char const*, std::size_t>{data + thrust::get<0>(ip),
+                                                  static_cast<std::size_t>(thrust::get<1>(ip))};
+  }
+};
+
+template <typename string_view_pair_it>
+static std::unique_ptr<column> parse_string(string_view_pair_it str_tuples,
+                                            size_type col_size,
+                                            rmm::device_buffer&& null_mask,
+                                            rmm::device_scalar<size_type>& d_null_count,
+                                            cudf::io::parse_options_view const& options,
+                                            rmm::cuda_stream_view stream,
+                                            rmm::mr::device_memory_resource* mr)
+{
+  //  CUDF_FUNC_RANGE();
+
+  auto const max_length = thrust::transform_reduce(
+    rmm::exec_policy(stream),
+    str_tuples,
+    str_tuples + col_size,
+    [] __device__(auto t) { return t.second; },
+    size_type{0},
+    thrust::maximum<size_type>{});
+
+  auto offsets = cudf::make_numeric_column(
+    data_type{type_to_id<size_type>()}, col_size + 1, cudf::mask_state::UNALLOCATED, stream, mr);
+  auto d_offsets       = offsets->mutable_view().data<size_type>();
+  auto null_count_data = d_null_count.data();
+
+  auto single_thread_fn = string_parse<decltype(str_tuples)>{
+    str_tuples, static_cast<bitmask_type*>(null_mask.data()), null_count_data, options, d_offsets};
+  thrust::for_each_n(rmm::exec_policy(stream),
+                     thrust::make_counting_iterator<size_type>(0),
+                     col_size,
+                     single_thread_fn);
+
+  constexpr auto warps_per_block  = 8;
+  constexpr int threads_per_block = cudf::detail::warp_size * warps_per_block;
+  auto num_blocks                 = cudf::util::div_rounding_up_safe(col_size, warps_per_block);
+  auto str_counter                = cudf::numeric_scalar(size_type{0}, true, stream);
+
+  // TODO run these independent kernels in parallel streams.
+  if (max_length > SINGLE_THREAD_THRESHOLD) {
+    parse_fn_string_parallel<true, warps_per_block>
+      <<<num_blocks, threads_per_block, 0, stream.value()>>>(
+        str_tuples,
+        col_size,
+        str_counter.data(),
+        static_cast<bitmask_type*>(null_mask.data()),
+        null_count_data,
+        options,
+        d_offsets,
+        nullptr);
+  }
+
+  if (max_length > WARP_THRESHOLD) {
+    // for strings longer than WARP_THRESHOLD, 1 block per string
+    str_counter.set_value(0, stream);
+    parse_fn_string_parallel<false, warps_per_block>
+      <<<num_blocks, threads_per_block, 0, stream.value()>>>(
+        str_tuples,
+        col_size,
+        str_counter.data(),
+        static_cast<bitmask_type*>(null_mask.data()),
+        null_count_data,
+        options,
+        d_offsets,
+        nullptr);
+  }
+  auto const bytes =
+    cudf::detail::sizes_to_offsets(d_offsets, d_offsets + col_size + 1, d_offsets, stream);
+  CUDF_EXPECTS(bytes <= std::numeric_limits<size_type>::max(),
+               "Size of output exceeds the column size limit",
+               std::overflow_error);
+
+  // CHARS column
+  std::unique_ptr<column> chars =
+    strings::detail::create_chars_child_column(static_cast<size_type>(bytes), stream, mr);
+  auto d_chars = chars->mutable_view().data<char>();
+
+  single_thread_fn.d_chars = d_chars;
+  thrust::for_each_n(rmm::exec_policy(stream),
+                     thrust::make_counting_iterator<size_type>(0),
+                     col_size,
+                     single_thread_fn);
+
+  if (max_length > SINGLE_THREAD_THRESHOLD) {
+    str_counter.set_value(0, stream);
+    parse_fn_string_parallel<true, warps_per_block>
+      <<<num_blocks, threads_per_block, 0, stream.value()>>>(
+        str_tuples,
+        col_size,
+        str_counter.data(),
+        static_cast<bitmask_type*>(null_mask.data()),
+        null_count_data,
+        options,
+        d_offsets,
+        d_chars);
+  }
+
+  if (max_length > WARP_THRESHOLD) {
+    str_counter.set_value(0, stream);
+    // for strings longer than WARP_THRESHOLD, 1 block per string
+    parse_fn_string_parallel<false, warps_per_block>
+      <<<num_blocks, threads_per_block, 0, stream.value()>>>(
+        str_tuples,
+        col_size,
+        str_counter.data(),
+        static_cast<bitmask_type*>(null_mask.data()),
+        null_count_data,
+        options,
+        d_offsets,
+        d_chars);
+  }
+
+  return make_strings_column(col_size,
+                             std::move(offsets),
+                             std::move(chars),
+                             d_null_count.value(stream),
+                             std::move(null_mask));
+}
+
+std::unique_ptr<column> parse_data(
+  const char* data,
+  thrust::zip_iterator<thrust::tuple<const size_type*, const size_type*>> offset_length_begin,
+  size_type col_size,
+  data_type col_type,
+  rmm::device_buffer&& null_mask,
+  size_type null_count,
+  cudf::io::parse_options_view const& options,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+
+  if (col_size == 0) { return make_empty_column(col_type); }
+  auto d_null_count    = rmm::device_scalar<size_type>(null_count, stream);
+  auto null_count_data = d_null_count.data();
+
+  // Prepare iterator that returns (string_ptr, string_length)-pairs needed by type conversion
+  auto str_tuples = thrust::make_transform_iterator(offset_length_begin, to_string_view_pair{data});
+
+  if (col_type == cudf::data_type{cudf::type_id::STRING}) {
+    return parse_string(str_tuples,
+                        col_size,
+                        std::forward<rmm::device_buffer>(null_mask),
+                        d_null_count,
+                        options,
+                        stream,
+                        mr);
+  }
+
+  auto out_col =
+    make_fixed_width_column(col_type, col_size, std::move(null_mask), null_count, stream, mr);
+  auto output_dv_ptr = mutable_column_device_view::create(*out_col, stream);
+
+  // use `ConvertFunctor` to convert non-string values
+  thrust::for_each_n(
+    rmm::exec_policy(stream),
+    thrust::make_counting_iterator<size_type>(0),
+    col_size,
+    [str_tuples, col = *output_dv_ptr, options, col_type, null_count_data] __device__(
+      size_type row) {
+      if (col.is_null(row)) { return; }
+      auto const in = str_tuples[row];
+
+      auto const is_null_literal =
+        serialized_trie_contains(options.trie_na, {in.first, static_cast<size_t>(in.second)});
+
+      if (is_null_literal) {
+        col.set_null(row);
+        atomicAdd(null_count_data, 1);
+        return;
+      }
+
+      // If this is a string value, remove quotes
+      auto [in_begin, in_end] = trim_quotes(in.first, in.first + in.second, options.quotechar);
+
+      auto const is_parsed = cudf::type_dispatcher(col_type,
+                                                   ConvertFunctor{},
+                                                   in_begin,
+                                                   in_end,
+                                                   col.data<char>(),
+                                                   row,
+                                                   col_type,
+                                                   options,
+                                                   false);
+      if (not is_parsed) {
+        col.set_null(row);
+        atomicAdd(null_count_data, 1);
+      }
+    });
+
+  out_col->set_null_count(d_null_count.value(stream));
+
+  return out_col;
+}
+
+}  // namespace cudf::io::json::detail
diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh
index 5c3af588411..43d62fcd513 100644
--- a/cpp/src/io/utilities/parsing_utils.cuh
+++ b/cpp/src/io/utilities/parsing_utils.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -116,6 +116,28 @@ struct parse_options {
   }
 };
 
+/**
+ * @brief Returns the escaped characters for a given character.
+ *
+ * @param escaped_char The character to escape.
+ * @return The escaped characters for a given character.
+ */
+__device__ __forceinline__ thrust::pair<char, char> get_escaped_char(char escaped_char)
+{
+  switch (escaped_char) {
+    case '"': return {'\\', '"'};
+    case '\\': return {'\\', '\\'};
+    case '/': return {'\\', '/'};
+    case '\b': return {'\\', 'b'};
+    case '\f': return {'\\', 'f'};
+    case '\n': return {'\\', 'n'};
+    case '\r': return {'\\', 'r'};
+    case '\t': return {'\\', 't'};
+    // case 'u': return UNICODE_SEQ;
+    default: return {'\0', escaped_char};
+  }
+}
+
 /**
  * @brief Returns the numeric value of an ASCII/UTF-8 character.
  * Handles hexadecimal digits, both uppercase and lowercase
diff --git a/cpp/src/io/utilities/string_parsing.hpp b/cpp/src/io/utilities/string_parsing.hpp
new file mode 100644
index 00000000000..12fc0a5b2e7
--- /dev/null
+++ b/cpp/src/io/utilities/string_parsing.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <io/utilities/parsing_utils.cuh>
+
+#include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/tuple.h>
+
+namespace cudf::io {
+namespace detail {
+
+/**
+ * @brief Infers data type for a given JSON string input `data`.
+ *
+ * @throw cudf::logic_error if input size is 0
+ * @throw cudf::logic_error if date time is not inferred as string
+ * @throw cudf::logic_error if data type inference failed
+ *
+ * @param options View of inference options
+ * @param data JSON string input
+ * @param offset_length_begin The beginning of an offset-length tuple sequence
+ * @param size Size of the string input
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return The inferred data type
+ */
+cudf::data_type infer_data_type(
+  cudf::io::json_inference_options_view const& options,
+  device_span<char const> data,
+  thrust::zip_iterator<thrust::tuple<const size_type*, const size_type*>> offset_length_begin,
+  std::size_t const size,
+  rmm::cuda_stream_view stream);
+}  // namespace detail
+
+namespace json::detail {
+
+/**
+ * @brief Parses the data from an iterator of string views, casting it to the given target data type
+ *
+ * @param data string input base pointer
+ * @param offset_length_begin The beginning of an offset-length tuple sequence
+ * @param col_size The total number of items of this column
+ * @param col_type The column's target data type
+ * @param null_mask A null mask that renders certain items from the input invalid
+ * @param options Settings for controlling the processing behavior
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr The resource to be used for device memory allocation
+ * @return The column that contains the parsed data
+ */
+std::unique_ptr<column> parse_data(
+  const char* data,
+  thrust::zip_iterator<thrust::tuple<const size_type*, const size_type*>> offset_length_begin,
+  size_type col_size,
+  data_type col_type,
+  rmm::device_buffer&& null_mask,
+  size_type null_count,
+  cudf::io::parse_options_view const& options,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr);
+}  // namespace json::detail
+}  // namespace cudf::io
diff --git a/cpp/src/io/utilities/type_inference.cuh b/cpp/src/io/utilities/type_inference.cu
similarity index 84%
rename from cpp/src/io/utilities/type_inference.cuh
rename to cpp/src/io/utilities/type_inference.cu
index a9ccc80ca33..79a5c8f1c4c 100644
--- a/cpp/src/io/utilities/type_inference.cuh
+++ b/cpp/src/io/utilities/type_inference.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,23 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#pragma once
 
 #include <io/utilities/column_type_histogram.hpp>
-#include <io/utilities/parsing_utils.cuh>
+#include <io/utilities/string_parsing.hpp>
 #include <io/utilities/trie.cuh>
 
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/types.hpp>
 #include <cudf/utilities/error.hpp>
-#include <cudf/utilities/span.hpp>
 
-#include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_scalar.hpp>
 
-#include <thrust/distance.h>
-#include <thrust/tuple.h>
-
 #include <cub/block/block_reduce.cuh>
 
 #include <cstddef>
@@ -114,14 +107,14 @@ __device__ __inline__ bool is_like_float(std::size_t len,
  *
  * @param[in] options View of inference options
  * @param[in] data JSON string input
- * @param[in] column_strings_begin The beginning of an offset-length tuple sequence
+ * @param[in] offset_length_begin The beginning of an offset-length tuple sequence
  * @param[in] size Size of the string input
  * @param[out] column_info Histogram of column type counters
  */
 template <int BlockSize, typename OptionsView, typename ColumnStringIter>
 __global__ void infer_column_type_kernel(OptionsView options,
                                          device_span<char const> data,
-                                         ColumnStringIter column_strings_begin,
+                                         ColumnStringIter offset_length_begin,
                                          std::size_t size,
                                          cudf::io::column_type_histogram* column_info)
 {
@@ -129,8 +122,8 @@ __global__ void infer_column_type_kernel(OptionsView options,
 
   for (auto idx = threadIdx.x + blockDim.x * blockIdx.x; idx < size;
        idx += gridDim.x * blockDim.x) {
-    auto const field_offset = thrust::get<0>(*(column_strings_begin + idx));
-    auto const field_len    = thrust::get<1>(*(column_strings_begin + idx));
+    auto const field_offset = thrust::get<0>(*(offset_length_begin + idx));
+    auto const field_len    = thrust::get<1>(*(offset_length_begin + idx));
     auto const field_begin  = data.begin() + field_offset;
 
     if (cudf::detail::serialized_trie_contains(
@@ -234,7 +227,7 @@ __global__ void infer_column_type_kernel(OptionsView options,
  *
  * @param options View of inference options
  * @param data JSON string input
- * @param column_strings_begin The beginning of an offset-length tuple sequence
+ * @param offset_length_begin The beginning of an offset-length tuple sequence
  * @param size Size of the string input
  * @param stream CUDA stream used for device memory operations and kernel launches
  * @return A histogram containing column-specific type counters
@@ -242,7 +235,7 @@ __global__ void infer_column_type_kernel(OptionsView options,
 template <typename OptionsView, typename ColumnStringIter>
 cudf::io::column_type_histogram infer_column_type(OptionsView const& options,
                                                   cudf::device_span<char const> data,
-                                                  ColumnStringIter column_strings_begin,
+                                                  ColumnStringIter offset_length_begin,
                                                   std::size_t const size,
                                                   rmm::cuda_stream_view stream)
 {
@@ -254,40 +247,22 @@ cudf::io::column_type_histogram infer_column_type(OptionsView const& options,
     d_column_info.data(), 0, sizeof(cudf::io::column_type_histogram), stream.value()));
 
   infer_column_type_kernel<block_size><<<grid_size, block_size, 0, stream.value()>>>(
-    options, data, column_strings_begin, size, d_column_info.data());
+    options, data, offset_length_begin, size, d_column_info.data());
 
   return d_column_info.value(stream);
 }
 
-/**
- * @brief Infers data type for a given JSON string input `data`.
- *
- * @throw cudf::logic_error if input size is 0
- * @throw cudf::logic_error if date time is not inferred as string
- * @throw cudf::logic_error if data type inference failed
- *
- * @tparam OptionsView Type of inference options view
- * @tparam ColumnStringIter Iterator type whose `value_type` is convertible to
- * `thrust::tuple<device_span, string_view>`
- *
- * @param options View of inference options
- * @param data JSON string input
- * @param column_strings_begin The beginning of an offset-length tuple sequence
- * @param size Size of the string input
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @return The inferred data type
- */
-template <typename OptionsView, typename ColumnStringIter>
-cudf::data_type infer_data_type(OptionsView const& options,
-                                device_span<char const> data,
-                                ColumnStringIter column_strings_begin,
-                                std::size_t const size,
-                                rmm::cuda_stream_view stream)
+cudf::data_type infer_data_type(
+  cudf::io::json_inference_options_view const& options,
+  device_span<char const> data,
+  thrust::zip_iterator<thrust::tuple<const size_type*, const size_type*>> offset_length_begin,
+  std::size_t const size,
+  rmm::cuda_stream_view stream)
 {
   CUDF_FUNC_RANGE();
   CUDF_EXPECTS(size != 0, "No data available for data type inference.\n");
 
-  auto const h_column_info = infer_column_type(options, data, column_strings_begin, size, stream);
+  auto const h_column_info = infer_column_type(options, data, offset_length_begin, size, stream);
 
   auto get_type_id = [&](auto const& cinfo) {
     auto int_count_total =
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index 220f1a3391f..7c911ac2e04 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -26,6 +26,7 @@
 #include <cudf/io/arrow_io_source.hpp>
 #include <cudf/io/json.hpp>
 #include <cudf/strings/convert/convert_fixed_point.hpp>
+#include <cudf/strings/repeat_strings.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
@@ -1370,6 +1371,124 @@ TEST_F(JsonReaderTest, JsonExperimentalLines)
   CUDF_TEST_EXPECT_TABLES_EQUAL(legacy_reader_table.tbl->view(), table.tbl->view());
 }
 
+TEST_F(JsonReaderTest, JsonLongString)
+{
+  // Unicode
+  // 0000-FFFF     Basic Multilingual Plane
+  // 10000-10FFFF  Supplementary Plane
+  cudf::test::strings_column_wrapper col1{
+    {
+      "\"\\/\b\f\n\r\t",
+      "\"",
+      "\\",
+      "/",
+      "\b",
+      "\f\n",
+      "\r\t",
+      "$€",
+      "ராபிட்ஸ்",
+      "C𝞵𝓓𝒻",
+      "",  // null
+      "",  // null
+      "கார்த்தி",
+      "CႮ≪ㇳ䍏凹沦王辿龸ꁗ믜스폶ﴠ",  //  0000-FFFF
+      "𐀀𑿪𒐦𓃰𔙆 𖦆𗿿𘳕𚿾[↳] 𜽆𝓚𞤁🄰",                            // 10000-1FFFF
+      "𠘨𡥌𢗉𣇊𤊩𥅽𦉱𧴱𨁲𩁹𪐢𫇭𬬭𭺷𮊦屮",                // 20000-2FFFF
+      "𰾑𱔈𲍉",                                          // 30000-3FFFF
+      R"("$€ \u0024\u20ac \\u0024\\u20ac  \\\u0024\\\u20ac \\\\u0024\\\\u20ac)",
+      R"(        \\\\\\\\\\\\\\\\)",
+      R"(\\\\\\\\\\\\\\\\)",
+      R"(\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\)",
+      R"( \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\)",
+      R"(                      \\abcd)",
+      R"(                 \\\\\\\\\\\\\\\\                 \\\\\\\\\\\\\\\\)",
+      R"(                \\\\\\\\\\\\\\\\                 \\\\\\\\\\\\\\\\)",
+    },
+    cudf::test::iterators::nulls_at({10, 11})};
+
+  cudf::test::fixed_width_column_wrapper<int16_t> repeat_times{
+    {1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 19, 37, 81, 161, 323, 631, 1279, 10, 1, 2, 1, 100, 1000, 1, 3},
+    cudf::test::iterators::no_nulls()};
+  auto d_col2 = cudf::strings::repeat_strings(cudf::strings_column_view{col1}, repeat_times);
+  auto col2   = d_col2->view();
+  cudf::table_view const tbl_view{{col1, col2, repeat_times}};
+  cudf::io::table_metadata mt{{{"col1"}, {"col2"}, {"int16"}}};
+
+  std::vector<char> out_buffer;
+  auto destination     = cudf::io::sink_info(&out_buffer);
+  auto options_builder = cudf::io::json_writer_options_builder(destination, tbl_view)
+                           .include_nulls(true)
+                           .metadata(mt)
+                           .lines(true)
+                           .na_rep("null");
+
+  cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
+
+  cudf::table_view const expected = tbl_view;
+  std::map<std::string, data_type> types;
+  types["col1"]  = data_type{type_id::STRING};
+  types["col2"]  = data_type{type_id::STRING};
+  types["int16"] = data_type{type_id::INT16};
+
+  // Initialize parsing options (reading json lines)
+  cudf::io::json_reader_options json_lines_options =
+    cudf::io::json_reader_options::builder(
+      cudf::io::source_info{out_buffer.data(), out_buffer.size()})
+      .lines(true)
+      .dtypes(types);
+
+  // Read test data via nested JSON reader
+  auto const table = cudf::io::read_json(json_lines_options);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, table.tbl->view());
+}
+
+TEST_F(JsonReaderTest, ErrorStrings)
+{
+  // cases of invalid escape characters, invalid unicode encodings.
+  // Error strings will decode to nulls
+  auto const buffer = std::string{R"(
+    {"col0": "\"\a"}
+    {"col0": "\u"}
+    {"col0": "\u0"}
+    {"col0": "\u0b"}
+    {"col0": "\u00b"}
+    {"col0": "\u00bz"}
+    {"col0": "\t34567890123456\t9012345678901\ug0bc"}
+    {"col0": "\t34567890123456\t90123456789012\u0hbc"}
+    {"col0": "\t34567890123456\t90123456789012\u00ic"}
+    {"col0": "\u0b95\u0bbe\u0bb0\u0bcd\u0ba4\u0bcd\u0ba4\u0bbfகார்த்தி"}
+)"};
+  // Last one is not an error case, but shows that unicode in json is copied string column output.
+
+  cudf::io::json_reader_options const in_opts =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
+      .dtypes({data_type{cudf::type_id::STRING}})
+      .lines(true)
+      .legacy(false);
+
+  auto const result      = cudf::io::read_json(in_opts);
+  auto const result_view = result.tbl->view().column(0);
+
+  EXPECT_EQ(result.metadata.schema_info[0].name, "col0");
+  EXPECT_EQ(result_view.null_count(), 9);
+  cudf::test::strings_column_wrapper expected{
+    {"",
+     "",
+     "",
+     "",
+     "",
+     "",
+     "",
+     "",
+     "",
+     "கார்த்தி\xe0\xae\x95\xe0\xae\xbe\xe0\xae\xb0\xe0\xaf\x8d\xe0\xae\xa4\xe0\xaf\x8d\xe0\xae\xa4"
+     "\xe0\xae\xbf"},
+    // unicode hex 0xe0 0xae 0x95 0xe0 0xae 0xbe 0xe0 0xae 0xb0 0xe0 0xaf 0x8d
+    //             0xe0 0xae 0xa4 0xe0 0xaf 0x8d 0xe0 0xae 0xa4 0xe0 0xae 0xbf
+    cudf::test::iterators::nulls_at({0, 1, 2, 3, 4, 5, 6, 7, 8})};
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_view, expected);
+}
+
 TEST_F(JsonReaderTest, TokenAllocation)
 {
   std::array<std::string const, 3> const json_inputs{
diff --git a/cpp/tests/io/json_type_cast_test.cu b/cpp/tests/io/json_type_cast_test.cu
index 5c32131114d..9eb5e8f5230 100644
--- a/cpp/tests/io/json_type_cast_test.cu
+++ b/cpp/tests/io/json_type_cast_test.cu
@@ -21,15 +21,20 @@
 #include <cudf_test/iterator_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
+#include <io/utilities/string_parsing.hpp>
+
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/io/datasource.hpp>
-#include <cudf/io/detail/data_casting.cuh>
 #include <cudf/io/json.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
 
+#include <rmm/exec_policy.hpp>
+
+#include <algorithm>
+#include <iterator>
 #include <type_traits>
 
 using namespace cudf::test::iterators;
@@ -37,13 +42,27 @@ using namespace cudf::test::iterators;
 struct JSONTypeCastTest : public cudf::test::BaseFixture {};
 
 namespace {
-struct to_thrust_pair_fn {
-  __device__ thrust::pair<char const*, cudf::size_type> operator()(
-    thrust::pair<cudf::string_view, bool> const& p)
+struct offsets_to_length {
+  __device__ cudf::size_type operator()(thrust::tuple<cudf::size_type, cudf::size_type> const& p)
   {
-    return {p.first.data(), p.first.size_bytes()};
+    return thrust::get<1>(p) - thrust::get<0>(p);
   }
 };
+
+/// Returns length of each string in the column
+auto string_offset_to_length(cudf::strings_column_view const& column, rmm::cuda_stream_view stream)
+{
+  auto offsets_begin = column.offsets_begin();
+  auto offsets_pair =
+    thrust::make_zip_iterator(thrust::make_tuple(offsets_begin, thrust::next(offsets_begin)));
+  rmm::device_uvector<cudf::size_type> svs_length(column.size(), stream);
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
+                    offsets_pair,
+                    offsets_pair + column.size(),
+                    svs_length.begin(),
+                    offsets_to_length{});
+  return svs_length;
+}
 }  // namespace
 
 auto default_json_options()
@@ -67,26 +86,23 @@ TEST_F(JSONTypeCastTest, String)
   std::vector<char const*> input_values{"this", "is", "null", "of", "", "strings", R"("null")"};
   cudf::test::strings_column_wrapper input(input_values.begin(), input_values.end(), in_valids);
 
-  auto d_column = cudf::column_device_view::create(input);
-  rmm::device_uvector<thrust::pair<char const*, cudf::size_type>> svs(d_column->size(), stream);
-  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
-                    d_column->pair_begin<cudf::string_view, false>(),
-                    d_column->pair_end<cudf::string_view, false>(),
-                    svs.begin(),
-                    to_thrust_pair_fn{});
+  auto column                                     = cudf::strings_column_view(input);
+  rmm::device_uvector<cudf::size_type> svs_length = string_offset_to_length(column, stream);
 
   auto null_mask_it = no_nulls();
   auto null_mask =
-    std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + d_column->size()));
-
-  auto str_col = cudf::io::json::detail::parse_data(svs.data(),
-                                                    svs.size(),
-                                                    type,
-                                                    std::move(null_mask),
-                                                    0,
-                                                    default_json_options().view(),
-                                                    stream,
-                                                    mr);
+    std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + column.size()));
+
+  auto str_col = cudf::io::json::detail::parse_data(
+    column.chars().data<char>(),
+    thrust::make_zip_iterator(thrust::make_tuple(column.offsets_begin(), svs_length.begin())),
+    column.size(),
+    type,
+    std::move(null_mask),
+    0,
+    default_json_options().view(),
+    stream,
+    mr);
 
   auto out_valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2 and i != 4; });
@@ -103,26 +119,23 @@ TEST_F(JSONTypeCastTest, Int)
   auto const type   = cudf::data_type{cudf::type_id::INT64};
 
   cudf::test::strings_column_wrapper data({"1", "null", "3", "true", "5", "false"});
-  auto d_column = cudf::column_device_view::create(data);
-  rmm::device_uvector<thrust::pair<char const*, cudf::size_type>> svs(d_column->size(), stream);
-  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
-                    d_column->pair_begin<cudf::string_view, false>(),
-                    d_column->pair_end<cudf::string_view, false>(),
-                    svs.begin(),
-                    to_thrust_pair_fn{});
+  auto column                                     = cudf::strings_column_view(data);
+  rmm::device_uvector<cudf::size_type> svs_length = string_offset_to_length(column, stream);
 
   auto null_mask_it = no_nulls();
   auto null_mask =
-    std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + d_column->size()));
-
-  auto col = cudf::io::json::detail::parse_data(svs.data(),
-                                                svs.size(),
-                                                type,
-                                                std::move(null_mask),
-                                                0,
-                                                default_json_options().view(),
-                                                stream,
-                                                mr);
+    std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + column.size()));
+
+  auto col = cudf::io::json::detail::parse_data(
+    column.chars().data<char>(),
+    thrust::make_zip_iterator(thrust::make_tuple(column.offsets_begin(), svs_length.begin())),
+    column.size(),
+    type,
+    std::move(null_mask),
+    0,
+    default_json_options().view(),
+    stream,
+    mr);
 
   auto expected =
     cudf::test::fixed_width_column_wrapper<int64_t>{{1, 2, 3, 1, 5, 0}, {1, 0, 1, 1, 1, 1}};
@@ -146,26 +159,23 @@ TEST_F(JSONTypeCastTest, StringEscapes)
     R"("escape with nothing to escape \")",
     R"("\"\\\/\b\f\n\r\t")",
   });
-  auto d_column = cudf::column_device_view::create(data);
-  rmm::device_uvector<thrust::pair<char const*, cudf::size_type>> svs(d_column->size(), stream);
-  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
-                    d_column->pair_begin<cudf::string_view, false>(),
-                    d_column->pair_end<cudf::string_view, false>(),
-                    svs.begin(),
-                    to_thrust_pair_fn{});
+  auto column                                     = cudf::strings_column_view(data);
+  rmm::device_uvector<cudf::size_type> svs_length = string_offset_to_length(column, stream);
 
   auto null_mask_it = no_nulls();
   auto null_mask =
-    std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + d_column->size()));
-
-  auto col = cudf::io::json::detail::parse_data(svs.data(),
-                                                svs.size(),
-                                                type,
-                                                std::move(null_mask),
-                                                0,
-                                                default_json_options().view(),
-                                                stream,
-                                                mr);
+    std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + column.size()));
+
+  auto col = cudf::io::json::detail::parse_data(
+    column.chars().data<char>(),
+    thrust::make_zip_iterator(thrust::make_tuple(column.offsets_begin(), svs_length.begin())),
+    column.size(),
+    type,
+    std::move(null_mask),
+    0,
+    default_json_options().view(),
+    stream,
+    mr);
 
   auto expected = cudf::test::strings_column_wrapper{
     {"🚀", "Ａ🚀ＡＡ", "", "", "", "\\", "➩", "", "\"\\/\b\f\n\r\t"},
@@ -173,4 +183,71 @@ TEST_F(JSONTypeCastTest, StringEscapes)
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(col->view(), expected);
 }
 
+TEST_F(JSONTypeCastTest, ErrorNulls)
+{
+  auto const stream = cudf::get_default_stream();
+  auto mr           = rmm::mr::get_current_device_resource();
+  auto const type   = cudf::data_type{cudf::type_id::STRING};
+
+  // error in decoding
+  std::vector<char const*> input_values{R"("\"\a")",
+                                        R"("\u")",
+                                        R"("\u0")",
+                                        R"("\u0b")",
+                                        R"("\u00b")",
+                                        R"("\u00bz")",
+                                        R"("\t34567890123456\t9012345678901\ug0bc")",
+                                        R"("\t34567890123456\t90123456789012\u0hbc")",
+                                        R"("\t34567890123456\t90123456789012\u00ic")",
+                                        R"("\t34567890123456\t9012345678901\")",
+                                        R"("\t34567890123456\t90123456789012\")",
+                                        R"(null)"};
+  // Note: without quotes are copied without decoding
+  cudf::test::strings_column_wrapper input(input_values.begin(), input_values.end());
+
+  auto column        = cudf::strings_column_view(input);
+  auto space_length  = 128;
+  auto prepend_space = [&space_length](auto const& s) {
+    if (s[0] == '"') return "\"" + std::string(space_length, ' ') + std::string(s + 1);
+    return std::string(s);
+  };
+  std::vector<std::string> small_input;
+  std::transform(
+    input_values.begin(), input_values.end(), std::back_inserter(small_input), prepend_space);
+  cudf::test::strings_column_wrapper small_col(small_input.begin(), small_input.end());
+
+  std::vector<std::string> large_input;
+  space_length = 128 * 128;
+  std::transform(
+    input_values.begin(), input_values.end(), std::back_inserter(large_input), prepend_space);
+  cudf::test::strings_column_wrapper large_col(large_input.begin(), large_input.end());
+
+  std::vector<char const*> expected_values{"", "", "", "", "", "", "", "", "", "", "", ""};
+  cudf::test::strings_column_wrapper expected(
+    expected_values.begin(), expected_values.end(), cudf::test::iterators::all_nulls());
+
+  // single threads, warp, block.
+  for (auto const& column :
+       {column, cudf::strings_column_view(small_col), cudf::strings_column_view(large_col)}) {
+    rmm::device_uvector<cudf::size_type> svs_length = string_offset_to_length(column, stream);
+
+    auto null_mask_it = no_nulls();
+    auto null_mask =
+      std::get<0>(cudf::test::detail::make_null_mask(null_mask_it, null_mask_it + column.size()));
+
+    auto str_col = cudf::io::json::detail::parse_data(
+      column.chars().data<char>(),
+      thrust::make_zip_iterator(thrust::make_tuple(column.offsets_begin(), svs_length.begin())),
+      column.size(),
+      type,
+      std::move(null_mask),
+      0,
+      default_json_options().view(),
+      stream,
+      mr);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(str_col->view(), expected);
+  }
+}
+
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu
index b2eb1b94f9c..a14e7ecf5b3 100644
--- a/cpp/tests/io/type_inference_test.cu
+++ b/cpp/tests/io/type_inference_test.cu
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
+#include <io/utilities/string_parsing.hpp>
 #include <io/utilities/trie.cuh>
-#include <io/utilities/type_inference.cuh>
 
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/scalar/scalar_factories.hpp>
@@ -50,8 +50,8 @@ TEST_F(TypeInference, Basic)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  auto const string_offset   = std::vector<int32_t>{1, 4, 7};
-  auto const string_length   = std::vector<std::size_t>{2, 2, 1};
+  auto const string_offset   = std::vector<cudf::size_type>{1, 4, 7};
+  auto const string_length   = std::vector<cudf::size_type>{2, 2, 1};
   auto const d_string_offset = cudf::detail::make_device_uvector_async(
     string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   auto const d_string_length = cudf::detail::make_device_uvector_async(
@@ -83,8 +83,8 @@ TEST_F(TypeInference, Null)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  auto const string_offset   = std::vector<int32_t>{1, 1, 4};
-  auto const string_length   = std::vector<std::size_t>{0, 2, 1};
+  auto const string_offset   = std::vector<cudf::size_type>{1, 1, 4};
+  auto const string_length   = std::vector<cudf::size_type>{0, 2, 1};
   auto const d_string_offset = cudf::detail::make_device_uvector_async(
     string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   auto const d_string_length = cudf::detail::make_device_uvector_async(
@@ -116,8 +116,8 @@ TEST_F(TypeInference, AllNull)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  auto const string_offset   = std::vector<int32_t>{1, 1, 1};
-  auto const string_length   = std::vector<std::size_t>{0, 0, 4};
+  auto const string_offset   = std::vector<cudf::size_type>{1, 1, 1};
+  auto const string_length   = std::vector<cudf::size_type>{0, 0, 4};
   auto const d_string_offset = cudf::detail::make_device_uvector_async(
     string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   auto const d_string_length = cudf::detail::make_device_uvector_async(
@@ -149,8 +149,8 @@ TEST_F(TypeInference, String)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  auto const string_offset   = std::vector<int32_t>{1, 8, 12};
-  auto const string_length   = std::vector<std::size_t>{6, 3, 4};
+  auto const string_offset   = std::vector<cudf::size_type>{1, 8, 12};
+  auto const string_length   = std::vector<cudf::size_type>{6, 3, 4};
   auto const d_string_offset = cudf::detail::make_device_uvector_async(
     string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   auto const d_string_length = cudf::detail::make_device_uvector_async(
@@ -182,8 +182,8 @@ TEST_F(TypeInference, Bool)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  auto const string_offset   = std::vector<int32_t>{1, 6, 12};
-  auto const string_length   = std::vector<std::size_t>{4, 5, 5};
+  auto const string_offset   = std::vector<cudf::size_type>{1, 6, 12};
+  auto const string_length   = std::vector<cudf::size_type>{4, 5, 5};
   auto const d_string_offset = cudf::detail::make_device_uvector_async(
     string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   auto const d_string_length = cudf::detail::make_device_uvector_async(
@@ -215,8 +215,8 @@ TEST_F(TypeInference, Timestamp)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  auto const string_offset   = std::vector<int32_t>{1, 10};
-  auto const string_length   = std::vector<std::size_t>{8, 9};
+  auto const string_offset   = std::vector<cudf::size_type>{1, 10};
+  auto const string_length   = std::vector<cudf::size_type>{8, 9};
   auto const d_string_offset = cudf::detail::make_device_uvector_async(
     string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   auto const d_string_length = cudf::detail::make_device_uvector_async(
@@ -249,8 +249,8 @@ TEST_F(TypeInference, InvalidInput)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  auto const string_offset   = std::vector<int32_t>{1, 3, 5, 7, 9};
-  auto const string_length   = std::vector<std::size_t>{1, 1, 1, 1, 1};
+  auto const string_offset   = std::vector<cudf::size_type>{1, 3, 5, 7, 9};
+  auto const string_length   = std::vector<cudf::size_type>{1, 1, 1, 1, 1};
   auto const d_string_offset = cudf::detail::make_device_uvector_async(
     string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   auto const d_string_length = cudf::detail::make_device_uvector_async(