diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 84b462bb884..136f43ee706 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -380,6 +380,7 @@ add_library(
   src/io/functions.cpp
   src/io/json/host_tree_algorithms.cu
   src/io/json/json_column.cu
+  src/io/json/column_tree_construction.cu
   src/io/json/json_normalization.cu
   src/io/json/json_tree.cu
   src/io/json/nested_json_gpu.cu
diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu
new file mode 100644
index 00000000000..c4fe7926706
--- /dev/null
+++ b/cpp/src/io/json/column_tree_construction.cu
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nested_json.hpp"
+
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/error.hpp>
+#include <cudf/utilities/memory_resource.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <cuda/functional>
+#include <thrust/for_each.h>
+#include <thrust/iterator/constant_iterator.h>
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/permutation_iterator.h>
+#include <thrust/iterator/transform_output_iterator.h>
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/reduce.h>
+#include <thrust/scan.h>
+#include <thrust/sort.h>
+#include <thrust/transform.h>
+#include <thrust/transform_scan.h>
+#include <thrust/unique.h>
+
+namespace cudf::io::json {
+
+using row_offset_t = size_type;
+
+#ifdef CSR_DEBUG_PRINT
+template <typename T>
+void print(device_span<T const> d_vec, std::string name, rmm::cuda_stream_view stream)
+{
+  stream.synchronize();
+  auto h_vec = cudf::detail::make_std_vector_sync(d_vec, stream);
+  std::cout << name << " = ";
+  for (auto e : h_vec) {
+    std::cout << e << " ";
+  }
+  std::cout << std::endl;
+}
+#endif
+
+namespace experimental::detail {
+
+struct level_ordering {
+  device_span<TreeDepthT const> node_levels;
+  device_span<NodeIndexT const> col_ids;
+  device_span<NodeIndexT const> parent_node_ids;
+  __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const
+  {
+    auto lhs_parent_col_id = parent_node_ids[lhs_node_id] == parent_node_sentinel
+                               ? parent_node_sentinel
+                               : col_ids[parent_node_ids[lhs_node_id]];
+    auto rhs_parent_col_id = parent_node_ids[rhs_node_id] == parent_node_sentinel
+                               ? parent_node_sentinel
+                               : col_ids[parent_node_ids[rhs_node_id]];
+
+    return (node_levels[lhs_node_id] < node_levels[rhs_node_id]) ||
+           (node_levels[lhs_node_id] == node_levels[rhs_node_id] &&
+            lhs_parent_col_id < rhs_parent_col_id) ||
+           (node_levels[lhs_node_id] == node_levels[rhs_node_id] &&
+            lhs_parent_col_id == rhs_parent_col_id && col_ids[lhs_node_id] < col_ids[rhs_node_id]);
+  }
+};
+
+struct parent_nodeids_to_colids {
+  device_span<NodeIndexT const> rev_mapped_col_ids;
+  __device__ auto operator()(NodeIndexT parent_node_id) -> NodeIndexT
+  {
+    return parent_node_id == parent_node_sentinel ? parent_node_sentinel
+                                                  : rev_mapped_col_ids[parent_node_id];
+  }
+};
+
+/**
+ * @brief Reduces node tree representation to column tree CSR representation.
+ *
+ * @param node_tree Node tree representation of JSON string
+ * @param original_col_ids Column ids of nodes
+ * @param row_offsets Row offsets of nodes
+ * @param is_array_of_arrays Whether the tree is an array of arrays
+ * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return A tuple of column tree representation of JSON string, column ids of columns, and
+ * max row offsets of columns
+ */
+std::tuple<compressed_sparse_row, column_tree_properties> reduce_to_column_tree(
+  tree_meta_t& node_tree,
+  device_span<NodeIndexT const> original_col_ids,
+  device_span<NodeIndexT const> sorted_col_ids,
+  device_span<NodeIndexT const> ordered_node_ids,
+  device_span<row_offset_t const> row_offsets,
+  bool is_array_of_arrays,
+  NodeIndexT row_array_parent_col_id,
+  rmm::cuda_stream_view stream)
+{
+  CUDF_FUNC_RANGE();
+
+  if (original_col_ids.empty()) {
+    rmm::device_uvector<NodeIndexT> empty_row_idx(0, stream);
+    rmm::device_uvector<NodeIndexT> empty_col_idx(0, stream);
+    rmm::device_uvector<NodeT> empty_column_categories(0, stream);
+    rmm::device_uvector<row_offset_t> empty_max_row_offsets(0, stream);
+    rmm::device_uvector<NodeIndexT> empty_mapped_col_ids(0, stream);
+    return std::tuple{compressed_sparse_row{std::move(empty_row_idx), std::move(empty_col_idx)},
+                      column_tree_properties{std::move(empty_column_categories),
+                                             std::move(empty_max_row_offsets),
+                                             std::move(empty_mapped_col_ids)}};
+  }
+
+  auto [unpermuted_tree, unpermuted_col_ids, unpermuted_max_row_offsets] =
+    cudf::io::json::detail::reduce_to_column_tree(node_tree,
+                                                  original_col_ids,
+                                                  sorted_col_ids,
+                                                  ordered_node_ids,
+                                                  row_offsets,
+                                                  is_array_of_arrays,
+                                                  row_array_parent_col_id,
+                                                  stream);
+
+  NodeIndexT num_columns = unpermuted_col_ids.size();
+
+  auto mapped_col_ids = cudf::detail::make_device_uvector_async(
+    unpermuted_col_ids, stream, cudf::get_current_device_resource_ref());
+  rmm::device_uvector<NodeIndexT> rev_mapped_col_ids(num_columns, stream);
+  rmm::device_uvector<NodeIndexT> reordering_index(unpermuted_col_ids.size(), stream);
+
+  thrust::sequence(
+    rmm::exec_policy_nosync(stream), reordering_index.begin(), reordering_index.end());
+  // Reorder nodes and column ids in level-wise fashion
+  thrust::sort_by_key(
+    rmm::exec_policy_nosync(stream),
+    reordering_index.begin(),
+    reordering_index.end(),
+    mapped_col_ids.begin(),
+    level_ordering{
+      unpermuted_tree.node_levels, unpermuted_col_ids, unpermuted_tree.parent_node_ids});
+
+  {
+    auto mapped_col_ids_copy = cudf::detail::make_device_uvector_async(
+      mapped_col_ids, stream, cudf::get_current_device_resource_ref());
+    thrust::sequence(
+      rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end());
+    thrust::sort_by_key(rmm::exec_policy_nosync(stream),
+                        mapped_col_ids_copy.begin(),
+                        mapped_col_ids_copy.end(),
+                        rev_mapped_col_ids.begin());
+  }
+
+  rmm::device_uvector<NodeIndexT> parent_col_ids(num_columns, stream);
+  thrust::transform_output_iterator parent_col_ids_it(parent_col_ids.begin(),
+                                                      parent_nodeids_to_colids{rev_mapped_col_ids});
+  rmm::device_uvector<row_offset_t> max_row_offsets(num_columns, stream);
+  rmm::device_uvector<NodeT> column_categories(num_columns, stream);
+  thrust::copy_n(
+    rmm::exec_policy_nosync(stream),
+    thrust::make_zip_iterator(thrust::make_permutation_iterator(
+                                unpermuted_tree.parent_node_ids.begin(), reordering_index.begin()),
+                              thrust::make_permutation_iterator(unpermuted_max_row_offsets.begin(),
+                                                                reordering_index.begin()),
+                              thrust::make_permutation_iterator(
+                                unpermuted_tree.node_categories.begin(), reordering_index.begin())),
+    num_columns,
+    thrust::make_zip_iterator(
+      parent_col_ids_it, max_row_offsets.begin(), column_categories.begin()));
+
+#ifdef CSR_DEBUG_PRINT
+  print<NodeIndexT>(reordering_index, "h_reordering_index", stream);
+  print<NodeIndexT>(mapped_col_ids, "h_mapped_col_ids", stream);
+  print<NodeIndexT>(rev_mapped_col_ids, "h_rev_mapped_col_ids", stream);
+  print<NodeIndexT>(parent_col_ids, "h_parent_col_ids", stream);
+  print<row_offset_t>(max_row_offsets, "h_max_row_offsets", stream);
+#endif
+
+  auto construct_row_idx = [&stream](NodeIndexT num_columns,
+                                     device_span<NodeIndexT const> parent_col_ids) {
+    auto row_idx = cudf::detail::make_zeroed_device_uvector_async<NodeIndexT>(
+      static_cast<std::size_t>(num_columns + 1), stream, cudf::get_current_device_resource_ref());
+    // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel)
+    // children adjacency
+
+    auto num_non_leaf_columns = thrust::unique_count(
+      rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end());
+    rmm::device_uvector<NodeIndexT> non_leaf_nodes(num_non_leaf_columns, stream);
+    rmm::device_uvector<NodeIndexT> non_leaf_nodes_children(num_non_leaf_columns, stream);
+    thrust::reduce_by_key(rmm::exec_policy_nosync(stream),
+                          parent_col_ids.begin() + 1,
+                          parent_col_ids.end(),
+                          thrust::make_constant_iterator(1),
+                          non_leaf_nodes.begin(),
+                          non_leaf_nodes_children.begin(),
+                          thrust::equal_to<TreeDepthT>());
+
+    thrust::scatter(rmm::exec_policy_nosync(stream),
+                    non_leaf_nodes_children.begin(),
+                    non_leaf_nodes_children.end(),
+                    non_leaf_nodes.begin(),
+                    row_idx.begin() + 1);
+
+    if (num_columns > 1) {
+      thrust::transform_inclusive_scan(
+        rmm::exec_policy_nosync(stream),
+        thrust::make_zip_iterator(thrust::make_counting_iterator(1), row_idx.begin() + 1),
+        thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, row_idx.end()),
+        row_idx.begin() + 1,
+        cuda::proclaim_return_type<NodeIndexT>([] __device__(auto a) {
+          auto n   = thrust::get<0>(a);
+          auto idx = thrust::get<1>(a);
+          return n == 1 ? idx : idx + 1;
+        }),
+        thrust::plus<NodeIndexT>{});
+    } else {
+      auto single_node = 1;
+      row_idx.set_element_async(1, single_node, stream);
+    }
+
+#ifdef CSR_DEBUG_PRINT
+    print<NodeIndexT>(row_idx, "h_row_idx", stream);
+#endif
+    return row_idx;
+  };
+
+  auto construct_col_idx = [&stream](NodeIndexT num_columns,
+                                     device_span<NodeIndexT const> parent_col_ids,
+                                     device_span<NodeIndexT const> row_idx) {
+    rmm::device_uvector<NodeIndexT> col_idx((num_columns - 1) * 2, stream);
+    thrust::fill(rmm::exec_policy_nosync(stream), col_idx.begin(), col_idx.end(), -1);
+    // excluding root node, construct scatter map
+    rmm::device_uvector<NodeIndexT> map(num_columns - 1, stream);
+    thrust::inclusive_scan_by_key(rmm::exec_policy_nosync(stream),
+                                  parent_col_ids.begin() + 1,
+                                  parent_col_ids.end(),
+                                  thrust::make_constant_iterator(1),
+                                  map.begin());
+    thrust::for_each_n(rmm::exec_policy_nosync(stream),
+                       thrust::make_counting_iterator(1),
+                       num_columns - 1,
+                       [row_idx        = row_idx.begin(),
+                        map            = map.begin(),
+                        parent_col_ids = parent_col_ids.begin()] __device__(auto i) {
+                         auto parent_col_id = parent_col_ids[i];
+                         if (parent_col_id == 0)
+                           --map[i - 1];
+                         else
+                           map[i - 1] += row_idx[parent_col_id];
+                       });
+    thrust::scatter(rmm::exec_policy_nosync(stream),
+                    thrust::make_counting_iterator(1),
+                    thrust::make_counting_iterator(1) + num_columns - 1,
+                    map.begin(),
+                    col_idx.begin());
+
+    // Skip the parent of root node
+    thrust::scatter(rmm::exec_policy_nosync(stream),
+                    parent_col_ids.begin() + 1,
+                    parent_col_ids.end(),
+                    row_idx.begin() + 1,
+                    col_idx.begin());
+
+#ifdef CSR_DEBUG_PRINT
+    print<NodeIndexT>(col_idx, "h_col_idx", stream);
+#endif
+
+    return col_idx;
+  };
+
+  /*
+    5. CSR construction:
+      a. Sort column levels and get their ordering
+      b. For each column node coln iterated according to sorted_column_levels; do
+          i. Find nodes that have coln as the parent node -> set adj_coln
+          ii. row idx[coln] = size of adj_coln + 1
+          iii. col idx[coln] = adj_coln U {parent_col_id[coln]}
+  */
+  auto row_idx = construct_row_idx(num_columns, parent_col_ids);
+  auto col_idx = construct_col_idx(num_columns, parent_col_ids, row_idx);
+
+  return std::tuple{
+    compressed_sparse_row{std::move(row_idx), std::move(col_idx)},
+    column_tree_properties{
+      std::move(column_categories), std::move(max_row_offsets), std::move(mapped_col_ids)}};
+}
+
+}  // namespace experimental::detail
+}  // namespace cudf::io::json
diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index b08fd139113..dfd9285f682 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -47,7 +47,6 @@
 
 namespace cudf::io::json::detail {
 
-// DEBUG prints
 auto to_cat = [](auto v) -> std::string {
   switch (v) {
     case NC_STRUCT: return " S";
@@ -106,18 +105,19 @@ void print_tree(host_span<SymbolT const> input,
  */
 std::tuple<tree_meta_t, rmm::device_uvector<NodeIndexT>, rmm::device_uvector<size_type>>
 reduce_to_column_tree(tree_meta_t& tree,
-                      device_span<NodeIndexT> original_col_ids,
-                      device_span<NodeIndexT> sorted_col_ids,
-                      device_span<NodeIndexT> ordered_node_ids,
-                      device_span<size_type> row_offsets,
+                      device_span<NodeIndexT const> original_col_ids,
+                      device_span<NodeIndexT const> sorted_col_ids,
+                      device_span<NodeIndexT const> ordered_node_ids,
+                      device_span<size_type const> row_offsets,
                       bool is_array_of_arrays,
                       NodeIndexT const row_array_parent_col_id,
                       rmm::cuda_stream_view stream)
 {
   CUDF_FUNC_RANGE();
+
   // 1. column count for allocation
-  auto const num_columns =
-    thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end());
+  auto const num_columns = thrust::unique_count(
+    rmm::exec_policy_nosync(stream), sorted_col_ids.begin(), sorted_col_ids.end());
 
   // 2. reduce_by_key {col_id}, {row_offset}, max.
   rmm::device_uvector<NodeIndexT> unique_col_ids(num_columns, stream);
@@ -162,30 +162,34 @@ reduce_to_column_tree(tree_meta_t& tree,
     });
 
   // 4. unique_copy parent_node_ids, ranges
-  rmm::device_uvector<TreeDepthT> column_levels(0, stream);  // not required
+  rmm::device_uvector<TreeDepthT> column_levels(num_columns, stream);  // not required
   rmm::device_uvector<NodeIndexT> parent_col_ids(num_columns, stream);
   rmm::device_uvector<SymbolOffsetT> col_range_begin(num_columns, stream);  // Field names
   rmm::device_uvector<SymbolOffsetT> col_range_end(num_columns, stream);
   rmm::device_uvector<size_type> unique_node_ids(num_columns, stream);
-  thrust::unique_by_key_copy(rmm::exec_policy(stream),
+  thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream),
                              sorted_col_ids.begin(),
                              sorted_col_ids.end(),
                              ordered_node_ids.begin(),
                              thrust::make_discard_iterator(),
                              unique_node_ids.begin());
+
   thrust::copy_n(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
     thrust::make_zip_iterator(
+      thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin()),
       thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()),
       thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()),
       thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())),
     unique_node_ids.size(),
-    thrust::make_zip_iterator(
-      parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin()));
+    thrust::make_zip_iterator(column_levels.begin(),
+                              parent_col_ids.begin(),
+                              col_range_begin.begin(),
+                              col_range_end.begin()));
 
   // convert parent_node_ids to parent_col_ids
   thrust::transform(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
     parent_col_ids.begin(),
     parent_col_ids.end(),
     parent_col_ids.begin(),
@@ -203,18 +207,17 @@ reduce_to_column_tree(tree_meta_t& tree,
              column_categories[parent_col_id] == NC_LIST &&
                (!is_array_of_arrays || parent_col_id != row_array_parent_col_id));
   };
+
   // Mixed types in List children go to different columns,
   // so all immediate children of list column should have same max_row_offsets.
   //   create list's children max_row_offsets array. (initialize to zero)
   //   atomicMax on  children max_row_offsets array.
   //   gather the max_row_offsets from children row offset array.
   {
-    rmm::device_uvector<NodeIndexT> list_parents_children_max_row_offsets(num_columns, stream);
-    thrust::fill(rmm::exec_policy(stream),
-                 list_parents_children_max_row_offsets.begin(),
-                 list_parents_children_max_row_offsets.end(),
-                 0);
-    thrust::for_each(rmm::exec_policy(stream),
+    auto list_parents_children_max_row_offsets =
+      cudf::detail::make_zeroed_device_uvector_async<NodeIndexT>(
+        static_cast<std::size_t>(num_columns), stream, cudf::get_current_device_resource_ref());
+    thrust::for_each(rmm::exec_policy_nosync(stream),
                      unique_col_ids.begin(),
                      unique_col_ids.end(),
                      [column_categories = column_categories.begin(),
@@ -230,8 +233,9 @@ reduce_to_column_tree(tree_meta_t& tree,
                          ref.fetch_max(max_row_offsets[col_id], cuda::std::memory_order_relaxed);
                        }
                      });
+
     thrust::gather_if(
-      rmm::exec_policy(stream),
+      rmm::exec_policy_nosync(stream),
       parent_col_ids.begin(),
       parent_col_ids.end(),
       parent_col_ids.begin(),
@@ -246,7 +250,7 @@ reduce_to_column_tree(tree_meta_t& tree,
   // copy lists' max_row_offsets to children.
   // all structs should have same size.
   thrust::transform_if(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
     unique_col_ids.begin(),
     unique_col_ids.end(),
     max_row_offsets.begin(),
@@ -272,7 +276,7 @@ reduce_to_column_tree(tree_meta_t& tree,
 
   // For Struct and List (to avoid copying entire strings when mixed type as string is enabled)
   thrust::transform_if(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
     col_range_begin.begin(),
     col_range_begin.end(),
     column_categories.begin(),
diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp
index 83f71e657a7..93ef2b46be1 100644
--- a/cpp/src/io/json/nested_json.hpp
+++ b/cpp/src/io/json/nested_json.hpp
@@ -185,6 +185,55 @@ struct device_json_column {
   }
 };
 
+namespace experimental {
+/*
+ * @brief Sparse graph adjacency matrix stored in Compressed Sparse Row (CSR) format.
+ */
+struct compressed_sparse_row {
+  rmm::device_uvector<NodeIndexT> row_idx;
+  rmm::device_uvector<NodeIndexT> col_idx;
+};
+
+/*
+ * @brief Auxiliary column tree properties that are required to construct the device json
+ * column subtree, but not required for the final cudf column construction.
+ */
+struct column_tree_properties {
+  rmm::device_uvector<NodeT> categories;
+  rmm::device_uvector<size_type> max_row_offsets;
+  rmm::device_uvector<NodeIndexT> mapped_ids;
+};
+
+namespace detail {
+/**
+ * @brief Reduce node tree into column tree by aggregating each property of column.
+ *
+ * @param node_tree Node tree representation of JSON string
+ * @param original_col_ids Column ids of nodes
+ * @param sorted_col_ids Sorted column ids of nodes
+ * @param ordered_node_ids Node ids of nodes sorted by column ids
+ * @param row_offsets Row offsets of nodes
+ * @param is_array_of_arrays Whether the tree is an array of arrays
+ * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return Tuple of compressed_sparse_row struct storing adjacency information of the column tree,
+ * and column_tree_properties struct storing properties of each node i.e. column category, max
+ * number of rows in the column, and column id
+ */
+CUDF_EXPORT
+std::tuple<compressed_sparse_row, column_tree_properties> reduce_to_column_tree(
+  tree_meta_t& node_tree,
+  device_span<NodeIndexT const> original_col_ids,
+  device_span<NodeIndexT const> sorted_col_ids,
+  device_span<NodeIndexT const> ordered_node_ids,
+  device_span<size_type const> row_offsets,
+  bool is_array_of_arrays,
+  NodeIndexT row_array_parent_col_id,
+  rmm::cuda_stream_view stream);
+
+}  // namespace detail
+}  // namespace experimental
+
 namespace detail {
 
 // TODO: return device_uvector instead of passing pre-allocated memory
@@ -303,7 +352,7 @@ get_array_children_indices(TreeDepthT row_array_children_level,
 /**
  * @brief Reduces node tree representation to column tree representation.
  *
- * @param tree Node tree representation of JSON string
+ * @param node_tree Node tree representation of JSON string
  * @param original_col_ids Column ids of nodes
  * @param sorted_col_ids Sorted column ids of nodes
  * @param ordered_node_ids Node ids of nodes sorted by column ids
@@ -314,12 +363,13 @@ get_array_children_indices(TreeDepthT row_array_children_level,
  * @return A tuple of column tree representation of JSON string, column ids of columns, and
  * max row offsets of columns
  */
+CUDF_EXPORT
 std::tuple<tree_meta_t, rmm::device_uvector<NodeIndexT>, rmm::device_uvector<size_type>>
-reduce_to_column_tree(tree_meta_t& tree,
-                      device_span<NodeIndexT> original_col_ids,
-                      device_span<NodeIndexT> sorted_col_ids,
-                      device_span<NodeIndexT> ordered_node_ids,
-                      device_span<size_type> row_offsets,
+reduce_to_column_tree(tree_meta_t& node_tree,
+                      device_span<NodeIndexT const> original_col_ids,
+                      device_span<NodeIndexT const> sorted_col_ids,
+                      device_span<NodeIndexT const> ordered_node_ids,
+                      device_span<size_type const> row_offsets,
                       bool is_array_of_arrays,
                       NodeIndexT const row_array_parent_col_id,
                       rmm::cuda_stream_view stream);
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 288fa84a73d..b67d922d377 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -329,6 +329,7 @@ ConfigureTest(NESTED_JSON_TEST io/json/nested_json_test.cpp io/json/json_tree.cp
 ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp)
 ConfigureTest(JSON_QUOTE_NORMALIZATION io/json/json_quote_normalization_test.cpp)
 ConfigureTest(JSON_WHITESPACE_NORMALIZATION io/json/json_whitespace_normalization_test.cu)
+ConfigureTest(JSON_TREE_CSR io/json/json_tree_csr.cu)
 ConfigureTest(
   DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp
   GPUS 1
diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu
new file mode 100644
index 00000000000..a336b327732
--- /dev/null
+++ b/cpp/tests/io/json/json_tree_csr.cu
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "io/json/nested_json.hpp"
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+
+#include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/io/detail/tokenize_json.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/utilities/default_stream.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/sequence.h>
+#include <thrust/sort.h>
+
+#include <string>
+
+namespace cuio_json = cudf::io::json;
+
+struct h_tree_meta_t {
+  std::vector<cuio_json::NodeT> node_categories;
+  std::vector<cuio_json::NodeIndexT> parent_node_ids;
+  std::vector<cuio_json::SymbolOffsetT> node_range_begin;
+  std::vector<cuio_json::SymbolOffsetT> node_range_end;
+};
+
+struct h_column_tree {
+  // position of nnzs
+  std::vector<cuio_json::NodeIndexT> row_idx;
+  std::vector<cuio_json::NodeIndexT> col_idx;
+  // node properties
+  std::vector<cuio_json::NodeT> categories;
+  std::vector<cuio_json::NodeIndexT> column_ids;
+};
+
+// debug printing
+template <typename T>
+void print(cudf::host_span<T const> vec, std::string name)
+{
+  std::cout << name << " = ";
+  for (auto e : vec) {
+    std::cout << e << " ";
+  }
+  std::cout << std::endl;
+}
+
+bool check_equality(cuio_json::tree_meta_t& d_a,
+                    cudf::device_span<cudf::size_type const> d_a_max_row_offsets,
+                    cuio_json::experimental::compressed_sparse_row& d_b_csr,
+                    cuio_json::experimental::column_tree_properties& d_b_ctp,
+                    rmm::cuda_stream_view stream)
+{
+  // convert from tree_meta_t to column_tree_csr
+  stream.synchronize();
+
+  h_tree_meta_t a{cudf::detail::make_std_vector_async(d_a.node_categories, stream),
+                  cudf::detail::make_std_vector_async(d_a.parent_node_ids, stream),
+                  cudf::detail::make_std_vector_async(d_a.node_range_begin, stream),
+                  cudf::detail::make_std_vector_async(d_a.node_range_end, stream)};
+
+  h_column_tree b{cudf::detail::make_std_vector_async(d_b_csr.row_idx, stream),
+                  cudf::detail::make_std_vector_async(d_b_csr.col_idx, stream),
+                  cudf::detail::make_std_vector_async(d_b_ctp.categories, stream),
+                  cudf::detail::make_std_vector_async(d_b_ctp.mapped_ids, stream)};
+
+  auto a_max_row_offsets = cudf::detail::make_std_vector_async(d_a_max_row_offsets, stream);
+  auto b_max_row_offsets = cudf::detail::make_std_vector_async(d_b_ctp.max_row_offsets, stream);
+
+  stream.synchronize();
+
+  auto num_nodes = a.parent_node_ids.size();
+  if (num_nodes > 1) {
+    if (b.row_idx.size() != num_nodes + 1) { return false; }
+
+    for (auto pos = b.row_idx[0]; pos < b.row_idx[1]; pos++) {
+      auto v = b.col_idx[pos];
+      if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { return false; }
+    }
+    for (size_t u = 1; u < num_nodes; u++) {
+      auto v = b.col_idx[b.row_idx[u]];
+      if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { return false; }
+
+      for (auto pos = b.row_idx[u] + 1; pos < b.row_idx[u + 1]; pos++) {
+        v = b.col_idx[pos];
+        if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { return false; }
+      }
+    }
+    for (size_t u = 0; u < num_nodes; u++) {
+      if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; }
+    }
+    for (size_t u = 0; u < num_nodes; u++) {
+      if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; }
+    }
+  } else if (num_nodes == 1) {
+    if (b.row_idx.size() != num_nodes + 1) { return false; }
+
+    if (b.row_idx[0] != 0 || b.row_idx[1] != 1) return false;
+    if (!b.col_idx.empty()) return false;
+    for (size_t u = 0; u < num_nodes; u++) {
+      if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; }
+    }
+
+    for (size_t u = 0; u < num_nodes; u++) {
+      if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; }
+    }
+  }
+  return true;
+}
+
+void run_test(std::string const& input, bool enable_lines = true)
+{
+  auto const stream = cudf::get_default_stream();
+  cudf::string_scalar d_scalar(input, true, stream);
+  auto d_input = cudf::device_span<cuio_json::SymbolT const>{d_scalar.data(),
+                                                             static_cast<size_t>(d_scalar.size())};
+
+  cudf::io::json_reader_options options{};
+  options.enable_lines(enable_lines);
+  options.enable_mixed_types_as_string(true);
+
+  // Parse the JSON and get the token stream
+  auto const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream(
+    d_input, options, stream, cudf::get_current_device_resource_ref());
+
+  // Get the JSON's tree representation
+  auto gpu_tree =
+    cuio_json::detail::get_tree_representation(tokens_gpu,
+                                               token_indices_gpu,
+                                               options.is_enabled_mixed_types_as_string(),
+                                               stream,
+                                               cudf::get_current_device_resource_ref());
+
+  bool const is_array_of_arrays = [&]() {
+    std::array<cuio_json::node_t, 2> h_node_categories = {cuio_json::NC_ERR, cuio_json::NC_ERR};
+    auto const size_to_copy = std::min(size_t{2}, gpu_tree.node_categories.size());
+    CUDF_CUDA_TRY(cudaMemcpyAsync(h_node_categories.data(),
+                                  gpu_tree.node_categories.data(),
+                                  sizeof(cuio_json::node_t) * size_to_copy,
+                                  cudaMemcpyDefault,
+                                  stream.value()));
+    stream.synchronize();
+    if (options.is_enabled_lines()) return h_node_categories[0] == cuio_json::NC_LIST;
+    return h_node_categories[0] == cuio_json::NC_LIST and
+           h_node_categories[1] == cuio_json::NC_LIST;
+  }();
+
+  auto tup =
+    cuio_json::detail::records_orient_tree_traversal(d_input,
+                                                     gpu_tree,
+                                                     is_array_of_arrays,
+                                                     options.is_enabled_lines(),
+                                                     stream,
+                                                     rmm::mr::get_current_device_resource());
+  auto& gpu_col_id      = std::get<0>(tup);
+  auto& gpu_row_offsets = std::get<1>(tup);
+
+  auto const num_nodes = gpu_col_id.size();
+  rmm::device_uvector<cudf::size_type> sorted_col_ids(gpu_col_id.size(), stream);  // make a copy
+  thrust::copy(
+    rmm::exec_policy(stream), gpu_col_id.begin(), gpu_col_id.end(), sorted_col_ids.begin());
+
+  // sort by {col_id} on {node_ids} stable
+  rmm::device_uvector<cudf::size_type> node_ids(gpu_col_id.size(), stream);
+  thrust::sequence(rmm::exec_policy(stream), node_ids.begin(), node_ids.end());
+  thrust::stable_sort_by_key(
+    rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin());
+
+  cudf::size_type const row_array_parent_col_id = [&]() {
+    cudf::size_type value      = cuio_json::parent_node_sentinel;
+    auto const list_node_index = options.is_enabled_lines() ? 0 : 1;
+    CUDF_CUDA_TRY(cudaMemcpyAsync(&value,
+                                  gpu_col_id.data() + list_node_index,
+                                  sizeof(cudf::size_type),
+                                  cudaMemcpyDefault,
+                                  stream.value()));
+    stream.synchronize();
+    return value;
+  }();
+
+  auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] =
+    cudf::io::json::detail::reduce_to_column_tree(gpu_tree,
+                                                  gpu_col_id,
+                                                  sorted_col_ids,
+                                                  node_ids,
+                                                  gpu_row_offsets,
+                                                  is_array_of_arrays,
+                                                  row_array_parent_col_id,
+                                                  stream);
+
+  auto [d_column_tree_csr, d_column_tree_properties] =
+    cudf::io::json::experimental::detail::reduce_to_column_tree(gpu_tree,
+                                                                gpu_col_id,
+                                                                sorted_col_ids,
+                                                                node_ids,
+                                                                gpu_row_offsets,
+                                                                is_array_of_arrays,
+                                                                row_array_parent_col_id,
+                                                                stream);
+
+  auto iseq = check_equality(
+    d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream);
+  // assert equality between csr and meta formats
+  ASSERT_TRUE(iseq);
+}
+
+struct JsonColumnTreeTests : public cudf::test::BaseFixture {};
+
+TEST_F(JsonColumnTreeTests, JSONL_Small)
+{
+  std::string const input =
+    R"(  {}
+ { "a": { "y" : 6, "z": [] }}
+ { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )";  // Prepare input & output buffers
+  run_test(input);
+}
+
+TEST_F(JsonColumnTreeTests, JSONL_Large)
+{
+  std::string const input =
+    R"(  {}
+    {}
+ { "a": { "y" : 6, "z": [] }}
+ { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }}
+ { "a": { "y" : 6, "z": [] }}
+ { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }}
+ { "a": { "y" : 6, "z": [] }}
+ { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }}
+ { "a": { "y" : 6, "z": [] }}
+ { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )";
+  run_test(input);
+}
+
+TEST_F(JsonColumnTreeTests, JSONL_ListofStruct)
+{
+  std::string const input = R"(
+  { "Root": { "Key": [ { "EE": "A" } ] } }
+  { "Root": { "Key": {  } } }
+  { "Root": { "Key": [{ "YY": 1}] } }
+  )";
+  run_test(input);
+}
+
+TEST_F(JsonColumnTreeTests, JSONL_MissingEntries)
+{
+  std::string json_stringl = R"(
+    {"a": 1, "b": {"0": "abc", "1": [-1.]}, "c": true}
+    {"a": 1, "b": {"0": "abc"          }, "c": false}
+    {"a": 1, "b": {}}
+    {"a": 1,                              "c": null}
+    )";
+  run_test(json_stringl);
+}
+
+TEST_F(JsonColumnTreeTests, JSONL_MoreMissingEntries)
+{
+  std::string json_stringl = R"(
+    { "foo1": [1,2,3], "bar": 123 }
+    { "foo2": { "a": 1 }, "bar": 456 }
+    { "foo1": [1,2,3], "bar": 123 }
+    { "foo2": { "a": 1 }, "bar": 456 }
+    { "foo1": [1,2,3], "bar": 123 }
+    { "foo2": { "a": 1 }, "bar": 456 }
+    )";
+  run_test(json_stringl);
+}
+
+TEST_F(JsonColumnTreeTests, JSONL_StillMoreMissingEntries)
+{
+  std::string json_stringl = R"(
+    { "foo1": [1,2,3], "bar": 123 }
+    { "foo2": { "a": 1 }, "bar": 456 }
+    { "foo1": ["123","456"], "bar": 123 }
+    { "foo2": { "b": 5 }, "car": 456 }
+    { "foo1": [1,2,3], "bar": 123 }
+    { "foo2": { "a": 1 }, "bar": 456 }
+    )";
+  run_test(json_stringl);
+}
+
+TEST_F(JsonColumnTreeTests, JSON_MissingEntries)
+{
+  std::string json_string = R"([
+    {"a": 1, "b": {"0": "abc", "1": [-1.]}, "c": true},
+    {"a": 1, "b": {"0": "abc"          }, "c": false},
+    {"a": 1, "b": {}},
+    {"a": 1,                              "c": null}
+    ])";
+  run_test(json_string, false);
+}
+
+TEST_F(JsonColumnTreeTests, JSON_StructOfStructs)
+{
+  std::string json_string =
+    R"([
+    {},
+    { "a": { "y" : 6, "z": [] }},
+    { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }}
+    ])";  // Prepare input & output buffers
+  run_test(json_string, false);
+}
+
+TEST_F(JsonColumnTreeTests, JSONL_ArrayOfArrays_NestedList)
+{
+  std::string json_string =
+    R"([123, [1,2,3]]
+       [456, null,  { "a": 1 }])";
+  run_test(json_string);
+}
+
+TEST_F(JsonColumnTreeTests, JSON_ArrayofArrays_NestedList)
+{
+  std::string json_string = R"([[[1,2,3], null, 123],
+              [null, { "a": 1 }, 456 ]])";
+  run_test(json_string, false);
+}
+
+TEST_F(JsonColumnTreeTests, JSON_CornerCase_Empty)
+{
+  std::string json_string = R"([])";
+  run_test(json_string, false);
+}
+
+TEST_F(JsonColumnTreeTests, JSONL_CornerCase_List)
+{
+  std::string json_string = R"([123])";
+  run_test(json_string, true);
+}
+
+TEST_F(JsonColumnTreeTests, JSON_CornerCase_EmptyNestedList)
+{
+  std::string json_string = R"([[[]]])";
+  run_test(json_string, false);
+}
+
+TEST_F(JsonColumnTreeTests, JSON_CornerCase_EmptyNestedLists)
+{
+  std::string json_string = R"([[], [], []])";
+  run_test(json_string, false);
+}
+
+TEST_F(JsonColumnTreeTests, JSONL_CornerCase_ListofLists)
+{
+  std::string json_string = R"([[1, 2, 3], [4, 5, null], []])";
+  run_test(json_string, true);
+}
+
+TEST_F(JsonColumnTreeTests, JSONL_CornerCase_EmptyListOfLists)
+{
+  std::string json_string = R"([[]])";
+  run_test(json_string, true);
+}