From daa1ea62508575b757b568646fef8c5ed938e272 Mon Sep 17 00:00:00 2001
From: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Mon, 20 Nov 2023 20:46:04 -0800
Subject: [PATCH] Fix row conversion test (#1577)

* Fix sync issue

* Workaround the memory issue

Signed-off-by: Nghia Truong

* Add comment

Signed-off-by: Nghia Truong

---------

Signed-off-by: Nghia Truong
---
 src/main/cpp/src/row_conversion.cu | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/main/cpp/src/row_conversion.cu b/src/main/cpp/src/row_conversion.cu
index c1f94598d0..bbfe50863a 100644
--- a/src/main/cpp/src/row_conversion.cu
+++ b/src/main/cpp/src/row_conversion.cu
@@ -81,6 +81,7 @@ constexpr auto NUM_WARPS_IN_BLOCK = 32;
 #pragma nv_diag_suppress static_var_with_dynamic_init
 
 using namespace cudf;
+using detail::make_device_uvector_sync;
 using detail::make_device_uvector_async;
 using rmm::device_uvector;
 
@@ -231,7 +232,7 @@ build_string_row_offsets(table_view const& tbl,
                  offsets_iter + tbl.num_columns(),
                  std::back_inserter(offsets_iterators),
                  [](auto const& offset_ptr) { return offset_ptr != nullptr; });
-    return make_device_uvector_async(
+    return make_device_uvector_sync(
       offsets_iterators, stream, rmm::mr::get_current_device_resource());
   }();
 
@@ -1556,8 +1557,13 @@ batch_data build_batches(size_type num_rows,
   batch_row_boundaries.push_back(0);
   size_type last_row_end = 0;
   device_uvector<uint64_t> cumulative_row_sizes(num_rows, stream);
-  thrust::inclusive_scan(
-    rmm::exec_policy(stream), row_sizes, row_sizes + num_rows, cumulative_row_sizes.begin());
+
+  // Evaluate the row size values before calling `inclusive_scan` to workaround
+  // memory issue in https://github.com/NVIDIA/spark-rapids-jni/issues/1567.
+  thrust::copy(rmm::exec_policy(stream), row_sizes, row_sizes + num_rows,
+               cumulative_row_sizes.begin());
+  thrust::inclusive_scan(rmm::exec_policy(stream), cumulative_row_sizes.begin(),
+                         cumulative_row_sizes.end(), cumulative_row_sizes.begin());
 
   // This needs to be split this into 2 gig batches. Care must be taken to avoid a batch larger than
   // 2 gigs. Imagine a table with 900 meg rows. The batches should occur every 2 rows, but if a
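
For context on the final hunk: instead of scanning `row_sizes` directly, the patch first materializes the values into `cumulative_row_sizes` with `thrust::copy` and then runs an in-place `thrust::inclusive_scan` over that buffer. The standalone Thrust sketch below is a minimal reproduction of that copy-then-scan pattern only; it uses a plain `thrust::device_vector` and made-up row sizes rather than the `rmm::device_uvector`, stream, and `row_sizes` iterator from the patch, and the names in it are illustrative.

// Minimal copy-then-scan sketch (illustrative, not part of the patch).
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/scan.h>

#include <cstdint>
#include <vector>

int main()
{
  // Hypothetical per-row sizes standing in for the `row_sizes` input in the patch.
  std::vector<std::uint64_t> const host_row_sizes{4, 8, 8, 16, 4};

  thrust::device_vector<std::uint64_t> row_sizes(host_row_sizes.begin(), host_row_sizes.end());
  thrust::device_vector<std::uint64_t> cumulative_row_sizes(row_sizes.size());

  // Step 1: evaluate/copy the input values into the destination buffer.
  thrust::copy(row_sizes.begin(), row_sizes.end(), cumulative_row_sizes.begin());

  // Step 2: run the inclusive scan in place over the destination buffer,
  // so the scan reads plain device memory rather than a lazily evaluated input.
  thrust::inclusive_scan(cumulative_row_sizes.begin(),
                         cumulative_row_sizes.end(),
                         cumulative_row_sizes.begin());

  // cumulative_row_sizes now holds {4, 12, 20, 36, 40}.
  return 0;
}

Splitting the evaluation from the scan costs one extra pass over the data, but, per the comment added in the patch, it works around the memory issue tracked in https://github.com/NVIDIA/spark-rapids-jni/issues/1567.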