From daa1ea62508575b757b568646fef8c5ed938e272 Mon Sep 17 00:00:00 2001
From: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Mon, 20 Nov 2023 20:46:04 -0800
Subject: [PATCH] Fix row conversion test (#1577)

* Fix sync issue

* Workaround the memory issue

Signed-off-by: Nghia Truong

* Add comment

Signed-off-by: Nghia Truong

---------

Signed-off-by: Nghia Truong
---
 src/main/cpp/src/row_conversion.cu | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/main/cpp/src/row_conversion.cu b/src/main/cpp/src/row_conversion.cu
index c1f94598d0..bbfe50863a 100644
--- a/src/main/cpp/src/row_conversion.cu
+++ b/src/main/cpp/src/row_conversion.cu
@@ -81,6 +81,7 @@ constexpr auto NUM_WARPS_IN_BLOCK = 32;
 #pragma nv_diag_suppress static_var_with_dynamic_init
 
 using namespace cudf;
+using detail::make_device_uvector_sync;
 using detail::make_device_uvector_async;
 using rmm::device_uvector;
 
@@ -231,7 +232,7 @@ build_string_row_offsets(table_view const& tbl,
                  offsets_iter + tbl.num_columns(),
                  std::back_inserter(offsets_iterators),
                  [](auto const& offset_ptr) { return offset_ptr != nullptr; });
-    return make_device_uvector_async(
+    return make_device_uvector_sync(
       offsets_iterators, stream, rmm::mr::get_current_device_resource());
   }();
 
@@ -1556,8 +1557,13 @@ batch_data build_batches(size_type num_rows,
   batch_row_boundaries.push_back(0);
   size_type last_row_end = 0;
   device_uvector<uint64_t> cumulative_row_sizes(num_rows, stream);
-  thrust::inclusive_scan(
-    rmm::exec_policy(stream), row_sizes, row_sizes + num_rows, cumulative_row_sizes.begin());
+
+  // Evaluate the row size values before calling `inclusive_scan` to workaround
+  // memory issue in https://github.com/NVIDIA/spark-rapids-jni/issues/1567.
+  thrust::copy(rmm::exec_policy(stream), row_sizes, row_sizes + num_rows,
+               cumulative_row_sizes.begin());
+  thrust::inclusive_scan(rmm::exec_policy(stream), cumulative_row_sizes.begin(),
+                         cumulative_row_sizes.end(), cumulative_row_sizes.begin());
 
   // This needs to be split this into 2 gig batches. Care must be taken to avoid a batch larger than
   // 2 gigs. Imagine a table with 900 meg rows. The batches should occur every 2 rows, but if a
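
For context on the final hunk: instead of scanning `row_sizes` directly, the patch first materializes the values into `cumulative_row_sizes` with `thrust::copy` and then runs an in-place `thrust::inclusive_scan` over that buffer. The standalone Thrust sketch below is a minimal reproduction of that copy-then-scan pattern only; it uses a plain `thrust::device_vector` and made-up row sizes rather than the `rmm::device_uvector`, stream, and `row_sizes` iterator from the patch, and the names in it are illustrative.

// Minimal copy-then-scan sketch (illustrative, not part of the patch).
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/scan.h>

#include <cstdint>
#include <vector>

int main()
{
  // Hypothetical per-row sizes standing in for the `row_sizes` input in the patch.
  std::vector<std::uint64_t> const host_row_sizes{4, 8, 8, 16, 4};

  thrust::device_vector<std::uint64_t> row_sizes(host_row_sizes.begin(), host_row_sizes.end());
  thrust::device_vector<std::uint64_t> cumulative_row_sizes(row_sizes.size());

  // Step 1: evaluate/copy the input values into the destination buffer.
  thrust::copy(row_sizes.begin(), row_sizes.end(), cumulative_row_sizes.begin());

  // Step 2: run the inclusive scan in place over the destination buffer,
  // so the scan reads plain device memory rather than a lazily evaluated input.
  thrust::inclusive_scan(cumulative_row_sizes.begin(),
                         cumulative_row_sizes.end(),
                         cumulative_row_sizes.begin());

  // cumulative_row_sizes now holds {4, 12, 20, 36, 40}.
  return 0;
}

Splitting the evaluation from the scan costs one extra pass over the data, but, per the comment added in the patch, it works around the memory issue tracked in https://github.com/NVIDIA/spark-rapids-jni/issues/1567.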