Fix row conversion test (#1577)
* Fix sync issue

* Workaround the memory issue

Signed-off-by: Nghia Truong <[email protected]>

* Add comment

Signed-off-by: Nghia Truong <[email protected]>

---------

Signed-off-by: Nghia Truong <[email protected]>
ttnghia authored Nov 21, 2023
1 parent 3edf053 commit daa1ea6
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/main/cpp/src/row_conversion.cu
@@ -81,6 +81,7 @@ constexpr auto NUM_WARPS_IN_BLOCK = 32;
 #pragma nv_diag_suppress static_var_with_dynamic_init
 
 using namespace cudf;
+using detail::make_device_uvector_sync;
 using detail::make_device_uvector_async;
 using rmm::device_uvector;
 
@@ -231,7 +232,7 @@ build_string_row_offsets(table_view const& tbl,
                  offsets_iter + tbl.num_columns(),
                  std::back_inserter(offsets_iterators),
                  [](auto const& offset_ptr) { return offset_ptr != nullptr; });
-    return make_device_uvector_async(
+    return make_device_uvector_sync(
       offsets_iterators, stream, rmm::mr::get_current_device_resource());
   }();
 
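Note on the hunk above: `make_device_uvector_async` only enqueues the host-to-device copy on `stream`, so the source host vector must stay alive until that copy completes; here `offsets_iterators` is a local inside an immediately-invoked lambda, so the synchronous variant, which waits for the copy before returning, avoids reading freed host memory. A minimal sketch of the safe pattern, assuming libcudf's detail vector factories; the function name and parameters below are illustrative, not from this commit:

#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/types.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <vector>

// Copy a short-lived host vector of offsets to the device. The _sync factory
// blocks until the copy has finished, so it stays safe even if the caller's
// host vector is destroyed immediately after this call returns.
rmm::device_uvector<cudf::size_type> to_device_offsets(
  std::vector<cudf::size_type> const& host_offsets, rmm::cuda_stream_view stream)
{
  return cudf::detail::make_device_uvector_sync(
    host_offsets, stream, rmm::mr::get_current_device_resource());
}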
@@ -1556,8 +1557,13 @@ batch_data build_batches(size_type num_rows,
   batch_row_boundaries.push_back(0);
   size_type last_row_end = 0;
   device_uvector<uint64_t> cumulative_row_sizes(num_rows, stream);
-  thrust::inclusive_scan(
-    rmm::exec_policy(stream), row_sizes, row_sizes + num_rows, cumulative_row_sizes.begin());
+
+  // Evaluate the row size values before calling `inclusive_scan` to workaround
+  // memory issue in https://github.com/NVIDIA/spark-rapids-jni/issues/1567.
+  thrust::copy(rmm::exec_policy(stream), row_sizes, row_sizes + num_rows,
+               cumulative_row_sizes.begin());
+  thrust::inclusive_scan(rmm::exec_policy(stream), cumulative_row_sizes.begin(),
+                         cumulative_row_sizes.end(), cumulative_row_sizes.begin());
 
   // This needs to be split this into 2 gig batches. Care must be taken to avoid a batch larger than
   // 2 gigs. Imagine a table with 900 meg rows. The batches should occur every 2 rows, but if a
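The final hunk works around the memory issue tracked in NVIDIA/spark-rapids-jni#1567 by first materializing the per-row sizes into `cumulative_row_sizes` with `thrust::copy` and then running `thrust::inclusive_scan` in place on that buffer, instead of scanning the `row_sizes` iterator directly. A minimal self-contained sketch of the same copy-then-scan pattern; the `row_size_op` functor and `cumulative_sizes` function are stand-ins for illustration, not code from this commit:

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <thrust/copy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/scan.h>
#include <cstdint>

// Hypothetical per-row size: a fixed 16-byte header plus one byte per row index.
struct row_size_op {
  __host__ __device__ uint64_t operator()(uint64_t row) const { return 16 + row; }
};

rmm::device_uvector<uint64_t> cumulative_sizes(int num_rows, rmm::cuda_stream_view stream)
{
  // Lazy iterator that computes each row's size on the fly, standing in for `row_sizes`.
  auto row_sizes = thrust::make_transform_iterator(
    thrust::make_counting_iterator<uint64_t>(0), row_size_op{});

  rmm::device_uvector<uint64_t> cumulative_row_sizes(num_rows, stream);

  // Materialize the row sizes into device memory first ...
  thrust::copy(rmm::exec_policy(stream), row_sizes, row_sizes + num_rows,
               cumulative_row_sizes.begin());
  // ... then scan the materialized buffer in place, rather than scanning the
  // lazy iterator directly.
  thrust::inclusive_scan(rmm::exec_policy(stream), cumulative_row_sizes.begin(),
                         cumulative_row_sizes.end(), cumulative_row_sizes.begin());
  return cumulative_row_sizes;
}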
