|
1 | 1 | /*
|
2 |
| - * Copyright (c) 2023-2024, NVIDIA CORPORATION. |
| 2 | + * Copyright (c) 2023-2025, NVIDIA CORPORATION. |
3 | 3 | *
|
4 | 4 | * Licensed under the Apache License, Version 2.0 (the "License");
|
5 | 5 | * you may not use this file except in compliance with the License.
|
@@ -376,6 +376,48 @@ std::unique_ptr<column> struct_to_strings(table_view const& strings_columns,
|
376 | 376 | {});
|
377 | 377 | }
|
378 | 378 |
|
| 379 | +struct scatter_fn { |
| 380 | + column_device_view _col; |
| 381 | + size_type* _d_strview_offsets; |
| 382 | + string_view* _d_strviews; |
| 383 | + size_type const* _labels; |
| 384 | + size_type const* _list_offsets; |
| 385 | + column_device_view _d_strings_children; |
| 386 | + string_view _element_seperator; |
| 387 | + string_view _element_narep; |
| 388 | + |
| 389 | + scatter_fn(column_device_view col, |
| 390 | + size_type* d_strview_offsets, |
| 391 | + string_view* d_strviews, |
| 392 | + size_type const* labels, |
| 393 | + size_type const* list_offsets, |
| 394 | + column_device_view d_strings_children, |
| 395 | + string_view const element_separator, |
| 396 | + string_view const element_narep) noexcept |
| 397 | + : _col{col}, |
| 398 | + _d_strview_offsets{d_strview_offsets}, |
| 399 | + _d_strviews{d_strviews}, |
| 400 | + _labels{labels}, |
| 401 | + _list_offsets{list_offsets}, |
| 402 | + _d_strings_children{d_strings_children}, |
| 403 | + _element_seperator{element_separator}, |
| 404 | + _element_narep{element_narep} |
| 405 | + { |
| 406 | + } |
| 407 | + |
| 408 | + __device__ void operator()(size_type idx) const |
| 409 | + { |
| 410 | + auto const label = _labels[idx]; |
| 411 | + auto const sublist_index = idx - _list_offsets[label]; |
| 412 | + auto const strview_index = _d_strview_offsets[label] + sublist_index * 2 + 1; |
| 413 | + // value or na_rep |
| 414 | + auto const strview = _d_strings_children.element<cudf::string_view>(idx); |
| 415 | + _d_strviews[strview_index] = _d_strings_children.is_null(idx) ? _element_narep : strview; |
| 416 | + // separator |
| 417 | + if (sublist_index != 0) { _d_strviews[strview_index - 1] = _element_seperator; } |
| 418 | + } |
| 419 | +}; |
| 420 | + |
379 | 421 | /**
|
380 | 422 | * @brief Concatenates a list of strings columns into a single strings column.
|
381 | 423 | *
|
@@ -461,24 +503,14 @@ std::unique_ptr<column> join_list_of_strings(lists_column_view const& lists_stri
|
461 | 503 | thrust::for_each(rmm::exec_policy_nosync(stream),
|
462 | 504 | thrust::make_counting_iterator<size_type>(0),
|
463 | 505 | thrust::make_counting_iterator<size_type>(num_strings),
|
464 |
| - [col = *col_device_view, |
465 |
| - d_strview_offsets = d_strview_offsets.begin(), |
466 |
| - d_strviews = d_strviews.begin(), |
467 |
| - labels = labels->view().begin<size_type>(), |
468 |
| - list_offsets = offsets.begin<size_type>(), |
469 |
| - d_strings_children = *d_strings_children, |
470 |
| - element_separator, |
471 |
| - element_narep] __device__(auto idx) { |
472 |
| - auto const label = labels[idx]; |
473 |
| - auto const sublist_index = idx - list_offsets[label]; |
474 |
| - auto const strview_index = d_strview_offsets[label] + sublist_index * 2 + 1; |
475 |
| - // value or na_rep |
476 |
| - auto const strview = d_strings_children.element<cudf::string_view>(idx); |
477 |
| - d_strviews[strview_index] = |
478 |
| - d_strings_children.is_null(idx) ? element_narep : strview; |
479 |
| - // separator |
480 |
| - if (sublist_index != 0) { d_strviews[strview_index - 1] = element_separator; } |
481 |
| - }); |
| 506 | + scatter_fn{*col_device_view, |
| 507 | + d_strview_offsets.data(), |
| 508 | + d_strviews.data(), |
| 509 | + labels->view().data<size_type>(), |
| 510 | + offsets.data<size_type>(), |
| 511 | + *d_strings_children, |
| 512 | + element_separator, |
| 513 | + element_narep}); |
482 | 514 |
|
483 | 515 | auto joined_col = make_strings_column(d_strviews, string_view{nullptr, 0}, stream, mr);
|
484 | 516 |
|
|
0 commit comments