Skip to content

Commit

Permalink
Correctly set is_device_accesible when creating host_spans from o…
Browse files Browse the repository at this point in the history
…ther container/span types (#17079)

Discovered that the way `host_span`s are created from `hostdevice_vector`, `hostdevice_span`,  `hostdevice_2dvector` and `host_2dspan` (yes, these are all real types!) does not propagate the `is_device_accesible` flag. In most of the cases these spans use pinned memory, so we're incorrect most of the time.
This PR fixed the way these conversions work. Adjusted some APIs to make it a bit harder to avoid passing the `is_device_accesible` flag.

Removed a few unused functions in `span.hpp` to keep the file as light as possible (it's included EVERYWHERE).

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Shruti Shivakumar (https://github.com/shrshi)

URL: #17079
  • Loading branch information
vuule authored Oct 17, 2024
1 parent 6eeb7d6 commit 14209c1
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 220 deletions.
131 changes: 50 additions & 81 deletions cpp/include/cudf/utilities/span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,18 +180,6 @@ class span_base {
return Derived(_data + _size - count, count);
}

/**
* @brief Obtains a span that is a view over the `count` elements of this span starting at offset
*
* @param offset The offset of the first element in the subspan
* @param count The number of elements in the subspan
* @return A subspan of the sequence, of requested count and offset
*/
[[nodiscard]] constexpr Derived subspan(size_type offset, size_type count) const noexcept
{
return Derived(_data + offset, count);
}

private:
pointer _data{nullptr};
size_type _size{0};
Expand Down Expand Up @@ -234,6 +222,15 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent

constexpr host_span() noexcept : base() {} // required to compile on centos

/// Constructor from pointer and size
/// @param data Pointer to the first element in the span
/// @param size The number of elements in the span
/// @param is_device_accessible Whether the data is device accessible (e.g. pinned memory)
constexpr host_span(T* data, std::size_t size, bool is_device_accessible)
: base(data, size), _is_device_accessible{is_device_accessible}
{
}

/// Constructor from container
/// @param in The container to construct the span from
template <typename C,
Expand Down Expand Up @@ -299,6 +296,19 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
*/
[[nodiscard]] bool is_device_accessible() const { return _is_device_accessible; }

/**
* @brief Obtains a span that is a view over the `count` elements of this span starting at offset
*
* @param offset The offset of the first element in the subspan
* @param count The number of elements in the subspan
* @return A subspan of the sequence, of requested count and offset
*/
[[nodiscard]] constexpr host_span subspan(typename base::size_type offset,
typename base::size_type count) const noexcept
{
return host_span{this->data() + offset, count, _is_device_accessible};
}

private:
bool _is_device_accessible{false};
};
Expand Down Expand Up @@ -368,6 +378,19 @@ struct device_span : public cudf::detail::span_base<T, Extent, device_span<T, Ex
: base(other.data(), other.size())
{
}

/**
* @brief Obtains a span that is a view over the `count` elements of this span starting at offset
*
* @param offset The offset of the first element in the subspan
* @param count The number of elements in the subspan
* @return A subspan of the sequence, of requested count and offset
*/
[[nodiscard]] constexpr device_span subspan(typename base::size_type offset,
typename base::size_type count) const noexcept
{
return device_span{this->data() + offset, count};
}
};
/** @} */ // end of group

Expand All @@ -386,62 +409,45 @@ class base_2dspan {

constexpr base_2dspan() noexcept = default;
/**
* @brief Constructor a 2D span
* @brief Constructor from a span and number of elements in each row.
*
* @param data Pointer to the data
* @param rows Number of rows
* @param flat_view The flattened 2D span
* @param columns Number of columns
*/
constexpr base_2dspan(T* data, size_t rows, size_t columns) noexcept
: _data{data}, _size{rows, columns}
constexpr base_2dspan(RowType<T, dynamic_extent> flat_view, size_t columns)
: _flat{flat_view}, _size{columns == 0 ? 0 : flat_view.size() / columns, columns}
{
CUDF_EXPECTS(_size.first * _size.second == flat_view.size(), "Invalid 2D span size");
}
/**
* @brief Constructor a 2D span
*
* @param data Pointer to the data
* @param size Size of the 2D span as pair
*/
base_2dspan(T* data, size_type size) noexcept : _data{data}, _size{std::move(size)} {}

/**
* @brief Returns a pointer to the beginning of the sequence.
*
* @return A pointer to the first element of the span
*/
constexpr auto data() const noexcept { return _data; }
constexpr auto data() const noexcept { return _flat.data(); }

/**
* @brief Returns the size in the span as pair.
*
* @return pair representing rows and columns size of the span
*/
constexpr auto size() const noexcept { return _size; }

/**
* @brief Returns the number of elements in the span.
*
* @return Number of elements in the span
*/
constexpr auto count() const noexcept { return size().first * size().second; }
constexpr auto count() const noexcept { return _flat.size(); }

/**
* @brief Checks if the span is empty.
*
* @return True if the span is empty, false otherwise
*/
[[nodiscard]] constexpr bool is_empty() const noexcept { return count() == 0; }

/**
* @brief Returns flattened index of the element at the specified 2D position.
*
* @param row The row index
* @param column The column index
* @param size The size of the 2D span as pair
* @return The flattened index of the element at the specified 2D position
*/
static constexpr size_t flatten_index(size_t row, size_t column, size_type size) noexcept
{
return row * size.second + column;
}

/**
* @brief Returns a reference to the row-th element of the sequence.
*
Expand All @@ -453,52 +459,15 @@ class base_2dspan {
*/
constexpr RowType<T, dynamic_extent> operator[](size_t row) const
{
return {this->data() + flatten_index(row, 0, this->size()), this->size().second};
}

/**
* @brief Returns a reference to the first element in the span.
*
* Calling front() on an empty span results in undefined behavior.
*
* @return Reference to the first element in the span
*/
[[nodiscard]] constexpr RowType<T, dynamic_extent> front() const { return (*this)[0]; }
/**
* @brief Returns a reference to the last element in the span.
*
* Calling back() on an empty span results in undefined behavior.
*
* @return Reference to the last element in the span
*/
[[nodiscard]] constexpr RowType<T, dynamic_extent> back() const
{
return (*this)[size().first - 1];
}

/**
* @brief Obtains a 2D span that is a view over the `num_rows` rows of this span starting at
* `first_row`
*
* @param first_row The first row in the subspan
* @param num_rows The number of rows in the subspan
* @return A subspan of the sequence, of requested starting `first_row` and `num_rows`
*/
constexpr base_2dspan subspan(size_t first_row, size_t num_rows) const noexcept
{
return base_2dspan(
_data + flatten_index(first_row, 0, this->size()), num_rows, this->size().second);
return _flat.subspan(row * _size.second, _size.second);
}

/**
* @brief Returns a flattened span of the 2D span.
*
* @return A flattened span of the 2D span
*/
constexpr RowType<T, dynamic_extent> flat_view()
{
return {this->data(), this->size().first * this->size().second};
}
constexpr RowType<T, dynamic_extent> flat_view() const { return _flat; }

/**
* @brief Construct a 2D span from another 2D span of convertible type
Expand All @@ -514,13 +483,13 @@ class base_2dspan {
RowType<T, dynamic_extent>>,
void>* = nullptr>
constexpr base_2dspan(base_2dspan<OtherT, OtherRowType> const& other) noexcept
: _data{other.data()}, _size{other.size()}
: _flat{other.flat_view()}, _size{other.size()}
{
}

protected:
T* _data = nullptr; ///< pointer to the first element
size_type _size{0, 0}; ///< rows, columns
RowType<T, dynamic_extent> _flat; ///< flattened 2D span
size_type _size{0, 0}; ///< num rows, num columns
};

/**
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/orc/reader_impl_chunking.cu
Original file line number Diff line number Diff line change
Expand Up @@ -668,8 +668,8 @@ void reader_impl::load_next_stripe_data(read_mode mode)
if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
auto const& decompressor = *_metadata.per_file_metadata[0].decompressor;

auto compinfo = cudf::detail::hostdevice_span<gpu::CompressedStreamInfo>(
hd_compinfo.begin(), hd_compinfo.d_begin(), stream_range.size());
auto compinfo = cudf::detail::hostdevice_span<gpu::CompressedStreamInfo>{hd_compinfo}.subspan(
0, stream_range.size());
for (auto stream_idx = stream_range.begin; stream_idx < stream_range.end; ++stream_idx) {
auto const& info = stream_info[stream_idx];
auto const dst_base =
Expand Down
44 changes: 22 additions & 22 deletions cpp/src/io/orc/reader_impl_decode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -508,21 +508,20 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector<gpu::ColumnDesc> const&
auto const d_prefix_sums_to_update = cudf::detail::make_device_uvector_async(
prefix_sums_to_update, stream, cudf::get_current_device_resource_ref());

thrust::for_each(
rmm::exec_policy_nosync(stream),
d_prefix_sums_to_update.begin(),
d_prefix_sums_to_update.end(),
[num_stripes, chunks = cudf::detail::device_2dspan<gpu::ColumnDesc const>{chunks}] __device__(
auto const& idx_psums) {
auto const col_idx = idx_psums.first;
auto const psums = idx_psums.second;
thrust::transform(thrust::seq,
thrust::make_counting_iterator<std::size_t>(0ul),
thrust::make_counting_iterator<std::size_t>(num_stripes),
psums,
[&](auto stripe_idx) { return chunks[stripe_idx][col_idx].null_count; });
thrust::inclusive_scan(thrust::seq, psums, psums + num_stripes, psums);
});
thrust::for_each(rmm::exec_policy_nosync(stream),
d_prefix_sums_to_update.begin(),
d_prefix_sums_to_update.end(),
[num_stripes, chunks = chunks.device_view()] __device__(auto const& idx_psums) {
auto const col_idx = idx_psums.first;
auto const psums = idx_psums.second;
thrust::transform(
thrust::seq,
thrust::make_counting_iterator<std::size_t>(0ul),
thrust::make_counting_iterator<std::size_t>(num_stripes),
psums,
[&](auto stripe_idx) { return chunks[stripe_idx][col_idx].null_count; });
thrust::inclusive_scan(thrust::seq, psums, psums + num_stripes, psums);
});
// `prefix_sums_to_update` goes out of scope, copy has to be done before we return
stream.synchronize();
}
Expand Down Expand Up @@ -554,12 +553,12 @@ void aggregate_child_meta(std::size_t level,
col_meta.num_child_rows_per_stripe.resize(number_of_child_chunks);
col_meta.rwgrp_meta.resize(num_of_rowgroups * num_child_cols);

auto child_start_row = cudf::detail::host_2dspan<int64_t>(
col_meta.child_start_row.data(), num_of_stripes, num_child_cols);
auto num_child_rows_per_stripe = cudf::detail::host_2dspan<int64_t>(
col_meta.num_child_rows_per_stripe.data(), num_of_stripes, num_child_cols);
auto child_start_row =
cudf::detail::host_2dspan<int64_t>(col_meta.child_start_row, num_child_cols);
auto num_child_rows_per_stripe =
cudf::detail::host_2dspan<int64_t>(col_meta.num_child_rows_per_stripe, num_child_cols);
auto rwgrp_meta = cudf::detail::host_2dspan<reader_column_meta::row_group_meta>(
col_meta.rwgrp_meta.data(), num_of_rowgroups, num_child_cols);
col_meta.rwgrp_meta, num_child_cols);

int index = 0; // number of child column processed

Expand Down Expand Up @@ -951,8 +950,9 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode)

// Setup row group descriptors if using indexes.
if (_metadata.per_file_metadata[0].ps.compression != orc::NONE) {
auto compinfo = cudf::detail::hostdevice_span<gpu::CompressedStreamInfo>(
hd_compinfo.begin(), hd_compinfo.d_begin(), stream_range.size());
auto const compinfo =
cudf::detail::hostdevice_span<gpu::CompressedStreamInfo>{hd_compinfo}.subspan(
0, stream_range.size());
auto decomp_data = decompress_stripe_data(load_stripe_range,
stream_range,
stripe_count,
Expand Down
15 changes: 7 additions & 8 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -718,8 +718,8 @@ std::vector<std::vector<rowgroup_rows>> calculate_aligned_rowgroup_bounds(

auto d_pd_set_counts_data = rmm::device_uvector<cudf::size_type>(
orc_table.num_columns() * segmentation.num_rowgroups(), stream);
auto const d_pd_set_counts = device_2dspan<cudf::size_type>{
d_pd_set_counts_data.data(), segmentation.num_rowgroups(), orc_table.num_columns()};
auto const d_pd_set_counts =
device_2dspan<cudf::size_type>{d_pd_set_counts_data, orc_table.num_columns()};
gpu::reduce_pushdown_masks(orc_table.d_columns, segmentation.rowgroups, d_pd_set_counts, stream);

auto aligned_rgs = hostdevice_2dvector<rowgroup_rows>(
Expand All @@ -740,7 +740,7 @@ std::vector<std::vector<rowgroup_rows>> calculate_aligned_rowgroup_bounds(
[columns = device_span<orc_column_device_view const>{orc_table.d_columns},
stripes = device_span<stripe_rowgroups const>{d_stripes},
d_pd_set_counts,
out_rowgroups = device_2dspan<rowgroup_rows>{aligned_rgs}] __device__(auto& idx) {
out_rowgroups = aligned_rgs.device_view()] __device__(auto& idx) {
uint32_t const col_idx = idx / stripes.size();
// No alignment needed for root columns
if (not columns[col_idx].parent_index.has_value()) return;
Expand Down Expand Up @@ -912,7 +912,7 @@ encoded_data encode_columns(orc_table_view const& orc_table,
rmm::exec_policy(stream),
thrust::make_counting_iterator(0ul),
chunks.count(),
[chunks = device_2dspan<gpu::EncChunk>{chunks},
[chunks = chunks.device_view(),
cols = device_span<orc_column_device_view const>{orc_table.d_columns}] __device__(auto& idx) {
auto const col_idx = idx / chunks.size().second;
auto const rg_idx = idx % chunks.size().second;
Expand Down Expand Up @@ -1898,7 +1898,7 @@ hostdevice_2dvector<rowgroup_rows> calculate_rowgroup_bounds(orc_table_view cons
thrust::make_counting_iterator(0ul),
num_rowgroups,
[cols = device_span<orc_column_device_view const>{orc_table.d_columns},
rg_bounds = device_2dspan<rowgroup_rows>{rowgroup_bounds},
rg_bounds = rowgroup_bounds.device_view(),
rowgroup_size] __device__(auto rg_idx) mutable {
thrust::transform(
thrust::seq, cols.begin(), cols.end(), rg_bounds[rg_idx].begin(), [&](auto const& col) {
Expand Down Expand Up @@ -1988,8 +1988,7 @@ encoder_decimal_info decimal_chunk_sizes(orc_table_view& orc_table,
d_tmp_rowgroup_sizes.end(),
[src = esizes.data(),
col_idx = col_idx,
rg_bounds = device_2dspan<rowgroup_rows const>{
segmentation.rowgroups}] __device__(auto idx) {
rg_bounds = segmentation.rowgroups.device_view()] __device__(auto idx) {
return src[rg_bounds[idx][col_idx].end - 1];
});

Expand Down Expand Up @@ -2051,7 +2050,7 @@ auto set_rowgroup_char_counts(orc_table_view& orc_table,
auto const num_str_cols = orc_table.num_string_columns();

auto counts = rmm::device_uvector<size_type>(num_str_cols * num_rowgroups, stream);
auto counts_2d_view = device_2dspan<size_type>(counts.data(), num_str_cols, num_rowgroups);
auto counts_2d_view = device_2dspan<size_type>(counts, num_rowgroups);
gpu::rowgroup_char_counts(counts_2d_view,
orc_table.d_columns,
rowgroup_bounds,
Expand Down
Loading

0 comments on commit 14209c1

Please sign in to comment.