Skip to content

Commit

Permalink
Merge branch 'branch-23.12' into mwilson/thread_overflow
Browse files Browse the repository at this point in the history
  • Loading branch information
hyperbolic2346 authored Oct 20, 2023
2 parents 4de0f41 + 0341bb7 commit e299a35
Show file tree
Hide file tree
Showing 38 changed files with 1,612 additions and 386 deletions.
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ dependencies:
- hypothesis
- identify>=2.5.20
- ipython
- libarrow==12.0.1.*
- libarrow==12.0.0.*
- libcufile-dev=1.4.0.31
- libcufile=1.4.0.31
- libcurand-dev=10.3.0.86
Expand Down Expand Up @@ -69,7 +69,7 @@ dependencies:
- pre-commit
- protobuf>=4.21,<5
- ptxcompiler
- pyarrow==12.0.1.*
- pyarrow==12.0.0.*
- pydata-sphinx-theme
- pyorc
- pytest
Expand Down
5 changes: 3 additions & 2 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies:
- cachetools
- cmake>=3.26.4
- cuda-cudart-dev
- cuda-gdb
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
Expand All @@ -41,7 +42,7 @@ dependencies:
- hypothesis
- identify>=2.5.20
- ipython
- libarrow==12.0.1.*
- libarrow==12.0.0.*
- libcufile-dev
- libcurand-dev
- libkvikio==23.12.*
Expand All @@ -66,7 +67,7 @@ dependencies:
- pip
- pre-commit
- protobuf>=4.21,<5
- pyarrow==12.0.1.*
- pyarrow==12.0.0.*
- pydata-sphinx-theme
- pyorc
- pytest
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ requirements:
- scikit-build >=0.13.1
- setuptools
- dlpack >=0.5,<0.6.0a0
- pyarrow =12
- pyarrow =12.0.0
- libcudf ={{ version }}
- rmm ={{ minor_version }}
{% if cuda_major == "11" %}
Expand Down
14 changes: 13 additions & 1 deletion conda/recipes/cudf_kafka/build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

# This assumes the script is executed from the root of the repo directory
# CUDA_HOME must be set inside conda environments because the hacked-together
# setup.py for cudf-kafka searches for CUDA through that variable.
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
CUDA_MAJOR=${RAPIDS_CUDA_VERSION%%.*}
if [[ ${CUDA_MAJOR} == "12" ]]; then
target_name="x86_64-linux"
if [[ ! $(arch) == "x86_64" ]]; then
target_name="sbsa-linux"
fi
export CUDA_HOME="${PREFIX}/targets/${target_name}/"
fi
./build.sh -v cudf_kafka
11 changes: 11 additions & 0 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ build:
- SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
- RAPIDS_CUDA_VERSION

requirements:
build:
Expand All @@ -41,13 +44,21 @@ requirements:
- {{ compiler('cxx') }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
{% if cuda_major == "12" %}
- cuda-gdb
{% endif %}
host:
- python
- cython >=3.0.0
- cuda-version ={{ cuda_version }}
- cudf ={{ version }}
- libcudf_kafka ={{ version }}
- setuptools
{% if cuda_major == "12" %}
- cuda-cudart-dev
{% endif %}
run:
- python
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ gtest_version:
aws_sdk_cpp_version:
- "<1.11"

libarrow_version:
- "=12"
libarrow:
- "==12.0.0"

dlpack_version:
- ">=0.5,<0.6.0a0"
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ requirements:
{% endif %}
- cuda-version ={{ cuda_version }}
- nvcomp {{ nvcomp_version }}
- libarrow {{ libarrow_version }}
- libarrow {{ libarrow }}
- dlpack {{ dlpack_version }}
- librdkafka {{ librdkafka_version }}
- fmt {{ fmt_version }}
Expand Down Expand Up @@ -104,7 +104,7 @@ outputs:
- nvcomp {{ nvcomp_version }}
- librmm ={{ minor_version }}
- libkvikio ={{ minor_version }}
- libarrow {{ libarrow_version }}
- libarrow {{ libarrow }}
- dlpack {{ dlpack_version }}
- gtest {{ gtest_version }}
- gmock {{ gtest_version }}
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ if(NOT DEFINED CUDF_VERSION_Arrow)
set(CUDF_VERSION_Arrow
# This version must be kept in sync with the libarrow version pinned for builds in
# dependencies.yaml.
12.0.1
12.0.0
CACHE STRING "The version of Arrow to find (or build)"
)
endif()
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/lists/detail/scatter_helper.cuh>
#include <cudf/lists/list_device_view.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -130,8 +130,8 @@ std::unique_ptr<column> scatter_impl(rmm::device_uvector<unbound_list_view> cons
std::vector<std::unique_ptr<column>> children;
children.emplace_back(std::move(offsets_column));
children.emplace_back(std::move(child_column));
auto null_mask =
target.has_nulls() ? copy_bitmask(target, stream, mr) : rmm::device_buffer{0, stream, mr};
auto null_mask = target.has_nulls() ? cudf::detail::copy_bitmask(target, stream, mr)
: rmm::device_buffer{0, stream, mr};

// The output column from this function only has null masks copied from the target columns.
// That is still not a correct final null mask for the scatter result.
Expand Down
24 changes: 22 additions & 2 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/device_buffer.hpp>
Expand Down Expand Up @@ -80,13 +81,15 @@ size_type num_bitmask_words(size_type number_of_bits);
*
* @param size The number of elements to be represented by the mask
* @param state The desired state of the mask
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A `device_buffer` for use as a null bitmask
* satisfying the desired size and state
*/
rmm::device_buffer create_null_mask(
size_type size,
mask_state state,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -100,8 +103,13 @@ rmm::device_buffer create_null_mask(
* @param begin_bit Index of the first bit to set (inclusive)
* @param end_bit Index of the last bit to set (exclusive)
* @param valid If true set all entries to valid; otherwise, set all to null
* @param stream CUDA stream used for device memory operations and kernel launches
*/
void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid);
void set_null_mask(bitmask_type* bitmask,
size_type begin_bit,
size_type end_bit,
bool valid,
rmm::cuda_stream_view stream = cudf::get_default_stream());

/**
* @brief Creates a `device_buffer` from a slice of bitmask defined by a range
Expand All @@ -115,6 +123,7 @@ void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit
* @param mask Bitmask residing in device memory whose bits will be copied
* @param begin_bit Index of the first bit to be copied (inclusive)
* @param end_bit Index of the last bit to be copied (exclusive)
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A `device_buffer` containing the bits
* `[begin_bit, end_bit)` from `mask`.
Expand All @@ -123,6 +132,7 @@ rmm::device_buffer copy_bitmask(
bitmask_type const* mask,
size_type begin_bit,
size_type end_bit,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -132,12 +142,14 @@ rmm::device_buffer copy_bitmask(
* Returns empty `device_buffer` if the column is not nullable
*
* @param view Column view whose bitmask needs to be copied
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A `device_buffer` containing the bits
* `[view.offset(), view.offset() + view.size())` from `view`'s bitmask.
*/
rmm::device_buffer copy_bitmask(
column_view const& view,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -148,11 +160,13 @@ rmm::device_buffer copy_bitmask(
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A pair of resulting bitmask and count of unset bits
*/
std::pair<rmm::device_buffer, size_type> bitmask_and(
table_view const& view,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -163,11 +177,13 @@ std::pair<rmm::device_buffer, size_type> bitmask_and(
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A pair of resulting bitmask and count of unset bits
*/
std::pair<rmm::device_buffer, size_type> bitmask_or(
table_view const& view,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -183,8 +199,12 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(
* @param bitmask Validity bitmask residing in device memory.
* @param start Index of the first bit to count (inclusive).
* @param stop Index of the last bit to count (exclusive).
* @param stream CUDA stream used for device memory operations and kernel launches
* @return The number of null elements in the specified range.
*/
cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop);
cudf::size_type null_count(bitmask_type const* bitmask,
size_type start,
size_type stop,
rmm::cuda_stream_view stream = cudf::get_default_stream());
/** @} */ // end of group
} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/src/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,

CUDF_EXPECTS((lhs.size() == rhs.size()), "Column sizes don't match");

auto [new_mask, null_count] = bitmask_and(table_view({lhs, rhs}), stream, mr);
auto [new_mask, null_count] = cudf::detail::bitmask_and(table_view({lhs, rhs}), stream, mr);
auto out =
make_fixed_width_column(output_type, lhs.size(), std::move(new_mask), null_count, stream, mr);

Expand Down
38 changes: 28 additions & 10 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,21 @@ void set_null_mask(bitmask_type* bitmask,
// Create a device_buffer for a null mask
rmm::device_buffer create_null_mask(size_type size,
mask_state state,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::create_null_mask(size, state, cudf::get_default_stream(), mr);
return detail::create_null_mask(size, state, stream, mr);
}

// Set the bit range [begin_bit, end_bit) of a pre-allocated null mask to valid if valid==true,
// or to null otherwise.
void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid)
void set_null_mask(bitmask_type* bitmask,
size_type begin_bit,
size_type end_bit,
bool valid,
rmm::cuda_stream_view stream)
{
return detail::set_null_mask(bitmask, begin_bit, end_bit, valid, cudf::get_default_stream());
return detail::set_null_mask(bitmask, begin_bit, end_bit, valid, stream);
}

namespace detail {
Expand Down Expand Up @@ -511,33 +516,46 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
rmm::device_buffer copy_bitmask(bitmask_type const* mask,
size_type begin_bit,
size_type end_bit,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::copy_bitmask(mask, begin_bit, end_bit, cudf::get_default_stream(), mr);
CUDF_FUNC_RANGE();
return detail::copy_bitmask(mask, begin_bit, end_bit, stream, mr);
}

// Create a bitmask from a column view
rmm::device_buffer copy_bitmask(column_view const& view, rmm::mr::device_memory_resource* mr)
rmm::device_buffer copy_bitmask(column_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::copy_bitmask(view, cudf::get_default_stream(), mr);
CUDF_FUNC_RANGE();
return detail::copy_bitmask(view, stream, mr);
}

std::pair<rmm::device_buffer, size_type> bitmask_and(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::bitmask_and(view, cudf::get_default_stream(), mr);
CUDF_FUNC_RANGE();
return detail::bitmask_and(view, stream, mr);
}

std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::bitmask_or(view, cudf::get_default_stream(), mr);
CUDF_FUNC_RANGE();
return detail::bitmask_or(view, stream, mr);
}

// Count null (unset) bits in the specified range
cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop)
cudf::size_type null_count(bitmask_type const* bitmask,
size_type start,
size_type stop,
rmm::cuda_stream_view stream)
{
return detail::null_count(bitmask, start, stop, cudf::get_default_stream());
CUDF_FUNC_RANGE();
return detail::null_count(bitmask, start, stop, stream);
}

} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/src/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ rmm::device_buffer concatenate_masks(host_span<column_view const> views,
});

rmm::device_buffer null_mask =
create_null_mask(total_element_count, mask_state::UNINITIALIZED, mr);
cudf::detail::create_null_mask(total_element_count, mask_state::UNINITIALIZED, stream, mr);

detail::concatenate_masks(views, static_cast<bitmask_type*>(null_mask.data()), stream);

Expand Down
5 changes: 3 additions & 2 deletions cpp/src/copying/scatter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,9 @@ struct column_scalar_scatterer_impl<struct_view, MapIterator> {

// Compute null mask
rmm::device_buffer null_mask =
target.nullable() ? copy_bitmask(target, stream, mr)
: create_null_mask(target.size(), mask_state::UNALLOCATED, stream, mr);
target.nullable()
? detail::copy_bitmask(target, stream, mr)
: detail::create_null_mask(target.size(), mask_state::UNALLOCATED, stream, mr);
column null_mask_stub(data_type{type_id::STRUCT},
target.size(),
rmm::device_buffer{},
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,8 @@ void sparse_to_dense_results(table_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto row_bitmask = bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first;
auto row_bitmask =
cudf::detail::bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first;
bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE;
bitmask_type const* row_bitmask_ptr =
skip_key_rows_with_nulls ? static_cast<bitmask_type*>(row_bitmask.data()) : nullptr;
Expand Down
Loading

0 comments on commit e299a35

Please sign in to comment.