Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into fea/extract-pq-bloom-filter-data
Browse files Browse the repository at this point in the history
  • Loading branch information
mhaseeb123 authored Dec 18, 2024
2 parents 4194d30 + a081a57 commit 8b7baff
Show file tree
Hide file tree
Showing 33 changed files with 375 additions and 365 deletions.
11 changes: 3 additions & 8 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -328,16 +328,11 @@ jobs:
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"

telemetry-summarize:
runs-on: ubuntu-latest
# This job must use a self-hosted runner to record telemetry traces.
runs-on: linux-amd64-cpu4
needs: pr-builder
if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }}
continue-on-error: true
steps:
- name: Load stashed telemetry env vars
uses: rapidsai/shared-actions/telemetry-dispatch-load-base-env-vars@main
with:
load_service_name: true
- name: Telemetry summarize
uses: rapidsai/shared-actions/telemetry-dispatch-write-summary@main
with:
cert_concat: "${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}"
uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main
6 changes: 4 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ rapids_cpm_init()

# Not using rapids-cmake since we never want to find, always download.
CPMAddPackage(
NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW TRUE GIT_TAG
NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW FALSE GIT_TAG
c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55 VERSION c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55
)
rapids_make_logger(cudf EXPORT_SET cudf-exports)
Expand Down Expand Up @@ -917,7 +917,9 @@ if(CUDF_LARGE_STRINGS_DISABLED)
endif()

# Define logging level
target_compile_definitions(cudf PRIVATE "CUDF_LOG_ACTIVE_LEVEL=${LIBCUDF_LOGGING_LEVEL}")
target_compile_definitions(
cudf PRIVATE "CUDF_LOG_ACTIVE_LEVEL=CUDF_LOG_LEVEL_${LIBCUDF_LOGGING_LEVEL}"
)

# Enable remote IO through KvikIO
target_compile_definitions(cudf PRIVATE $<$<BOOL:${CUDF_KVIKIO_REMOTE_IO}>:CUDF_KVIKIO_REMOTE_IO>)
Expand Down
8 changes: 3 additions & 5 deletions cpp/cmake/thirdparty/get_nanoarrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@

# This function finds nanoarrow and sets any additional necessary environment variables.
function(find_and_configure_nanoarrow)
include(${rapids-cmake-dir}/cpm/package_override.cmake)

set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
rapids_cpm_package_override("${cudf_patch_dir}/nanoarrow_override.json")

if(NOT BUILD_SHARED_LIBS)
set(_exclude_from_all EXCLUDE_FROM_ALL FALSE)
else()
Expand All @@ -31,6 +26,9 @@ function(find_and_configure_nanoarrow)
nanoarrow 0.6.0.dev
GLOBAL_TARGETS nanoarrow
CPM_ARGS
GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
GIT_SHALLOW FALSE
OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" ${_exclude_from_all}
)
set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON)
Expand Down
38 changes: 0 additions & 38 deletions cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff

This file was deleted.

18 changes: 0 additions & 18 deletions cpp/cmake/thirdparty/patches/nanoarrow_override.json

This file was deleted.

6 changes: 3 additions & 3 deletions cpp/include/cudf/detail/aggregation/aggregation.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cudf/detail/utilities/assert.cuh>
#include <cudf/detail/utilities/device_atomics.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/traits.cuh>

#include <rmm/cuda_stream_view.hpp>
Expand All @@ -31,7 +32,6 @@
#include <thrust/fill.h>

#include <type_traits>
#include <vector>

namespace cudf {
namespace detail {
Expand Down Expand Up @@ -216,12 +216,12 @@ struct identity_initializer {
* @throw cudf::logic_error if column type is not fixed-width
*
* @param table The table of columns to initialize.
* @param aggs A vector of aggregation operations corresponding to the table
* @param aggs A span of aggregation operations corresponding to the table
* columns. The aggregations determine the identity value for each column.
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
void initialize_with_identity(mutable_table_view& table,
std::vector<aggregation::Kind> const& aggs,
host_span<cudf::aggregation::Kind const> aggs,
rmm::cuda_stream_view stream);

} // namespace detail
Expand Down
5 changes: 2 additions & 3 deletions cpp/src/aggregation/aggregation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,14 @@
#include <cudf/detail/aggregation/aggregation.cuh>
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <vector>

namespace cudf {
namespace detail {
void initialize_with_identity(mutable_table_view& table,
std::vector<aggregation::Kind> const& aggs,
host_span<cudf::aggregation::Kind const> aggs,
rmm::cuda_stream_view stream)
{
// TODO: Initialize all the columns in a single kernel instead of invoking one
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/hash/compute_aggregations.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ rmm::device_uvector<cudf::size_type> compute_aggregations(
rmm::cuda_stream_view stream)
{
// flatten the aggs to a table that can be operated on by aggregate_row
auto [flattened_values, agg_kinds, aggs] = flatten_single_pass_aggs(requests);
auto [flattened_values, agg_kinds, aggs] = flatten_single_pass_aggs(requests, stream);
auto const d_agg_kinds = cudf::detail::make_device_uvector_async(
agg_kinds, stream, rmm::mr::get_current_device_resource());

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/hash/compute_global_memory_aggs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ template rmm::device_uvector<cudf::size_type> compute_global_memory_aggs<global_
bitmask_type const* row_bitmask,
cudf::table_view const& flattened_values,
cudf::aggregation::Kind const* d_agg_kinds,
std::vector<cudf::aggregation::Kind> const& agg_kinds,
host_span<cudf::aggregation::Kind const> agg_kinds,
global_set_t& global_set,
std::vector<std::unique_ptr<aggregation>>& aggregations,
cudf::detail::result_cache* sparse_results,
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/groupby/hash/compute_global_memory_aggs.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <cudf/groupby.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
Expand All @@ -44,7 +45,7 @@ rmm::device_uvector<cudf::size_type> compute_global_memory_aggs(
bitmask_type const* row_bitmask,
cudf::table_view const& flattened_values,
cudf::aggregation::Kind const* d_agg_kinds,
std::vector<cudf::aggregation::Kind> const& agg_kinds,
host_span<cudf::aggregation::Kind const> agg_kinds,
SetType& global_set,
std::vector<std::unique_ptr<aggregation>>& aggregations,
cudf::detail::result_cache* sparse_results,
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/groupby/hash/compute_global_memory_aggs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/groupby.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
Expand All @@ -34,7 +35,7 @@ rmm::device_uvector<cudf::size_type> compute_global_memory_aggs(
bitmask_type const* row_bitmask,
cudf::table_view const& flattened_values,
cudf::aggregation::Kind const* d_agg_kinds,
std::vector<cudf::aggregation::Kind> const& agg_kinds,
host_span<cudf::aggregation::Kind const> agg_kinds,
SetType& global_set,
std::vector<std::unique_ptr<aggregation>>& aggregations,
cudf::detail::result_cache* sparse_results,
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/hash/compute_global_memory_aggs_null.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ template rmm::device_uvector<cudf::size_type> compute_global_memory_aggs<nullabl
bitmask_type const* row_bitmask,
cudf::table_view const& flattened_values,
cudf::aggregation::Kind const* d_agg_kinds,
std::vector<cudf::aggregation::Kind> const& agg_kinds,
host_span<cudf::aggregation::Kind const> agg_kinds,
nullable_global_set_t& global_set,
std::vector<std::unique_ptr<aggregation>>& aggregations,
cudf::detail::result_cache* sparse_results,
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/groupby/hash/create_sparse_results_table.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cudf/detail/aggregation/aggregation.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
Expand All @@ -48,7 +49,7 @@ void extract_populated_keys(SetType const& key_set,
template <typename GlobalSetType>
cudf::table create_sparse_results_table(cudf::table_view const& flattened_values,
cudf::aggregation::Kind const* d_agg_kinds,
std::vector<cudf::aggregation::Kind> agg_kinds,
host_span<cudf::aggregation::Kind const> agg_kinds,
bool direct_aggregations,
GlobalSetType const& global_set,
rmm::device_uvector<cudf::size_type>& populated_keys,
Expand Down Expand Up @@ -107,7 +108,7 @@ template void extract_populated_keys<nullable_global_set_t>(
template cudf::table create_sparse_results_table<global_set_t>(
cudf::table_view const& flattened_values,
cudf::aggregation::Kind const* d_agg_kinds,
std::vector<cudf::aggregation::Kind> agg_kinds,
host_span<cudf::aggregation::Kind const> agg_kinds,
bool direct_aggregations,
global_set_t const& global_set,
rmm::device_uvector<cudf::size_type>& populated_keys,
Expand All @@ -116,7 +117,7 @@ template cudf::table create_sparse_results_table<global_set_t>(
template cudf::table create_sparse_results_table<nullable_global_set_t>(
cudf::table_view const& flattened_values,
cudf::aggregation::Kind const* d_agg_kinds,
std::vector<cudf::aggregation::Kind> agg_kinds,
host_span<cudf::aggregation::Kind const> agg_kinds,
bool direct_aggregations,
nullable_global_set_t const& global_set,
rmm::device_uvector<cudf::size_type>& populated_keys,
Expand Down
5 changes: 2 additions & 3 deletions cpp/src/groupby/hash/create_sparse_results_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <vector>

namespace cudf::groupby::detail::hash {
/**
* @brief Computes and returns a device vector containing all populated keys in
Expand All @@ -47,7 +46,7 @@ void extract_populated_keys(SetType const& key_set,
template <typename GlobalSetType>
cudf::table create_sparse_results_table(cudf::table_view const& flattened_values,
cudf::aggregation::Kind const* d_agg_kinds,
std::vector<cudf::aggregation::Kind> agg_kinds,
host_span<cudf::aggregation::Kind const> agg_kinds,
bool direct_aggregations,
GlobalSetType const& global_set,
rmm::device_uvector<cudf::size_type>& populated_keys,
Expand Down
10 changes: 7 additions & 3 deletions cpp/src/groupby/hash/flatten_single_pass_aggs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <cudf/aggregation.hpp>
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/groupby.hpp>
#include <cudf/types.hpp>
Expand Down Expand Up @@ -102,12 +103,15 @@ class groupby_simple_aggregations_collector final
};

// flatten aggs to filter in single pass aggs
std::tuple<table_view, std::vector<aggregation::Kind>, std::vector<std::unique_ptr<aggregation>>>
flatten_single_pass_aggs(host_span<aggregation_request const> requests)
std::tuple<table_view,
cudf::detail::host_vector<aggregation::Kind>,
std::vector<std::unique_ptr<aggregation>>>
flatten_single_pass_aggs(host_span<aggregation_request const> requests,
rmm::cuda_stream_view stream)
{
std::vector<column_view> columns;
std::vector<std::unique_ptr<aggregation>> aggs;
std::vector<aggregation::Kind> agg_kinds;
auto agg_kinds = cudf::detail::make_empty_host_vector<aggregation::Kind>(requests.size(), stream);

for (auto const& request : requests) {
auto const& agg_v = request.aggregations;
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/groupby/hash/flatten_single_pass_aggs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@
namespace cudf::groupby::detail::hash {

// flatten aggs to filter in single pass aggs
std::tuple<table_view, std::vector<aggregation::Kind>, std::vector<std::unique_ptr<aggregation>>>
flatten_single_pass_aggs(host_span<aggregation_request const> requests);
std::tuple<table_view,
cudf::detail::host_vector<aggregation::Kind>,
std::vector<std::unique_ptr<aggregation>>>
flatten_single_pass_aggs(host_span<aggregation_request const> requests,
rmm::cuda_stream_view stream);

} // namespace cudf::groupby::detail::hash
3 changes: 2 additions & 1 deletion cpp/src/groupby/hash/hash_compound_agg_finalizer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ void hash_compound_agg_finalizer<SetType>::visit(cudf::detail::var_aggregation c
cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream);
auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream);
mutable_table_view var_table_view{{var_result->mutable_view()}};
cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream);
cudf::detail::initialize_with_identity(
var_table_view, host_span<cudf::aggregation::Kind const>(&agg.kind, 1), stream);

thrust::for_each_n(
rmm::exec_policy_nosync(stream),
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/groupby/sort/group_scan_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ struct group_scan_functor<K, T, std::enable_if_t<is_group_scan_supported<K, T>()
if (values.is_empty()) { return result; }

auto result_table = mutable_table_view({*result});
cudf::detail::initialize_with_identity(result_table, {K}, stream);
// Need an address of the aggregation kind to pass to the span
auto const kind = K;
cudf::detail::initialize_with_identity(
result_table, host_span<aggregation::Kind const>(&kind, 1), stream);

auto result_view = mutable_column_device_view::create(result->mutable_view(), stream);
auto values_view = column_device_view::create(values, stream);
Expand Down
10 changes: 5 additions & 5 deletions cpp/tests/streams/replace_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ TEST_F(ReplaceTest, NormalizeNansAndZeros)

TEST_F(ReplaceTest, NormalizeNansAndZerosMutable)
{
auto nan = std::numeric_limits<double>::quiet_NaN();
auto input_column = cudf::test::make_type_param_vector<double>({-0.0, 0.0, -nan, nan, nan});
cudf::test::fixed_width_column_wrapper<double> input(input_column.begin(), input_column.end());
cudf::mutable_column_view mutable_view = cudf::column(input, cudf::test::get_default_stream());
cudf::normalize_nans_and_zeros(mutable_view, cudf::test::get_default_stream());
auto nan = std::numeric_limits<double>::quiet_NaN();
auto data = cudf::test::make_type_param_vector<double>({-0.0, 0.0, -nan, nan, nan});
auto input = cudf::test::fixed_width_column_wrapper<double>(data.begin(), data.end()).release();
auto view = input->mutable_view();
cudf::normalize_nans_and_zeros(view, cudf::test::get_default_stream());
}
9 changes: 1 addition & 8 deletions python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
# the License.
# =============================================================================

set(cython_sources column.pyx groupby.pyx interop.pyx scalar.pyx strings_udf.pyx types.pyx
utils.pyx
)
set(cython_sources column.pyx groupby.pyx scalar.pyx strings_udf.pyx types.pyx utils.pyx)
set(linked_libraries cudf::cudf)

rapids_cython_create_modules(
Expand All @@ -24,8 +22,3 @@ rapids_cython_create_modules(
)

target_link_libraries(strings_udf PUBLIC cudf_strings_udf)
target_include_directories(interop PUBLIC "$<BUILD_INTERFACE:${DLPACK_INCLUDE_DIR}>")

include(${rapids-cmake-dir}/export/find_package_root.cmake)
include(../../../../cpp/cmake/thirdparty/get_nanoarrow.cmake)
target_link_libraries(interop PUBLIC nanoarrow)
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from . import (
groupby,
interop,
strings_udf,
)

Expand Down
Loading

0 comments on commit 8b7baff

Please sign in to comment.