Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into pylibcudf-aggregation-serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr authored Dec 19, 2024
2 parents 9007963 + 88df0ad commit 31f236e
Show file tree
Hide file tree
Showing 166 changed files with 3,415 additions and 4,200 deletions.
24 changes: 16 additions & 8 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ jobs:
# Please keep pr-builder as the top job here
pr-builder:
needs:
- check-nightly-ci
- changed-files
- checks
- conda-cpp-build
Expand Down Expand Up @@ -54,6 +55,18 @@ jobs:
- name: Telemetry setup
if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
check-nightly-ci:
# Switch to ubuntu-latest once it defaults to a version of Ubuntu that
# provides at least Python 3.11 (see
# https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat)
runs-on: ubuntu-24.04
env:
RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- name: Check if nightly CI is passing
uses: rapidsai/shared-actions/check_nightly_success/dispatch@main
with:
repo: cudf
changed-files:
secrets: inherit
needs: telemetry-setup
Expand Down Expand Up @@ -328,16 +341,11 @@ jobs:
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"

telemetry-summarize:
runs-on: ubuntu-latest
# This job must use a self-hosted runner to record telemetry traces.
runs-on: linux-amd64-cpu4
needs: pr-builder
if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }}
continue-on-error: true
steps:
- name: Load stashed telemetry env vars
uses: rapidsai/shared-actions/telemetry-dispatch-load-base-env-vars@main
with:
load_service_name: true
- name: Telemetry summarize
uses: rapidsai/shared-actions/telemetry-dispatch-write-summary@main
with:
cert_concat: "${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}"
uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main
6 changes: 6 additions & 0 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ rapids-mamba-retry install \

export RAPIDS_DOCS_DIR="$(mktemp -d)"

# Start from a success status; the script finishes with `exit ${EXITCODE}`.
EXITCODE=0
# Whenever the ERR trap fires, remember that something failed.
trap "EXITCODE=1" ERR
# Disable exit-on-error so the commands after a failing one still run.
set +e

rapids-logger "Build CPP docs"
pushd cpp/doxygen
aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_MAJOR_MINOR}/rmm.tag . || echo "Failed to download rmm Doxygen tag"
Expand All @@ -58,3 +62,5 @@ mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/dask-cudf/html"
popd

RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" rapids-upload-docs

exit ${EXITCODE}
8 changes: 5 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ rapids_cpm_init()

# Not using rapids-cmake since we never want to find, always download.
CPMAddPackage(
NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW TRUE GIT_TAG
NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW FALSE GIT_TAG
c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55 VERSION c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55
)
rapids_make_logger(cudf EXPORT_SET cudf-exports)
Expand Down Expand Up @@ -916,7 +916,9 @@ if(CUDF_LARGE_STRINGS_DISABLED)
endif()

# Define logging level
target_compile_definitions(cudf PRIVATE "CUDF_LOG_ACTIVE_LEVEL=${LIBCUDF_LOGGING_LEVEL}")
target_compile_definitions(
cudf PRIVATE "CUDF_LOG_ACTIVE_LEVEL=CUDF_LOG_LEVEL_${LIBCUDF_LOGGING_LEVEL}"
)

# Enable remote IO through KvikIO
target_compile_definitions(cudf PRIVATE $<$<BOOL:${CUDF_KVIKIO_REMOTE_IO}>:CUDF_KVIKIO_REMOTE_IO>)
Expand Down Expand Up @@ -1105,7 +1107,7 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL)
${_tgt} PRIVATE "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
)
target_include_directories(${_tgt} PRIVATE "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/include>")
target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm)
target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm rmm::rmm_logger rmm::rmm_logger_impl)
if(CUDF_BUILD_STACKTRACE_DEBUG)
target_link_libraries(${_tgt} PRIVATE cudf_backtrace)
endif()
Expand Down
20 changes: 14 additions & 6 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,9 @@ function(ConfigureNVBench CMAKE_BENCH_NAME)
endfunction()

# ##################################################################################################
# * column benchmarks -----------------------------------------------------------------------------
ConfigureBench(COLUMN_CONCAT_BENCH column/concatenate.cpp)
# * copying benchmarks
# -----------------------------------------------------------------------------
ConfigureNVBench(COPYING_NVBENCH copying/concatenate.cpp)

# ##################################################################################################
# * gather benchmark ------------------------------------------------------------------------------
Expand Down Expand Up @@ -351,11 +352,18 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary

# ##################################################################################################
# * nvtext benchmark -------------------------------------------------------------------
ConfigureBench(TEXT_BENCH text/subword.cpp)

ConfigureNVBench(
TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp
text/ngrams.cpp text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp
TEXT_NVBENCH
text/edit_distance.cpp
text/hash_ngrams.cpp
text/jaccard.cpp
text/minhash.cpp
text/ngrams.cpp
text/normalize.cpp
text/replace.cpp
text/subword.cpp
text/tokenize.cpp
text/vocab.cpp
)

# ##################################################################################################
Expand Down
169 changes: 0 additions & 169 deletions cpp/benchmarks/column/concatenate.cpp

This file was deleted.

84 changes: 84 additions & 0 deletions cpp/benchmarks/copying/concatenate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <benchmarks/common/generate_input.hpp>

#include <cudf/column/column_view.hpp>
#include <cudf/concatenate.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvbench/nvbench.cuh>

#include <vector>

/**
 * @brief Benchmarks `cudf::concatenate` on the columns of a generated int64 table.
 *
 * Axes read from `state`:
 *  - `num_rows`: row count passed to the table generator
 *  - `num_cols`: number of int64 columns generated and then concatenated
 *  - `nulls`:    null probability passed to `create_sequence_table`
 */
static void bench_concatenate(nvbench::state& state)
{
  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const num_cols = static_cast<cudf::size_type>(state.get_int64("num_cols"));
  // Keep the probability as a double. Casting it to cudf::size_type truncated
  // 0.3 to 0, so both values of the "nulls" axis ran with the same input.
  auto const nulls = state.get_float64("nulls");

  auto input = create_sequence_table(
    cycle_dtypes({cudf::type_to_id<int64_t>()}, num_cols), row_count{num_rows}, nulls);
  auto input_columns = input->view();
  auto column_views  = std::vector<cudf::column_view>(input_columns.begin(), input_columns.end());

  auto stream = cudf::get_default_stream();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

  // Widen before multiplying so the element count cannot overflow a 32-bit
  // cudf::size_type if the axes are ever enlarged.
  auto const num_elements = static_cast<int64_t>(num_rows) * num_cols;
  state.add_global_memory_reads<int64_t>(num_elements);
  state.add_global_memory_writes<int64_t>(num_elements);

  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch&) { auto result = cudf::concatenate(column_views); });
}

NVBENCH_BENCH(bench_concatenate)
  .set_name("concatenate")
  .add_int64_axis("num_rows", {64, 512, 4096, 32768, 262144})
  .add_int64_axis("num_cols", {2, 8, 64, 512, 1024})
  .add_float64_axis("nulls", {0.0, 0.3});

/**
 * @brief Benchmarks `cudf::concatenate` on repeated views of one random strings column.
 *
 * Axes read from `state`:
 *  - `num_rows`:  row count of the generated strings column
 *  - `num_cols`:  how many times the column view is repeated in the input
 *  - `row_width`: upper bound given to the NORMAL distribution for the STRING type
 *  - `nulls`:     null probability set on the data profile
 */
static void bench_concatenate_strings(nvbench::state& state)
{
  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const num_cols  = static_cast<cudf::size_type>(state.get_int64("num_cols"));
  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
  // Keep the probability as a double. Casting it to cudf::size_type truncated
  // 0.3 to 0, so both values of the "nulls" axis ran with the same input.
  auto const nulls = state.get_float64("nulls");

  data_profile const profile =
    data_profile_builder()
      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width)
      .null_probability(nulls);
  auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
  auto const input  = column->view();

  // Every entry is a view of the same column; no data is copied here.
  auto column_views = std::vector<cudf::column_view>(num_cols, input);

  auto stream = cudf::get_default_stream();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

  // Query the chars size once and reuse it for both counters.
  auto const sv          = cudf::strings_column_view(input);
  auto const chars_bytes = sv.chars_size(stream) * num_cols;
  state.add_global_memory_reads<int8_t>(chars_bytes);
  // Count writes in the same byte units as the reads; the previous int64_t
  // element type inflated the reported write traffic by 8x.
  state.add_global_memory_writes<int8_t>(chars_bytes);

  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch&) { auto result = cudf::concatenate(column_views); });
}

NVBENCH_BENCH(bench_concatenate_strings)
  .set_name("concatenate_strings")
  .add_int64_axis("num_rows", {256, 512, 4096, 16384})
  .add_int64_axis("num_cols", {2, 8, 64, 256})
  .add_int64_axis("row_width", {32, 128})
  .add_float64_axis("nulls", {0.0, 0.3});
Loading

0 comments on commit 31f236e

Please sign in to comment.