Skip to content

Commit

Permalink
Merge pull request #16918 from vyasr/branch-24.12-merge-branch-24.10
Browse files Browse the repository at this point in the history
Branch 24.12 merge branch 24.10
  • Loading branch information
raydouglass authored Sep 25, 2024
2 parents ba7d6e7 + 1305de9 commit dffc0c7
Show file tree
Hide file tree
Showing 33 changed files with 1,205 additions and 178 deletions.
18 changes: 16 additions & 2 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ jobs:
test_java: ${{ steps.changed-files.outputs.java_any_changed == 'true' }}
test_notebooks: ${{ steps.changed-files.outputs.notebooks_any_changed == 'true' }}
test_python: ${{ steps.changed-files.outputs.python_any_changed == 'true' }}
test_cudf_pandas: ${{ steps.changed-files.outputs.cudf_pandas_any_changed == 'true' }}
steps:
- name: Get PR info
id: get-pr-info
Expand Down Expand Up @@ -82,6 +83,7 @@ jobs:
- '!java/**'
- '!notebooks/**'
- '!python/**'
- '!ci/cudf_pandas_scripts/**'
java:
- '**'
- '!CONTRIBUTING.md'
Expand All @@ -90,11 +92,13 @@ jobs:
- '!img/**'
- '!notebooks/**'
- '!python/**'
- '!ci/cudf_pandas_scripts/**'
notebooks:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!java/**'
- '!ci/cudf_pandas_scripts/**'
python:
- '**'
- '!CONTRIBUTING.md'
Expand All @@ -103,6 +107,16 @@ jobs:
- '!img/**'
- '!java/**'
- '!notebooks/**'
- '!ci/cudf_pandas_scripts/**'
cudf_pandas:
- '**'
- 'ci/cudf_pandas_scripts/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
- '!img/**'
- '!java/**'
- '!notebooks/**'
checks:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down Expand Up @@ -289,7 +303,7 @@ jobs:
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
if: needs.changed-files.outputs.test_python == 'true' || needs.changed-files.outputs.test_cudf_pandas == 'true'
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand All @@ -300,7 +314,7 @@ jobs:
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
if: needs.changed-files.outputs.test_python == 'true' || needs.changed-files.outputs.test_cudf_pandas == 'true'
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand Down
64 changes: 50 additions & 14 deletions ci/cudf_pandas_scripts/pandas-tests/job-summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,33 @@ def emoji_failed(x):
# convert pr_results to a pandas DataFrame and then a markdown table
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
diff_df = pr_df - main_df
total_usage = pr_df['_slow_function_call'] + pr_df['_fast_function_call']
pr_df['CPU Usage'] = ((pr_df['_slow_function_call']/total_usage)*100.0).round(1)
pr_df['GPU Usage'] = ((pr_df['_fast_function_call']/total_usage)*100.0).round(1)
total_usage = main_df["_slow_function_call"] + main_df["_fast_function_call"]
main_df["CPU Usage"] = ((main_df["_slow_function_call"] / total_usage) * 100.0).round(1)
main_df["GPU Usage"] = ((main_df["_fast_function_call"] / total_usage) * 100.0).round(1)

total_usage = pr_df["_slow_function_call"] + pr_df["_fast_function_call"]
pr_df["CPU Usage"] = ((pr_df["_slow_function_call"] / total_usage) * 100.0).round(1)
pr_df["GPU Usage"] = ((pr_df["_fast_function_call"] / total_usage) * 100.0).round(1)

cpu_usage_mean = pr_df["CPU Usage"].mean().round(2)
gpu_usage_mean = pr_df["GPU Usage"].mean().round(2)

gpu_usage_rate_change = abs(pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean())
pr_df["CPU Usage"] = pr_df["CPU Usage"].fillna(0)
pr_df["GPU Usage"] = pr_df["GPU Usage"].fillna(0)
main_df["CPU Usage"] = main_df["CPU Usage"].fillna(0)
main_df["GPU Usage"] = main_df["GPU Usage"].fillna(0)

cpu_usage_mean = pr_df['CPU Usage'].mean().round(2)
gpu_usage_mean = pr_df['GPU Usage'].mean().round(2)
diff_df = pr_df - main_df
diff_df["CPU Usage"] = diff_df["CPU Usage"].round(1).fillna(0)
diff_df["GPU Usage"] = diff_df["GPU Usage"].round(1).fillna(0)

# Add '%' suffix to 'CPU Usage' and 'GPU Usage' columns
pr_df['CPU Usage'] = pr_df['CPU Usage'].fillna(0).astype(str) + '%'
pr_df['GPU Usage'] = pr_df['GPU Usage'].fillna(0).astype(str) + '%'
# Add '%' suffix to "CPU Usage" and "GPU Usage" columns
pr_df["CPU Usage"] = pr_df["CPU Usage"].astype(str) + "%"
pr_df["GPU Usage"] = pr_df["GPU Usage"].astype(str) + "%"

pr_df = pr_df[["total", "passed", "failed", "skipped", 'CPU Usage', 'GPU Usage']]
diff_df = diff_df[["total", "passed", "failed", "skipped"]]
pr_df = pr_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
diff_df = diff_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
diff_df.columns = diff_df.columns + "_diff"
diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed)
Expand All @@ -99,13 +112,36 @@ def emoji_failed(x):
"passed_diff": "Passed delta",
"failed_diff": "Failed delta",
"skipped_diff": "Skipped delta",
"CPU Usage_diff": "CPU Usage delta",
"GPU Usage_diff": "GPU Usage delta",
}
)
df = df.sort_values(by=["Failed tests", "Skipped tests"], ascending=False)

df = df.sort_values(by=["CPU Usage delta", "Total tests"], ascending=False)
df["CPU Usage delta"] = df["CPU Usage delta"].map(emoji_failed)
df["GPU Usage delta"] = df["GPU Usage delta"].map(emoji_passed)
df = df[
[
"Total tests",
"CPU Usage delta",
"GPU Usage delta",
"Passed tests",
"Failed tests",
"Skipped tests",
"CPU Usage",
"GPU Usage",
"Total delta",
"Passed delta",
"Failed delta",
"Skipped delta",
]
]
print(comment)
print()
print(f"Average CPU and GPU usage for the tests: {cpu_usage_mean}% and {gpu_usage_mean}%")
# NOTE(review): gpu_usage_rate_change is computed with abs(...) earlier in this script,
# so it is never negative. As written, this message reports "an increase" for any
# non-zero change (including when GPU usage went down) and "a decrease" only when the
# change is exactly zero. TODO confirm intent; keep the signed difference if the
# direction of the change is meant to be reported.
print(
f"Average GPU usage: {gpu_usage_mean}% {'an increase' if gpu_usage_rate_change > 0 else 'a decrease'} by {gpu_usage_rate_change}%"
)
print()
print(f"Average CPU usage: {cpu_usage_mean}%")
print()
print("Here are the results of running the Pandas tests against this PR:")
print()
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ add_library(
src/io/functions.cpp
src/io/json/host_tree_algorithms.cu
src/io/json/json_column.cu
src/io/json/column_tree_construction.cu
src/io/json/json_normalization.cu
src/io/json/json_tree.cu
src/io/json/nested_json_gpu.cu
Expand Down
5 changes: 5 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,11 @@ ConfigureNVBench(STRUCT_CREATION_NVBENCH structs/create_structs.cpp)
# --------------------------------------------------------------------------------
ConfigureBench(QUANTILES_BENCH quantiles/quantiles.cpp)

# ##################################################################################################
# * tdigest benchmark
# --------------------------------------------------------------------------------
ConfigureNVBench(TDIGEST_NVBENCH quantiles/tdigest.cu)

# ##################################################################################################
# * type_dispatcher benchmark ---------------------------------------------------------------------
ConfigureBench(TYPE_DISPATCHER_BENCH type_dispatcher/type_dispatcher.cu)
Expand Down
123 changes: 123 additions & 0 deletions cpp/benchmarks/quantiles/tdigest.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf_test/column_wrapper.hpp>

#include <cudf/detail/tdigest/tdigest.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/exec_policy.hpp>

#include <cuda/functional>
#include <thrust/copy.h>
#include <thrust/execution_policy.h>

#include <nvbench/nvbench.cuh>

/**
 * @brief Benchmarks `cudf::tdigest::detail::group_merge_tdigest` on synthetic tdigests.
 *
 * Builds a tdigest column of `num_tdigests` rows. Every row holds `tdigest_size`
 * centroids whose means are `base_value + k` for k in [0, tdigest_size) and whose weights
 * are all 1. Consecutive runs of `tdigests_per_group` rows are assigned to the same group
 * and the merge of those groups is timed.
 *
 * If `num_tdigests` is not a multiple of `tdigests_per_group`, the leftover rows form one
 * final, smaller group so that the group offsets, the group labels and the group count
 * all describe the same partition of the input.
 *
 * Axes read from `state`:
 *  - `num_tdigests`       number of input tdigests (rows)
 *  - `tdigest_size`       number of centroids in each input tdigest
 *  - `tdigests_per_group` number of consecutive tdigests placed in each group
 *  - `max_centroids`      forwarded as the `max_centroids` argument of the merge
 *
 * @param state nvbench state providing the axis values and receiving the measurements
 */
void bm_tdigest_merge(nvbench::state& state)
{
  auto const num_tdigests = static_cast<cudf::size_type>(state.get_int64("num_tdigests"));
  auto const tdigest_size = static_cast<cudf::size_type>(state.get_int64("tdigest_size"));
  auto const tdigests_per_group =
    static_cast<cudf::size_type>(state.get_int64("tdigests_per_group"));
  auto const max_centroids = static_cast<cudf::size_type>(state.get_int64("max_centroids"));

  // A non-positive group size would divide by zero below.
  if (tdigests_per_group <= 0) {
    state.skip("tdigests_per_group must be positive.");
    return;
  }

  // Round up: every tdigest gets the label `i / tdigests_per_group`, so a trailing partial
  // group must be counted or its label would fall outside [0, num_groups).
  auto const num_groups =
    (num_tdigests / tdigests_per_group) + ((num_tdigests % tdigests_per_group) != 0 ? 1 : 0);
  auto const total_centroids = num_tdigests * tdigest_size;

  auto stream = cudf::get_default_stream();
  auto mr     = rmm::mr::get_current_device_resource();

  constexpr int base_value = 5;

  // construct inner means/weights
  auto val_iter = cudf::detail::make_counting_transform_iterator(
    0, cuda::proclaim_return_type<double>([tdigest_size](cudf::size_type i) {
      return static_cast<double>(base_value + (i % tdigest_size));
    }));
  auto one_iter = thrust::make_constant_iterator(1);
  cudf::test::fixed_width_column_wrapper<double> means(val_iter, val_iter + total_centroids);
  cudf::test::fixed_width_column_wrapper<double> weights(one_iter, one_iter + total_centroids);
  std::vector<std::unique_ptr<cudf::column>> inner_struct_children;
  inner_struct_children.push_back(means.release());
  inner_struct_children.push_back(weights.release());
  cudf::test::structs_column_wrapper inner_struct(std::move(inner_struct_children));

  // construct the tdigest lists themselves
  auto offset_iter = cudf::detail::make_counting_transform_iterator(
    0, cuda::proclaim_return_type<cudf::size_type>([tdigest_size](cudf::size_type i) {
      return i * tdigest_size;
    }));
  cudf::test::fixed_width_column_wrapper<int> offsets(offset_iter, offset_iter + num_tdigests + 1);
  auto list_col = cudf::make_lists_column(
    num_tdigests, offsets.release(), inner_struct.release(), 0, {}, stream, mr);

  // min and max columns
  auto min_iter = thrust::make_constant_iterator(base_value);
  auto max_iter = thrust::make_constant_iterator(base_value + (tdigest_size - 1));
  cudf::test::fixed_width_column_wrapper<double> mins(min_iter, min_iter + num_tdigests);
  cudf::test::fixed_width_column_wrapper<double> maxes(max_iter, max_iter + num_tdigests);

  // assemble the whole thing
  std::vector<std::unique_ptr<cudf::column>> tdigest_children;
  tdigest_children.push_back(std::move(list_col));
  tdigest_children.push_back(mins.release());
  tdigest_children.push_back(maxes.release());
  cudf::test::structs_column_wrapper tdigest(std::move(tdigest_children));

  // group offsets: group g starts at g * tdigests_per_group; the final offset is clamped
  // to num_tdigests so the last (possibly partial) group ends exactly at the input's end.
  rmm::device_uvector<cudf::size_type> group_offsets(num_groups + 1, stream, mr);
  rmm::device_uvector<cudf::size_type> group_labels(num_tdigests, stream, mr);
  auto group_offset_iter = cudf::detail::make_counting_transform_iterator(
    0,
    cuda::proclaim_return_type<cudf::size_type>(
      [tdigests_per_group, num_tdigests] __device__(cudf::size_type i) {
        auto const offset = i * tdigests_per_group;
        return offset < num_tdigests ? offset : num_tdigests;
      }));
  thrust::copy(rmm::exec_policy_nosync(stream, mr),
               group_offset_iter,
               group_offset_iter + num_groups + 1,
               group_offsets.begin());

  // group labels: tdigest i belongs to group i / tdigests_per_group
  auto group_label_iter = cudf::detail::make_counting_transform_iterator(
    0,
    cuda::proclaim_return_type<cudf::size_type>(
      [tdigests_per_group] __device__(cudf::size_type i) { return i / tdigests_per_group; }));
  thrust::copy(rmm::exec_policy_nosync(stream, mr),
               group_label_iter,
               group_label_iter + num_tdigests,
               group_labels.begin());

  state.add_element_count(total_centroids);

  // The setup above was enqueued on `stream`; run the timed work on that same stream.
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
  state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
             [&](nvbench::launch& launch, auto& timer) {
               timer.start();
               auto result = cudf::tdigest::detail::group_merge_tdigest(
                 tdigest, group_offsets, group_labels, num_groups, max_centroids, stream, mr);
               timer.stop();
             });
}

// Registers bm_tdigest_merge with one tdigest per group: 500'000 input tdigests,
// each with either 1 or 1000 centroids, merged with max_centroids of 10000 and 1000.
NVBENCH_BENCH(bm_tdigest_merge)
.set_name("TDigest many tiny groups")
.add_int64_axis("num_tdigests", {500'000})
.add_int64_axis("tdigest_size", {1, 1000})
.add_int64_axis("tdigests_per_group", {1})
.add_int64_axis("max_centroids", {10000, 1000});

// Registers bm_tdigest_merge with three tdigests per group, using the same
// num_tdigests, tdigest_size and max_centroids axes as the registration above.
// Note that 500'000 is not an exact multiple of 3.
NVBENCH_BENCH(bm_tdigest_merge)
.set_name("TDigest many small groups")
.add_int64_axis("num_tdigests", {500'000})
.add_int64_axis("tdigest_size", {1, 1000})
.add_int64_axis("tdigests_per_group", {3})
.add_int64_axis("max_centroids", {10000, 1000});
Loading

0 comments on commit dffc0c7

Please sign in to comment.