Skip to content

Commit

Permalink
Merge branch 'branch-24.10' into fea-parquet-multithreaded-example
Browse files Browse the repository at this point in the history
  • Loading branch information
mhaseeb123 authored Sep 23, 2024
2 parents 188ce11 + 6255906 commit 1827654
Show file tree
Hide file tree
Showing 176 changed files with 6,104 additions and 2,086 deletions.
2 changes: 1 addition & 1 deletion .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ cudf.polars:
- 'python/cudf_polars/**'

pylibcudf:
- 'python/cudf/pylibcudf/**'
- 'python/pylibcudf/**'

libcudf:
- 'cpp/**'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
arch: "amd64"
branch: ${{ inputs.branch }}
build_type: ${{ inputs.build_type || 'branch' }}
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
date: ${{ inputs.date }}
node_type: "gpu-v100-latest-1"
run_script: "ci/build_docs.sh"
Expand Down
18 changes: 15 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
- wheel-tests-cudf
- wheel-build-cudf-polars
- wheel-tests-cudf-polars
- cudf-polars-polars-tests
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
- devcontainer
Expand Down Expand Up @@ -159,7 +160,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_java.sh"
static-configure:
needs: checks
Expand All @@ -180,7 +181,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
Expand All @@ -190,7 +191,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/build_docs.sh"
wheel-build-libcudf:
needs: checks
Expand Down Expand Up @@ -244,6 +245,17 @@ jobs:
# This always runs, but only fails if this PR touches code in
# pylibcudf or cudf_polars
script: "ci/test_wheel_cudf_polars.sh"
cudf-polars-polars-tests:
needs: wheel-build-cudf-polars
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
# This always runs, but only fails if this PR touches code in
# pylibcudf or cudf_polars
script: "ci/test_cudf_polars_polars_tests.sh"
wheel-build-dask-cudf:
needs: wheel-build-cudf
secrets: inherit
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_cpp_memcheck.sh"
static-configure:
secrets: inherit
Expand Down Expand Up @@ -81,7 +81,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_java.sh"
conda-notebook-tests:
secrets: inherit
Expand All @@ -93,7 +93,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_notebooks.sh"
wheel-tests-cudf:
secrets: inherit
Expand Down
14 changes: 13 additions & 1 deletion ci/cudf_pandas_scripts/pandas-tests/job-summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,18 @@ def emoji_failed(x):
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
diff_df = pr_df - main_df
total_usage = pr_df['_slow_function_call'] + pr_df['_fast_function_call']
pr_df['CPU Usage'] = ((pr_df['_slow_function_call']/total_usage)*100.0).round(1)
pr_df['GPU Usage'] = ((pr_df['_fast_function_call']/total_usage)*100.0).round(1)

pr_df = pr_df[["total", "passed", "failed", "skipped"]]
cpu_usage_mean = pr_df['CPU Usage'].mean().round(2)
gpu_usage_mean = pr_df['GPU Usage'].mean().round(2)

# Add '%' suffix to 'CPU Usage' and 'GPU Usage' columns
pr_df['CPU Usage'] = pr_df['CPU Usage'].fillna(0).astype(str) + '%'
pr_df['GPU Usage'] = pr_df['GPU Usage'].fillna(0).astype(str) + '%'

pr_df = pr_df[["total", "passed", "failed", "skipped", 'CPU Usage', 'GPU Usage']]
diff_df = diff_df[["total", "passed", "failed", "skipped"]]
diff_df.columns = diff_df.columns + "_diff"
diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
Expand All @@ -95,6 +105,8 @@ def emoji_failed(x):

print(comment)
print()
print(f"Average CPU and GPU usage for the tests: {cpu_usage_mean}% and {gpu_usage_mean}%")
print()
print("Here are the results of running the Pandas tests against this PR:")
print()
print(df.to_markdown())
4 changes: 3 additions & 1 deletion ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_dask_cudf.sh
DEPENDENCIES=(
cudf
cudf_kafka
cugraph
cuml
custreamz
dask-cuda
dask-cudf
Expand All @@ -57,7 +59,7 @@ DEPENDENCIES=(
rmm
)
for DEP in "${DEPENDENCIES[@]}"; do
for FILE in dependencies.yaml conda/environments/*.yaml; do
for FILE in dependencies.yaml conda/environments/*.yaml python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml; do
sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}"
done
for FILE in python/*/pyproject.toml; do
Expand Down
27 changes: 27 additions & 0 deletions ci/run_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

# Runs the upstream polars test suite under pytest with the
# cudf_polars.testing.plugin plugin loaded. Any extra command-line arguments
# are forwarded to pytest.

set -euo pipefail

# Support invoking run_cudf_polars_polars_tests.sh outside the script directory
# Assumption, polars has been cloned in the root of the repo.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../polars/

# Upstream tests that are deselected, each with the reason it is excluded.
DESELECTED_TESTS=(
    "tests/unit/test_polars_import.py::test_polars_import" # relies on a polars built in place
    "tests/unit/streaming/test_streaming_sort.py::test_streaming_sort[True]" # relies on polars built in debug mode
    "tests/unit/test_cpu_check.py::test_check_cpu_flags_skipped_no_flags" # Mock library error
    "tests/docs/test_user_guide.py" # No dot binary in CI image
)

# Build one "--deselect <test id>" pair per entry. The arguments are kept in
# an array (rather than flattened into a string and expanded unquoted) so that
# test ids containing glob characters such as "[True]" are passed to pytest
# verbatim, without word splitting or pathname expansion.
DESELECT_ARGS=()
for test_id in "${DESELECTED_TESTS[@]}"; do
    DESELECT_ARGS+=(--deselect "${test_id}")
done

python -m pytest \
    --import-mode=importlib \
    --cache-clear \
    -m "" \
    -p cudf_polars.testing.plugin \
    -v \
    --tb=short \
    "${DESELECT_ARGS[@]}" \
    "$@" \
    py-polars/tests
69 changes: 69 additions & 0 deletions ci/test_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -eou pipefail

# We will only fail these tests if the PR touches code in pylibcudf
# or cudf_polars itself.
# Note, the three dots mean we are doing diff between the merge-base
# of upstream and HEAD. So this is asking, "does _this branch_ touch
# files in cudf_polars/pylibcudf", rather than "are there changes
# between upstream and this branch which touch cudf_polars/pylibcudf"
# TODO: is the target branch exposed anywhere in an environment variable?
#
# The pylibcudf pathspec must be python/pylibcudf/ (the same path that
# ci/test_wheel_cudf_polars.sh checks); with the old in-tree location
# python/cudf/cudf/_lib/pylibcudf/ a PR that only touches pylibcudf would be
# classified as "no changes" and its test failures would be ignored.
if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/pylibcudf/)" ];
then
    HAS_CHANGES=1
    rapids-logger "PR has changes in cudf-polars/pylibcudf, test fails treated as failure"
else
    HAS_CHANGES=0
    rapids-logger "PR does not have changes in cudf-polars/pylibcudf, test fails NOT treated as failure"
fi

rapids-logger "Download wheels"

# NOTE(review): ci/test_wheel_cudf_polars.sh passes an explicit package type
# ("python") before the destination directory when calling
# rapids-download-wheels-from-s3; confirm whether the calls below need the same.
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist

# Download the pylibcudf built in the previous step
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcudf-dep

rapids-logger "Install pylibcudf"
python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl

# Let the shell expand the wheel glob directly, as done for pylibcudf above.
rapids-logger "Install cudf_polars"
python -m pip install ./dist/cudf_polars*.whl

# The polars tag to test against is pinned for now; the commented-out line
# derives it from the installed polars version instead.
# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
TAG="py-1.7.0"
rapids-logger "Clone polars to ${TAG}"
git clone https://github.com/pola-rs/polars.git --branch "${TAG}" --depth 1

# Install requirements for running polars tests
rapids-logger "Install polars test requirements"
python -m pip install -r polars/py-polars/requirements-dev.txt -r polars/py-polars/requirements-ci.txt

# Exit status of the polars test run; stays 0 unless the ERR trap fires.
EXITCODE=0

# ERR trap handler: remember the status of the command that just failed.
set_exitcode() {
    EXITCODE=$?
}

# Turn off errexit while the tests run so a failing suite does not abort the
# script; the ERR trap records the failing status instead.
trap set_exitcode ERR
set +e

rapids-logger "Run polars tests"
./ci/run_cudf_polars_polars_tests.sh

# Restore the default ERR handling and errexit for the rest of the script.
trap ERR
set -e

if [[ "${EXITCODE}" == "0" ]]; then
    rapids-logger "Running polars test suite PASSED"
else
    rapids-logger "Running polars test suite FAILED: exitcode ${EXITCODE}"
fi

# Test failures only fail the job when HAS_CHANGES (set earlier in this
# script) is 1; otherwise the job always succeeds.
if [[ "${HAS_CHANGES}" != "1" ]]; then
    exit 0
fi
exit "${EXITCODE}"
1 change: 1 addition & 0 deletions ci/test_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ rapids-logger "pytest pylibcudf"
pushd python/pylibcudf/pylibcudf/tests
python -m pytest \
--cache-clear \
--numprocesses=8 \
--dist=worksteal \
.
popd
Expand Down
7 changes: 7 additions & 0 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@ set -eou pipefail
if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/pylibcudf/)" ];
then
HAS_CHANGES=1
rapids-logger "PR has changes in cudf-polars/pylibcudf, test fails treated as failure"
else
HAS_CHANGES=0
rapids-logger "PR does not have changes in cudf-polars/pylibcudf, test fails NOT treated as failure"
fi

rapids-logger "Download wheels"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist

Expand All @@ -43,6 +47,9 @@ python -m pip install \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"

rapids-logger "Pin to 1.7.0 Temporarily"
python -m pip install polars==1.7.0

rapids-logger "Run cudf_polars tests"

function set_exitcode()
Expand Down
2 changes: 2 additions & 0 deletions ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ pushd python/dask_cudf/dask_cudf
DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \
--numprocesses=8 \
--dist=worksteal \
.
popd

Expand All @@ -50,5 +51,6 @@ pushd python/dask_cudf/dask_cudf
DASK_DATAFRAME__QUERY_PLANNING=False python -m pytest \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
--numprocesses=8 \
--dist=worksteal \
.
popd
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ add_library(
src/io/csv/reader_impl.cu
src/io/csv/writer_impl.cu
src/io/functions.cpp
src/io/json/host_tree_algorithms.cu
src/io/json/json_column.cu
src/io/json/json_normalization.cu
src/io/json/json_tree.cu
Expand Down
10 changes: 5 additions & 5 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,11 @@ ConfigureBench(TRANSPOSE_BENCH transpose/transpose.cpp)

# ##################################################################################################
# * nds-h benchmark --------------------------------------------------------------------------------
ConfigureNVBench(NDSH_Q1 ndsh/q01.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q5 ndsh/q05.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q6 ndsh/q06.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q9 ndsh/q09.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q10 ndsh/q10.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q01_NVBENCH ndsh/q01.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q05_NVBENCH ndsh/q05.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q06_NVBENCH ndsh/q06.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q09_NVBENCH ndsh/q09.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q10_NVBENCH ndsh/q10.cpp ndsh/utilities.cpp)

# ##################################################################################################
# * stream_compaction benchmark -------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ std::unique_ptr<cudf::table> perform_left_join(cudf::table_view const& left_inpu
auto const left_selected = left_input.select(left_on);
auto const right_selected = right_input.select(right_on);
auto const [left_join_indices, right_join_indices] =
cudf::left_join(left_selected, right_selected, cudf::null_equality::EQUAL, mr);
cudf::left_join(left_selected, right_selected, cudf::null_equality::EQUAL, stream, mr);

auto const left_indices_span = cudf::device_span<cudf::size_type const>{*left_join_indices};
auto const right_indices_span = cudf::device_span<cudf::size_type const>{*right_join_indices};
Expand Down
15 changes: 10 additions & 5 deletions cpp/benchmarks/ndsh/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <cudf/stream_compaction.hpp>
#include <cudf/table/table.hpp>
#include <cudf/transform.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <cstdlib>
#include <ctime>
Expand Down Expand Up @@ -146,11 +147,15 @@ std::unique_ptr<cudf::table> join_and_gather(cudf::table_view const& left_input,
cudf::null_equality compare_nulls)
{
CUDF_FUNC_RANGE();
constexpr auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK;
auto const left_selected = left_input.select(left_on);
auto const right_selected = right_input.select(right_on);
auto const [left_join_indices, right_join_indices] = cudf::inner_join(
left_selected, right_selected, compare_nulls, cudf::get_current_device_resource_ref());
constexpr auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK;
auto const left_selected = left_input.select(left_on);
auto const right_selected = right_input.select(right_on);
auto const [left_join_indices, right_join_indices] =
cudf::inner_join(left_selected,
right_selected,
compare_nulls,
cudf::get_default_stream(),
cudf::get_current_device_resource_ref());

auto const left_indices_span = cudf::device_span<cudf::size_type const>{*left_join_indices};
auto const right_indices_span = cudf::device_span<cudf::size_type const>{*right_join_indices};
Expand Down
2 changes: 2 additions & 0 deletions cpp/examples/parquet_io/parquet_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

#include "common.hpp"

#include <cudf/utilities/default_stream.hpp>

/**
* @file parquet_io.cpp
* @brief Demonstrates usage of the libcudf APIs to read and write
Expand Down
Loading

0 comments on commit 1827654

Please sign in to comment.