Skip to content

Commit

Permalink
Merge branch 'doc/pylibcudf-doc' of github.com:Matt711/cudf into doc/…
Browse files Browse the repository at this point in the history
…pylibcudf-doc
  • Loading branch information
Matt711 committed Sep 21, 2024
2 parents d75df82 + caac704 commit 1b91e07
Show file tree
Hide file tree
Showing 94 changed files with 4,475 additions and 1,322 deletions.
2 changes: 1 addition & 1 deletion .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ cudf.polars:
- 'python/cudf_polars/**'

pylibcudf:
- 'python/cudf/pylibcudf/**'
- 'python/pylibcudf/**'

libcudf:
- 'cpp/**'
Expand Down
12 changes: 12 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
- wheel-tests-cudf
- wheel-build-cudf-polars
- wheel-tests-cudf-polars
- cudf-polars-polars-tests
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
- devcontainer
Expand Down Expand Up @@ -244,6 +245,17 @@ jobs:
# This always runs, but only fails if this PR touches code in
# pylibcudf or cudf_polars
script: "ci/test_wheel_cudf_polars.sh"
cudf-polars-polars-tests:
needs: wheel-build-cudf-polars
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
# This always runs, but only fails if this PR touches code in
# pylibcudf or cudf_polars
script: "ci/test_cudf_polars_polars_tests.sh"
wheel-build-dask-cudf:
needs: wheel-build-cudf
secrets: inherit
Expand Down
27 changes: 27 additions & 0 deletions ci/run_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -euo pipefail

# Support invoking run_cudf_polars_polars_tests.sh outside the script directory.
# Assumption: polars has been cloned in the root of the repo.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../polars/

# Tests excluded from the run; the reason for each is noted inline.
DESELECTED_TESTS=(
    "tests/unit/test_polars_import.py::test_polars_import" # relies on a polars built in place
    "tests/unit/streaming/test_streaming_sort.py::test_streaming_sort[True]" # relies on polars built in debug mode
    "tests/unit/test_cpu_check.py::test_check_cpu_flags_skipped_no_flags" # Mock library error
    "tests/docs/test_user_guide.py" # No dot binary in CI image
)

# Build the pytest options as an array and expand it quoted. Test ids such as
# "test_streaming_sort[True]" contain glob characters, so an unquoted string
# expansion would expose them to word splitting and pathname expansion.
DESELECT_ARGS=()
for test_id in "${DESELECTED_TESTS[@]}"; do
    DESELECT_ARGS+=(--deselect "${test_id}")
done

# Extra arguments given to this script ("$@") are forwarded to pytest.
# NOTE(review): -m "" presumably overrides a default marker filter from the
# polars pytest configuration -- confirm.
python -m pytest \
    --import-mode=importlib \
    --cache-clear \
    -m "" \
    -p cudf_polars.testing.plugin \
    -v \
    --tb=short \
    "${DESELECT_ARGS[@]}" \
    "$@" \
    py-polars/tests
69 changes: 69 additions & 0 deletions ci/test_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -eou pipefail

# We will only fail these tests if the PR touches code in pylibcudf
# or cudf_polars itself.
# Note, the three dots mean we are doing diff between the merge-base
# of upstream and HEAD. So this is asking, "does _this branch_ touch
# files in cudf_polars/pylibcudf", rather than "are there changes
# between upstream and this branch which touch cudf_polars/pylibcudf"
# TODO: is the target branch exposed anywhere in an environment variable?
# pylibcudf lives in python/pylibcudf/; watching any other location would
# leave HAS_CHANGES at 0 for pylibcudf-only PRs and hide real test failures.
if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/pylibcudf/)" ];
then
    HAS_CHANGES=1
    rapids-logger "PR has changes in cudf-polars/pylibcudf, test fails treated as failure"
else
    HAS_CHANGES=0
    rapids-logger "PR does not have changes in cudf-polars/pylibcudf, test fails NOT treated as failure"
fi

# Fetch the wheels under test: cudf_polars into ./dist and pylibcudf into
# ./local-pylibcudf-dep, then install both before cloning polars.
rapids-logger "Download wheels"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist

# Download the pylibcudf built in the previous step
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcudf-dep

rapids-logger "Install pylibcudf"
python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl

rapids-logger "Install cudf_polars"
python -m pip install $(echo ./dist/cudf_polars*.whl)

# The polars tag is hard-coded for now; the commented-out line below shows how
# it could instead be derived from the installed polars version.
# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
TAG="py-1.7.0"
rapids-logger "Clone polars to ${TAG}"
# Shallow clone of just the tagged release; it lands in ./polars relative to
# the current working directory.
git clone https://github.com/pola-rs/polars.git --branch ${TAG} --depth 1

# Install requirements for running polars tests
rapids-logger "Install polars test requirements"
python -m pip install -r polars/py-polars/requirements-dev.txt -r polars/py-polars/requirements-ci.txt

# ERR trap handler: records the exit status of the command that triggered
# the trap so it can be reported (and optionally propagated) later.
function set_exitcode()
{
EXITCODE=$?
}
EXITCODE=0
trap set_exitcode ERR
# Turn off errexit so a failing test run does not abort the script before
# the result is logged and the final exit status is decided below.
set +e

rapids-logger "Run polars tests"
./ci/run_cudf_polars_polars_tests.sh

# Reset the ERR trap to its default and re-enable errexit now that the
# test run is over.
trap ERR
set -e

if [ ${EXITCODE} != 0 ]; then
rapids-logger "Running polars test suite FAILED: exitcode ${EXITCODE}"
else
rapids-logger "Running polars test suite PASSED"
fi

# Propagate the recorded exit status only when HAS_CHANGES (set earlier in
# this script) is 1; otherwise always exit successfully.
if [ ${HAS_CHANGES} == 1 ]; then
exit ${EXITCODE}
else
exit 0
fi
7 changes: 7 additions & 0 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@ set -eou pipefail
if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/pylibcudf/)" ];
then
HAS_CHANGES=1
rapids-logger "PR has changes in cudf-polars/pylibcudf, test fails treated as failure"
else
HAS_CHANGES=0
rapids-logger "PR does not have changes in cudf-polars/pylibcudf, test fails NOT treated as failure"
fi

rapids-logger "Download wheels"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist

Expand All @@ -43,6 +47,9 @@ python -m pip install \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"

rapids-logger "Pin to 1.7.0 Temporarily"
python -m pip install polars==1.7.0

rapids-logger "Run cudf_polars tests"

function set_exitcode()
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ add_library(
src/io/csv/reader_impl.cu
src/io/csv/writer_impl.cu
src/io/functions.cpp
src/io/json/host_tree_algorithms.cu
src/io/json/json_column.cu
src/io/json/json_normalization.cu
src/io/json/json_tree.cu
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ std::unique_ptr<cudf::table> perform_left_join(cudf::table_view const& left_inpu
auto const left_selected = left_input.select(left_on);
auto const right_selected = right_input.select(right_on);
auto const [left_join_indices, right_join_indices] =
cudf::left_join(left_selected, right_selected, cudf::null_equality::EQUAL, mr);
cudf::left_join(left_selected, right_selected, cudf::null_equality::EQUAL, stream, mr);

auto const left_indices_span = cudf::device_span<cudf::size_type const>{*left_join_indices};
auto const right_indices_span = cudf::device_span<cudf::size_type const>{*right_join_indices};
Expand Down
15 changes: 10 additions & 5 deletions cpp/benchmarks/ndsh/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <cudf/stream_compaction.hpp>
#include <cudf/table/table.hpp>
#include <cudf/transform.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <cstdlib>
#include <ctime>
Expand Down Expand Up @@ -146,11 +147,15 @@ std::unique_ptr<cudf::table> join_and_gather(cudf::table_view const& left_input,
cudf::null_equality compare_nulls)
{
CUDF_FUNC_RANGE();
constexpr auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK;
auto const left_selected = left_input.select(left_on);
auto const right_selected = right_input.select(right_on);
auto const [left_join_indices, right_join_indices] = cudf::inner_join(
left_selected, right_selected, compare_nulls, cudf::get_current_device_resource_ref());
constexpr auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK;
auto const left_selected = left_input.select(left_on);
auto const right_selected = right_input.select(right_on);
auto const [left_join_indices, right_join_indices] =
cudf::inner_join(left_selected,
right_selected,
compare_nulls,
cudf::get_default_stream(),
cudf::get_current_device_resource_ref());

auto const left_indices_span = cudf::device_span<cudf::size_type const>{*left_join_indices};
auto const right_indices_span = cudf::device_span<cudf::size_type const>{*right_join_indices};
Expand Down
9 changes: 7 additions & 2 deletions cpp/examples/parquet_io/parquet_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include "../utilities/timer.hpp"

#include <cudf/utilities/default_stream.hpp>

/**
* @file parquet_io.cpp
* @brief Demonstrates usage of the libcudf APIs to read and write
Expand Down Expand Up @@ -159,8 +161,11 @@ int main(int argc, char const** argv)
// Left anti-join the original and transcoded tables
// identical tables should not throw an exception and
// return an empty indices vector
auto const indices = cudf::left_anti_join(
input->view(), transcoded_input->view(), cudf::null_equality::EQUAL, resource.get());
auto const indices = cudf::left_anti_join(input->view(),
transcoded_input->view(),
cudf::null_equality::EQUAL,
cudf::get_default_stream(),
resource.get());

// No exception thrown, check indices
auto const valid = indices->size() == 0;
Expand Down
Loading

0 comments on commit 1b91e07

Please sign in to comment.