Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into enh-json_spark_no_arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
karthikeyann authored Oct 17, 2024
2 parents a1ae603 + 14209c1 commit 8f44c81
Show file tree
Hide file tree
Showing 331 changed files with 13,095 additions and 9,884 deletions.
22 changes: 22 additions & 0 deletions .devcontainer/cuda12.5-conda/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,31 @@
],
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/cuda:24.12": {
"version": "12.5",
"installCompilers": false,
"installProfilers": true,
"installDevPackages": false,
"installcuDNN": false,
"installcuTensor": false,
"installNCCL": false,
"installCUDARuntime": false,
"installNVRTC": false,
"installOpenCL": false,
"installcuBLAS": false,
"installcuSPARSE": false,
"installcuFFT": false,
"installcuFile": false,
"installcuRAND": false,
"installcuSOLVER": false,
"installNPP": false,
"installnvJPEG": false,
"pruneStaticLibs": true
},
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/cuda",
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"],
Expand Down
14 changes: 13 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,23 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
# Use the wheel container so we can skip conda solves and since our
# primary static consumers (Spark) are not in conda anyway.
container_image: "rapidsai/ci-wheel:latest"
run_script: "ci/configure_cpp_static.sh"
clang-tidy:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
run_script: "ci/clang_tidy.sh"
conda-python-cudf-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,16 @@ repos:
entry: 'pytest\.xfail'
language: pygrep
types: [python]
- id: no-unseeded-default-rng
  name: no-unseeded-default-rng
  description: 'Enforce that no non-seeded default_rng is used and default_rng is used instead of np.random.seed'
  # pygrep compiles `entry` as a plain Python regex and tests it against one
  # line at a time. The leading (?x) turns on verbose mode so that the
  # newlines and the `#` comment lines below are ignored instead of being
  # treated as literal text that a single source line could never match.
  entry: |
    (?x)
    # Check for usage of default_rng without seeding
    default_rng\(\)|
    # Check for usage of np.random.seed
    np\.random\.seed\(
  language: pygrep
  types: [python]
- id: cmake-format
name: cmake-format
entry: ./cpp/scripts/run-cmake-format.sh cmake-format
Expand Down
16 changes: 8 additions & 8 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

set -euo pipefail

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR"
export RAPIDS_VERSION="$(rapids-version)"
export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh
Expand All @@ -28,16 +28,16 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"pylibcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}"
"libcudf=${RAPIDS_VERSION}" \
"pylibcudf=${RAPIDS_VERSION}" \
"cudf=${RAPIDS_VERSION}" \
"dask-cudf=${RAPIDS_VERSION}"

export RAPIDS_DOCS_DIR="$(mktemp -d)"

rapids-logger "Build CPP docs"
pushd cpp/doxygen
aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_NUMBER}/rmm.tag . || echo "Failed to download rmm Doxygen tag"
aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_MAJOR_MINOR}/rmm.tag . || echo "Failed to download rmm Doxygen tag"
doxygen Doxyfile
mkdir -p "${RAPIDS_DOCS_DIR}/libcudf/html"
mv html/* "${RAPIDS_DOCS_DIR}/libcudf/html"
Expand All @@ -57,4 +57,4 @@ mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/html"
mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/dask-cudf/html"
popd

rapids-upload-docs
RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" rapids-upload-docs
5 changes: 5 additions & 0 deletions ci/build_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,10 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/custreamz

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf-polars

rapids-upload-conda-to-s3 python
29 changes: 29 additions & 0 deletions ci/clang_tidy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Creates a conda environment from the `clang_tidy` dependency file key, then
# configures and builds cpp/ with CUDF_CLANG_TIDY=ON.

set -euo pipefail

rapids-logger "Create clang-tidy conda environment"
source /opt/conda/etc/profile.d/conda.sh

ENV_YAML_DIR="$(mktemp -d)"

# Generate the environment spec. `tee` both prints it and writes it to
# ${ENV_YAML_DIR}/env.yaml. ${RAPIDS_CUDA_VERSION%.*} drops the last
# dot-separated component of the CUDA version.
rapids-dependency-file-generator \
  --output conda \
  --file-key clang_tidy \
  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
  | tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n clang_tidy

# Temporarily allow unbound variables for conda activation.
set +u
conda activate clang_tidy
set -u

# NOTE(review): not read anywhere else in this script; confirm whether the
# sourced rapids-configure-sccache (or anything it calls) relies on it.
RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

source rapids-configure-sccache

# Run the build via CMake, which will run clang-tidy when CUDF_CLANG_TIDY is enabled.
cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON -GNinja
cmake --build cpp/build
1 change: 1 addition & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ sed_runner "s/cudf-.*-SNAPSHOT/cudf-${NEXT_FULL_JAVA_TAG}/g" java/ci/README.md
# .devcontainer files
find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do
sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
sed_runner "s@rapidsai/devcontainers/features/cuda:[0-9.]*@rapidsai/devcontainers/features/cuda:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
sed_runner "s@rapids-\${localWorkspaceFolderBasename}-[0-9.]*@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "${filename}"
done
5 changes: 4 additions & 1 deletion ci/run_cudf_examples.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ compute-sanitizer --tool memcheck custom_optimized names.csv
compute-sanitizer --tool memcheck custom_prealloc names.csv
compute-sanitizer --tool memcheck custom_with_malloc names.csv

compute-sanitizer --tool memcheck parquet_io
compute-sanitizer --tool memcheck parquet_io example.parquet
compute-sanitizer --tool memcheck parquet_io example.parquet output.parquet DELTA_BINARY_PACKED ZSTD TRUE

compute-sanitizer --tool memcheck parquet_io_multithreaded example.parquet
compute-sanitizer --tool memcheck parquet_io_multithreaded example.parquet 4 DEVICE_BUFFER 2 2

exit ${EXITCODE}
10 changes: 5 additions & 5 deletions ci/test_cpp_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
RAPIDS_VERSION="$(rapids-version)"

rapids-logger "Generate C++ testing dependencies"

Expand Down Expand Up @@ -33,10 +33,10 @@ rapids-print-env

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf-tests=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf-example=${RAPIDS_VERSION_MAJOR_MINOR}"
"libcudf=${RAPIDS_VERSION}" \
"libcudf_kafka=${RAPIDS_VERSION}" \
"libcudf-tests=${RAPIDS_VERSION}" \
"libcudf-example=${RAPIDS_VERSION}"

rapids-logger "Check GPU usage"
nvidia-smi
4 changes: 2 additions & 2 deletions ci/test_java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
RAPIDS_VERSION="$(rapids-version)"

rapids-logger "Generate Java testing dependencies"

Expand All @@ -32,7 +32,7 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"
"libcudf=${RAPIDS_VERSION}"

rapids-logger "Check GPU usage"
nvidia-smi
Expand Down
6 changes: 3 additions & 3 deletions ci/test_notebooks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
RAPIDS_VERSION="$(rapids-version)"

rapids-logger "Generate notebook testing dependencies"

Expand All @@ -32,8 +32,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
"cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"
"cudf=${RAPIDS_VERSION}" \
"libcudf=${RAPIDS_VERSION}"

NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")"
pushd notebooks
Expand Down
6 changes: 3 additions & 3 deletions ci/test_python_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
RAPIDS_VERSION="$(rapids-version)"

rapids-logger "Generate Python testing dependencies"

Expand Down Expand Up @@ -40,5 +40,5 @@ rapids-print-env
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
"cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"
"cudf=${RAPIDS_VERSION}" \
"libcudf=${RAPIDS_VERSION}"
25 changes: 20 additions & 5 deletions ci/test_python_other.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
# Common setup steps shared by Python test jobs
source ./ci/test_python_common.sh test_python_other

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
RAPIDS_VERSION="$(rapids-version)"

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
"dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"cudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
"custreamz=${RAPIDS_VERSION_MAJOR_MINOR}"
"dask-cudf=${RAPIDS_VERSION}" \
"cudf_kafka=${RAPIDS_VERSION}" \
"custreamz=${RAPIDS_VERSION}" \
"cudf-polars=${RAPIDS_VERSION}"

rapids-logger "Check GPU usage"
nvidia-smi
Expand All @@ -37,7 +38,7 @@ rapids-logger "pytest dask_cudf (legacy)"
DASK_DATAFRAME__QUERY_PLANNING=False ./ci/run_dask_cudf_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
--numprocesses=8 \
--dist=loadscope \
--dist=worksteal \
.

rapids-logger "pytest cudf_kafka"
Expand All @@ -54,5 +55,19 @@ rapids-logger "pytest custreamz"
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \
--cov-report=term

# Note that cudf-polars uses rmm.mr.CudaAsyncMemoryResource() which allocates
# half the available memory. This doesn't play well with multiple workers, so
# we keep --numprocesses=1 for now. This should be resolved by
# https://github.com/rapidsai/cudf/issues/16723.
rapids-logger "pytest cudf-polars"
./ci/run_cudf_polars_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars.xml" \
--numprocesses=1 \
--dist=worksteal \
--cov-config=./pyproject.toml \
--cov=cudf_polars \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-polars-coverage.xml" \
--cov-report=term

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
4 changes: 4 additions & 0 deletions conda/recipes/cudf-polars/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

# Build step for the cudf-polars conda recipe: hands off to the build.sh in the
# current directory, requesting the cudf_polars target.
# This assumes the script is executed from the root of the repo directory
./build.sh cudf_polars
61 changes: 61 additions & 0 deletions conda/recipes/cudf-polars/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

# conda-build recipe for the cudf-polars package.
# The Jinja variables below are read from the build environment:
#   RAPIDS_PACKAGE_VERSION - package version (any leading 'v' is stripped)
#   CONDA_PY               - Python version tag used in the build string
#   RAPIDS_CUDA_VERSION    - CUDA version; only major.minor is kept
#   RAPIDS_DATE_STRING     - date stamp embedded in the build string
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version = environ['CONDA_PY'] %}
{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
{% set cuda_major = cuda_version.split('.')[0] %}
{% set date_string = environ['RAPIDS_DATE_STRING'] %}

package:
  name: cudf-polars
  version: {{ version }}

source:
  # Repository root, relative to this recipe directory.
  path: ../../..

build:
  number: {{ GIT_DESCRIBE_NUMBER }}
  string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
  # Environment variables passed through to the build script. Entries ending
  # in a `# [selector]` comment apply only on the matching platform.
  script_env:
    - AWS_ACCESS_KEY_ID
    - AWS_SECRET_ACCESS_KEY
    - AWS_SESSION_TOKEN
    - CMAKE_C_COMPILER_LAUNCHER
    - CMAKE_CUDA_COMPILER_LAUNCHER
    - CMAKE_CXX_COMPILER_LAUNCHER
    - CMAKE_GENERATOR
    - PARALLEL_LEVEL
    - SCCACHE_BUCKET
    - SCCACHE_IDLE_TIMEOUT
    - SCCACHE_REGION
    - SCCACHE_S3_KEY_PREFIX=cudf-polars-aarch64 # [aarch64]
    - SCCACHE_S3_KEY_PREFIX=cudf-polars-linux64 # [linux64]
    - SCCACHE_S3_USE_SSL
    - SCCACHE_S3_NO_CREDENTIALS

requirements:
  host:
    - python
    - rapids-build-backend >=0.3.0,<0.4.0.dev0
    - setuptools
    - cuda-version ={{ cuda_version }}
  run:
    - python
    # pylibcudf is pinned to the same version as this package.
    - pylibcudf ={{ version }}
    # Only the polars 1.8.x series is accepted.
    - polars >=1.8,<1.9
    - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

test:
  requires:
    - cuda-version ={{ cuda_version }}
  imports:
    - cudf_polars

about:
  home: https://rapids.ai/
  license: Apache-2.0
  license_family: APACHE
  license_file: LICENSE
  summary: cudf-polars library
2 changes: 1 addition & 1 deletion cpp/.clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Checks:
-clang-analyzer-optin.core.EnumCastOutOfRange,
-clang-analyzer-optin.cplusplus.UninitializedObject'

WarningsAsErrors: ''
WarningsAsErrors: '*'
HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
FormatStyle: none
Expand Down
Loading

0 comments on commit 8f44c81

Please sign in to comment.