From 83f9d2b6195a2528d0d9465975f3b9c75a333799 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Thu, 19 Sep 2024 12:02:58 -0400 Subject: [PATCH 1/4] DOC v24.12 Updates [skip ci] --- .../cuda11.8-conda/devcontainer.json | 6 +-- .devcontainer/cuda11.8-pip/devcontainer.json | 6 +-- .../cuda12.5-conda/devcontainer.json | 6 +-- .devcontainer/cuda12.5-pip/devcontainer.json | 6 +-- .github/workflows/build.yaml | 28 +++++------ .github/workflows/pandas-tests.yaml | 2 +- .github/workflows/pr.yaml | 48 +++++++++---------- .../workflows/pr_issue_status_automation.yml | 6 +-- .github/workflows/test.yaml | 24 +++++----- README.md | 2 +- VERSION | 2 +- ci/test_wheel_cudf_polars.sh | 2 +- .../all_cuda-118_arch-x86_64.yaml | 10 ++-- .../all_cuda-125_arch-x86_64.yaml | 10 ++-- cpp/examples/versions.cmake | 2 +- dependencies.yaml | 48 +++++++++---------- java/ci/README.md | 4 +- java/pom.xml | 2 +- .../dependencies.yaml | 6 +-- python/cudf/pyproject.toml | 14 +++--- python/cudf_kafka/pyproject.toml | 2 +- python/cudf_polars/docs/overview.md | 2 +- python/cudf_polars/pyproject.toml | 2 +- python/custreamz/pyproject.toml | 4 +- python/dask_cudf/pyproject.toml | 6 +-- python/libcudf/pyproject.toml | 4 +- python/pylibcudf/pyproject.toml | 10 ++-- 27 files changed, 132 insertions(+), 132 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 7a1361e52c5..d86fc0e550a 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 64d7cd54130..66a3b22df11 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index c1924243506..2a195c6c81d 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", 
"PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.10-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index beab2940176..125c85cefa9 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda12.5-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index b5d17022a3a..08d08c9c5a0 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-libcudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, 
(.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -81,7 +81,7 @@ jobs: wheel-publish-libcudf: needs: wheel-build-libcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -92,7 +92,7 @@ jobs: wheel-build-pylibcudf: needs: [wheel-publish-libcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -102,7 +102,7 @@ jobs: wheel-publish-pylibcudf: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -113,7 +113,7 @@ jobs: wheel-build-cudf: needs: wheel-publish-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -123,7 +123,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -134,7 +134,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -146,7 +146,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -157,7 +157,7 @@ jobs: wheel-build-cudf-polars: needs: wheel-publish-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -169,7 +169,7 @@ jobs: wheel-publish-cudf-polars: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml index 10c803f7921..c676032779f 100644 --- a/.github/workflows/pandas-tests.yaml +++ b/.github/workflows/pandas-tests.yaml @@ -17,7 +17,7 @@ jobs: pandas-tests: # run the Pandas unit tests secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b515dbff9f3..ade2f35397b 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -37,7 +37,7 @@ jobs: - pandas-tests - pandas-tests-diff secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 if: always() with: needs: ${{ toJSON(needs) }} @@ -104,39 +104,39 @@ jobs: - '!notebooks/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 with: build_type: pull-request enable_check_symbols: true conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 if: needs.changed-files.outputs.test_cpp == 'true' with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: pull-request conda-python-cudf-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 if: needs.changed-files.outputs.test_python == 'true' with: build_type: pull-request @@ -145,7 +145,7 @@ jobs: # Tests for dask_cudf, custreamz, 
cudf_kafka are separated for CI parallelism needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 if: needs.changed-files.outputs.test_python == 'true' with: build_type: pull-request @@ -153,7 +153,7 @@ jobs: conda-java-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 if: needs.changed-files.outputs.test_java == 'true' with: build_type: pull-request @@ -164,7 +164,7 @@ jobs: static-configure: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: pull-request # Use the wheel container so we can skip conda solves and since our @@ -174,7 +174,7 @@ jobs: conda-notebook-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 if: needs.changed-files.outputs.test_notebooks == 'true' with: build_type: pull-request @@ -185,7 +185,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -195,7 +195,7 @@ jobs: wheel-build-libcudf: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -204,21 +204,21 @@ jobs: wheel-build-pylibcudf: needs: [checks, wheel-build-libcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: "ci/build_wheel_pylibcudf.sh" wheel-build-cudf: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 if: needs.changed-files.outputs.test_python == 'true' with: build_type: pull-request @@ -226,7 +226,7 @@ jobs: wheel-build-cudf-polars: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -235,7 +235,7 @@ jobs: wheel-tests-cudf-polars: needs: [wheel-build-cudf-polars, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 if: needs.changed-files.outputs.test_python == 'true' with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -247,7 +247,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -256,7 +256,7 @@ jobs: wheel-tests-dask-cudf: needs: [wheel-build-dask-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 if: needs.changed-files.outputs.test_python == 'true' with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -265,7 +265,7 @@ jobs: script: ci/test_wheel_dask_cudf.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 with: arch: '["amd64"]' cuda: '["12.5"]' @@ -276,7 +276,7 @@ jobs: unit-tests-cudf-pandas: needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 if: needs.changed-files.outputs.test_python == 'true' with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -287,7 +287,7 @@ jobs: # run the Pandas unit tests using PR branch needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 if: needs.changed-files.outputs.test_python == 'true' with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
@@ -299,7 +299,7 @@ jobs: pandas-tests-diff: # diff the results of running the Pandas unit tests and publish a job summary needs: pandas-tests - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: node_type: cpu4 build_type: pull-request diff --git a/.github/workflows/pr_issue_status_automation.yml b/.github/workflows/pr_issue_status_automation.yml index 45e5191eb54..af8d1289ea1 100644 --- a/.github/workflows/pr_issue_status_automation.yml +++ b/.github/workflows/pr_issue_status_automation.yml @@ -23,7 +23,7 @@ on: jobs: get-project-id: - uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-24.12 if: github.event.pull_request.state == 'open' secrets: inherit permissions: @@ -34,7 +34,7 @@ jobs: update-status: # This job sets the PR and its linked issues to "In Progress" status - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.12 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: @@ -50,7 +50,7 @@ jobs: update-sprint: # This job sets the PR and its linked issues to the current "Weekly Sprint" - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.12 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 8605fa46f68..c06fe929988 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -25,7 +25,7 @@ jobs: enable_check_symbols: true conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -33,7 +33,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -45,7 +45,7 @@ jobs: run_script: "ci/test_cpp_memcheck.sh" static-configure: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: pull-request # Use the wheel container so we can skip conda solves and since our @@ -54,7 +54,7 @@ jobs: run_script: "ci/configure_cpp_static.sh" conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -64,7 +64,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -85,7 +85,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -97,7 +97,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -106,7 +106,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -117,7 +117,7 @@ jobs: script: ci/test_wheel_dask_cudf.sh unit-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -126,7 +126,7 @@ jobs: script: ci/cudf_pandas_scripts/run_tests.sh third-party-integration-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index 8f8c2adac2f..169d2e4eded 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge ```bash conda install -c rapidsai -c conda-forge -c nvidia \ - cudf=24.10 python=3.12 cuda-version=12.5 + cudf=24.12 python=3.12 cuda-version=12.5 ``` We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD diff --git a/VERSION b/VERSION index 7c7ba04436f..af28c42b528 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.10.00 +24.12.00 diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh index 9844090258a..da9e50d0a2b 100755 --- a/ci/test_wheel_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -10,7 +10,7 @@ set -eou pipefail # files in cudf_polars/pylibcudf", rather than "are there changes # between upstream and this branch which touch cudf_polars/pylibcudf" # TODO: is the target branch exposed anywhere in an environment variable? 
-if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/pylibcudf/)" ]; +if [ -n "$(git diff --name-only origin/branch-24.12...HEAD -- python/cudf_polars/ python/pylibcudf/)" ]; then HAS_CHANGES=1 else diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index c96e8706d27..62d75965b9f 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -26,7 +26,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.10.*,>=0.0.0a0 +- dask-cuda==24.12.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 @@ -42,9 +42,9 @@ dependencies: - libcufile=1.4.0.31 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 -- libkvikio==24.10.*,>=0.0.0a0 +- libkvikio==24.12.*,>=0.0.0a0 - librdkafka>=2.5.0,<2.6.0a0 -- librmm==24.10.*,>=0.0.0a0 +- librmm==24.12.*,>=0.0.0a0 - make - moto>=4.0.8 - msgpack-python @@ -78,9 +78,9 @@ dependencies: - python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - rich -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - s3fs>=2022.3.0 - scikit-build-core>=0.10.0 - scipy diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index e54a44d9f6e..f16f2b377df 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -27,7 +27,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.10.*,>=0.0.0a0 +- dask-cuda==24.12.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 @@ -41,9 +41,9 @@ dependencies: - jupyter_client - libcufile-dev - libcurand-dev -- libkvikio==24.10.*,>=0.0.0a0 +- libkvikio==24.12.*,>=0.0.0a0 - librdkafka>=2.5.0,<2.6.0a0 -- librmm==24.10.*,>=0.0.0a0 +- librmm==24.12.*,>=0.0.0a0 - make - moto>=4.0.8 - msgpack-python @@ -76,9 +76,9 @@ dependencies: - python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - rich -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - s3fs>=2022.3.0 - scikit-build-core>=0.10.0 - scipy diff --git a/cpp/examples/versions.cmake b/cpp/examples/versions.cmake index 44493011673..51613090534 100644 --- a/cpp/examples/versions.cmake +++ b/cpp/examples/versions.cmake @@ -12,4 +12,4 @@ # the License. 
# ============================================================================= -set(CUDF_TAG branch-24.10) +set(CUDF_TAG branch-24.12) diff --git a/dependencies.yaml b/dependencies.yaml index 7a13043cc5f..325f2dbcba7 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -514,7 +514,7 @@ dependencies: - output_types: [conda] packages: - breathe>=4.35.0 - - dask-cuda==24.10.*,>=0.0.0a0 + - dask-cuda==24.12.*,>=0.0.0a0 - *doxygen - make - myst-nb @@ -655,7 +655,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - rapids-dask-dependency==24.10.*,>=0.0.0a0 + - rapids-dask-dependency==24.12.*,>=0.0.0a0 run_custreamz: common: - output_types: conda @@ -781,13 +781,13 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask-cuda==24.10.*,>=0.0.0a0 + - dask-cuda==24.12.*,>=0.0.0a0 - *numba depends_on_libcudf: common: - output_types: conda packages: - - &libcudf_unsuffixed libcudf==24.10.*,>=0.0.0a0 + - &libcudf_unsuffixed libcudf==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -801,18 +801,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libcudf-cu12==24.10.*,>=0.0.0a0 + - libcudf-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - libcudf-cu11==24.10.*,>=0.0.0a0 + - libcudf-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*libcudf_unsuffixed]} depends_on_pylibcudf: common: - output_types: conda packages: - - &pylibcudf_unsuffixed pylibcudf==24.10.*,>=0.0.0a0 + - &pylibcudf_unsuffixed pylibcudf==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -826,18 +826,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibcudf-cu12==24.10.*,>=0.0.0a0 + - pylibcudf-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibcudf-cu11==24.10.*,>=0.0.0a0 + - pylibcudf-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*pylibcudf_unsuffixed]} depends_on_cudf: common: - output_types: conda packages: - - &cudf_unsuffixed cudf==24.10.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -851,18 +851,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==24.10.*,>=0.0.0a0 + - cudf-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf-cu11==24.10.*,>=0.0.0a0 + - cudf-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_unsuffixed]} depends_on_cudf_kafka: common: - output_types: conda packages: - - &cudf_kafka_unsuffixed cudf_kafka==24.10.*,>=0.0.0a0 + - &cudf_kafka_unsuffixed cudf_kafka==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -876,12 +876,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf_kafka-cu12==24.10.*,>=0.0.0a0 + - cudf_kafka-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf_kafka-cu11==24.10.*,>=0.0.0a0 + - cudf_kafka-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_kafka_unsuffixed]} depends_on_cupy: common: @@ -902,7 +902,7 @@ dependencies: common: - output_types: conda packages: - - &libkvikio_unsuffixed libkvikio==24.10.*,>=0.0.0a0 + - &libkvikio_unsuffixed libkvikio==24.12.*,>=0.0.0a0 - output_types: requirements 
packages: - --extra-index-url=https://pypi.nvidia.com @@ -914,12 +914,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libkvikio-cu12==24.10.*,>=0.0.0a0 + - libkvikio-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - libkvikio-cu11==24.10.*,>=0.0.0a0 + - libkvikio-cu11==24.12.*,>=0.0.0a0 - matrix: packages: - *libkvikio_unsuffixed @@ -927,7 +927,7 @@ dependencies: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==24.10.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -941,12 +941,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==24.10.*,>=0.0.0a0 + - librmm-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - librmm-cu11==24.10.*,>=0.0.0a0 + - librmm-cu11==24.12.*,>=0.0.0a0 - matrix: packages: - *librmm_unsuffixed @@ -954,7 +954,7 @@ dependencies: common: - output_types: conda packages: - - &rmm_unsuffixed rmm==24.10.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -968,12 +968,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - rmm-cu12==24.10.*,>=0.0.0a0 + - rmm-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - rmm-cu11==24.10.*,>=0.0.0a0 + - rmm-cu11==24.12.*,>=0.0.0a0 - matrix: packages: - *rmm_unsuffixed diff --git a/java/ci/README.md b/java/ci/README.md index ccb9efb50b6..95b93698cae 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:11.8.0-devel-rocky8 bash You can download the cuDF repo in the docker container or you can mount it into the container. Here I choose to download again in the container. ```bash -git clone --recursive https://github.com/rapidsai/cudf.git -b branch-24.10 +git clone --recursive https://github.com/rapidsai/cudf.git -b branch-24.12 ``` ### Build cuDF jar with devtoolset @@ -47,4 +47,4 @@ scl enable gcc-toolset-11 "java/ci/build-in-docker.sh" ### The output -You can find the cuDF jar in java/target/ like cudf-24.10.0-SNAPSHOT-cuda11.jar. +You can find the cuDF jar in java/target/ like cudf-24.12.0-SNAPSHOT-cuda11.jar. 
diff --git a/java/pom.xml b/java/pom.xml index e4f1cdf64e7..450cfbdbc84 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ ai.rapids cudf - 24.10.0-SNAPSHOT + 24.12.0-SNAPSHOT cudfjni diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml index f742f46c7ed..84b731e6c51 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml @@ -182,7 +182,7 @@ dependencies: common: - output_types: conda packages: - - cudf==24.10.*,>=0.0.0a0 + - cudf==24.12.*,>=0.0.0a0 - pandas - pytest - pytest-xdist @@ -248,13 +248,13 @@ dependencies: common: - output_types: conda packages: - - cuml==24.10.*,>=0.0.0a0 + - cuml==24.12.*,>=0.0.0a0 - scikit-learn test_cugraph: common: - output_types: conda packages: - - cugraph==24.10.*,>=0.0.0a0 + - cugraph==24.12.*,>=0.0.0a0 - networkx test_ibis: common: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 5833ee43c07..f90cb96e189 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "cuda-python>=11.7.1,<12.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", - "libcudf==24.10.*,>=0.0.0a0", + "libcudf==24.12.*,>=0.0.0a0", "numba>=0.57", "numpy>=1.23,<3.0a0", "nvtx>=0.2.1", @@ -31,9 +31,9 @@ dependencies = [ "pandas>=2.0,<2.2.3dev0", "ptxcompiler", "pyarrow>=14.0.0,<18.0.0a0", - "pylibcudf==24.10.*,>=0.0.0a0", + "pylibcudf==24.12.*,>=0.0.0a0", "rich", - "rmm==24.10.*,>=0.0.0a0", + "rmm==24.12.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -131,11 +131,11 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", - "libcudf==24.10.*,>=0.0.0a0", - "librmm==24.10.*,>=0.0.0a0", + "libcudf==24.12.*,>=0.0.0a0", + "librmm==24.12.*,>=0.0.0a0", "ninja", - "pylibcudf==24.10.*,>=0.0.0a0", - "rmm==24.10.*,>=0.0.0a0", + "pylibcudf==24.12.*,>=0.0.0a0", + "rmm==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.scikit-build] diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 6ca798bb11c..a1a3ec37842 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "cudf==24.10.*,>=0.0.0a0", + "cudf==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.optional-dependencies] diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index 6cd36136bf8..daf8286ae07 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -8,7 +8,7 @@ You will need: preferred configuration. Or else, use [rustup](https://www.rust-lang.org/tools/install) 2. A [cudf development - environment](https://github.com/rapidsai/cudf/blob/branch-24.10/CONTRIBUTING.md#setting-up-your-build-environment). + environment](https://github.com/rapidsai/cudf/blob/branch-24.12/CONTRIBUTING.md#setting-up-your-build-environment). 
The combined devcontainer works, or whatever your favourite approach is. > ![NOTE] These instructions will get simpler as we merge code in. diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 984b5487b98..b44f633e2d9 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -20,7 +20,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "polars>=1.0,<1.3", - "pylibcudf==24.10.*,>=0.0.0a0", + "pylibcudf==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index 5aa474e2862..85ab0024bb5 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -20,8 +20,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "confluent-kafka>=2.5.0,<2.6.0a0", - "cudf==24.10.*,>=0.0.0a0", - "cudf_kafka==24.10.*,>=0.0.0a0", + "cudf==24.12.*,>=0.0.0a0", + "cudf_kafka==24.12.*,>=0.0.0a0", "streamz", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 9ac834586a6..c64de06338f 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -19,12 +19,12 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "cudf==24.10.*,>=0.0.0a0", + "cudf==24.12.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "numpy>=1.23,<3.0a0", "pandas>=2.0,<2.2.3dev0", - "rapids-dask-dependency==24.10.*,>=0.0.0a0", + "rapids-dask-dependency==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -45,7 +45,7 @@ cudf = "dask_cudf.backends:CudfDXBackendEntrypoint" [project.optional-dependencies] test = [ - "dask-cuda==24.10.*,>=0.0.0a0", + "dask-cuda==24.12.*,>=0.0.0a0", "numba>=0.57", "pytest-cov", "pytest-xdist", diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index 2c98b97eddf..5bffe9fd96c 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -66,7 +66,7 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", - "libkvikio==24.10.*,>=0.0.0a0", - "librmm==24.10.*,>=0.0.0a0", + "libkvikio==24.12.*,>=0.0.0a0", + "librmm==24.12.*,>=0.0.0a0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 3aaca09d8bd..a8224f54e1c 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -19,11 +19,11 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "cuda-python>=11.7.1,<12.0a0", - "libcudf==24.10.*,>=0.0.0a0", + "libcudf==24.12.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", "pyarrow>=14.0.0,<18.0.0a0", - "rmm==24.10.*,>=0.0.0a0", + "rmm==24.12.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -102,10 +102,10 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", - "libcudf==24.10.*,>=0.0.0a0", - "librmm==24.10.*,>=0.0.0a0", + "libcudf==24.12.*,>=0.0.0a0", + "librmm==24.12.*,>=0.0.0a0", "ninja", - "rmm==24.10.*,>=0.0.0a0", + "rmm==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.scikit-build] From 8c975fe3d75cde6404a0c0e4b2b8162929c96453 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 23 Sep 2024 10:08:43 -0500 Subject: [PATCH 2/4] Add in support for setting delim when parsing JSON through java (#16867) This just adds in JNI APIs to allow for changing the delimiter when parsing JSON. Authors: - Robert (Bobby) Evans (https://github.com/revans2) Approvers: - Alessandro Bellina (https://github.com/abellina) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/16867 --- .../main/java/ai/rapids/cudf/JSONOptions.java | 16 ++++++++++++++++ java/src/main/java/ai/rapids/cudf/Table.java | 19 ++++++++++++++----- java/src/main/native/src/TableJni.cpp | 12 ++++++++++-- .../test/java/ai/rapids/cudf/TableTest.java | 19 ++++++++++++++++++- 4 files changed, 58 insertions(+), 8 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/JSONOptions.java b/java/src/main/java/ai/rapids/cudf/JSONOptions.java index c8308ca17ec..17b497be5ee 100644 --- a/java/src/main/java/ai/rapids/cudf/JSONOptions.java +++ b/java/src/main/java/ai/rapids/cudf/JSONOptions.java @@ -38,6 +38,7 @@ public final class JSONOptions extends ColumnFilterOptions { private final boolean allowLeadingZeros; private final boolean allowNonNumericNumbers; private final boolean allowUnquotedControlChars; + private final byte lineDelimiter; private JSONOptions(Builder builder) { super(builder); @@ -52,6 +53,11 @@ private JSONOptions(Builder builder) { allowLeadingZeros = builder.allowLeadingZeros; allowNonNumericNumbers = builder.allowNonNumericNumbers; allowUnquotedControlChars = builder.allowUnquotedControlChars; + lineDelimiter = builder.lineDelimiter; + } + + public byte getLineDelimiter() { + return lineDelimiter; } public boolean isDayFirst() { @@ -123,6 +129,16 @@ public static final class Builder extends ColumnFilterOptions.Builder Byte.MAX_VALUE) { + throw new IllegalArgumentException("Only basic ASCII values are supported as line delimiters " + delimiter); + } + lineDelimiter = (byte)delimiter; + return this; + } + /** * Should json validation be strict or not */ diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 09da43374ae..19c72809cea 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -258,7 
+258,8 @@ private static native long readJSON(int[] numChildren, String[] columnNames, boolean strictValidation, boolean allowLeadingZeros, boolean allowNonNumericNumbers, - boolean allowUnquotedControl) throws CudfException; + boolean allowUnquotedControl, + byte lineDelimiter) throws CudfException; private static native long readJSONFromDataSource(int[] numChildren, String[] columnNames, int[] dTypeIds, int[] dTypeScales, @@ -272,6 +273,7 @@ private static native long readJSONFromDataSource(int[] numChildren, String[] co boolean allowLeadingZeros, boolean allowNonNumericNumbers, boolean allowUnquotedControl, + byte lineDelimiter, long dsHandle) throws CudfException; private static native long readAndInferJSONFromDataSource(boolean dayFirst, boolean lines, @@ -284,6 +286,7 @@ private static native long readAndInferJSONFromDataSource(boolean dayFirst, bool boolean allowLeadingZeros, boolean allowNonNumericNumbers, boolean allowUnquotedControl, + byte lineDelimiter, long dsHandle) throws CudfException; private static native long readAndInferJSON(long address, long length, @@ -297,7 +300,8 @@ private static native long readAndInferJSON(long address, long length, boolean strictValidation, boolean allowLeadingZeros, boolean allowNonNumericNumbers, - boolean allowUnquotedControl) throws CudfException; + boolean allowUnquotedControl, + byte lineDelimiter) throws CudfException; /** * Read in Parquet formatted data. @@ -1321,7 +1325,8 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) { opts.strictValidation(), opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), - opts.unquotedControlChars()))) { + opts.unquotedControlChars(), + opts.getLineDelimiter()))) { return gatherJSONColumns(schema, twm, -1); } @@ -1404,7 +1409,8 @@ public static TableWithMeta readJSON(JSONOptions opts, HostMemoryBuffer buffer, opts.strictValidation(), opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), - opts.unquotedControlChars())); + opts.unquotedControlChars(), + opts.getLineDelimiter())); } /** @@ -1426,6 +1432,7 @@ public static TableWithMeta readAndInferJSON(JSONOptions opts, DataSource ds) { opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), + opts.getLineDelimiter(), dsHandle)); return twm; } finally { @@ -1479,7 +1486,8 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b opts.strictValidation(), opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), - opts.unquotedControlChars()))) { + opts.unquotedControlChars(), + opts.getLineDelimiter()))) { return gatherJSONColumns(schema, twm, emptyRowCount); } } @@ -1518,6 +1526,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), + opts.getLineDelimiter(), dsHandle))) { return gatherJSONColumns(schema, twm, emptyRowCount); } finally { diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 92e213bcb60..96d4c2c4eeb 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1627,6 +1627,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, jboolean allow_unquoted_control, + jbyte line_delimiter, jlong ds_handle) { JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0); @@ -1646,6 +1647,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env, 
.normalize_single_quotes(static_cast(normalize_single_quotes)) .normalize_whitespace(static_cast(normalize_whitespace)) .mixed_types_as_string(mixed_types_as_string) + .delimiter(static_cast(line_delimiter)) .strict_validation(strict_validation) .keep_quotes(keep_quotes); if (strict_validation) { @@ -1676,7 +1678,8 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env, jboolean strict_validation, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, - jboolean allow_unquoted_control) + jboolean allow_unquoted_control, + jbyte line_delimiter) { JNI_NULL_CHECK(env, buffer, "buffer cannot be null", 0); if (buffer_length <= 0) { @@ -1700,6 +1703,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env, .normalize_whitespace(static_cast(normalize_whitespace)) .strict_validation(strict_validation) .mixed_types_as_string(mixed_types_as_string) + .delimiter(static_cast(line_delimiter)) .keep_quotes(keep_quotes); if (strict_validation) { opts.numeric_leading_zeros(allow_leading_zeros) @@ -1814,6 +1818,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, jboolean allow_unquoted_control, + jbyte line_delimiter, jlong ds_handle) { JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0); @@ -1848,6 +1853,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env, .normalize_single_quotes(static_cast(normalize_single_quotes)) .normalize_whitespace(static_cast(normalize_whitespace)) .mixed_types_as_string(mixed_types_as_string) + .delimiter(static_cast(line_delimiter)) .strict_validation(strict_validation) .keep_quotes(keep_quotes); if (strict_validation) { @@ -1908,7 +1914,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env, jboolean strict_validation, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, - jboolean allow_unquoted_control) + jboolean allow_unquoted_control, + jbyte line_delimiter) { bool read_buffer = true; if (buffer == 0) { @@ -1957,6 +1964,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env, .normalize_single_quotes(static_cast(normalize_single_quotes)) .normalize_whitespace(static_cast(normalize_whitespace)) .mixed_types_as_string(mixed_types_as_string) + .delimiter(static_cast(line_delimiter)) .strict_validation(strict_validation) .keep_quotes(keep_quotes); if (strict_validation) { diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 830f2b33b32..c7fcb1756b6 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -40,7 +40,6 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; -import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import java.io.*; @@ -656,6 +655,24 @@ void testJSONValidationUnquotedControl() { } } + private static final byte[] CR_JSON_TEST_BUFFER = ("{\"a\":\"12\n3\"}\0" + + "{\"a\":\"AB\nC\"}\0").getBytes(StandardCharsets.UTF_8); + + @Test + void testReadJSONDelim() { + Schema schema = Schema.builder().addColumn(DType.STRING, "a").build(); + JSONOptions opts = JSONOptions.builder() + .withLines(true) + .withLineDelimiter('\0') + .build(); + try (Table expected = new Table.TestBuilder() + .column("12\n3", "AB\nC") + .build(); + Table found = Table.readJSON(schema, opts, CR_JSON_TEST_BUFFER)) { + assertTablesAreEqual(expected, found); + } + } + private static final 
byte[] NESTED_JSON_DATA_BUFFER = ("{\"a\":{\"c\":\"C1\"}}\n" + "{\"a\":{\"c\":\"C2\", \"b\":\"B2\"}}\n" + "{\"d\":[1,2,3]}\n" + From 6badd6b183e966f7f882708a0f4b2c4d0f2b5368 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 24 Sep 2024 08:17:53 -0500 Subject: [PATCH 3/4] Add in support for setting delim when parsing JSON through java (#16867) (#16880) This is a back-port of #16867 to 24.10. Authors: - Robert (Bobby) Evans (https://github.com/revans2) Approvers: - Alessandro Bellina (https://github.com/abellina) URL: https://github.com/rapidsai/cudf/pull/16880 --- .../main/java/ai/rapids/cudf/JSONOptions.java | 16 ++++++++++++++++ java/src/main/java/ai/rapids/cudf/Table.java | 19 ++++++++++++++----- java/src/main/native/src/TableJni.cpp | 12 ++++++++++-- .../test/java/ai/rapids/cudf/TableTest.java | 19 ++++++++++++++++++- 4 files changed, 58 insertions(+), 8 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/JSONOptions.java b/java/src/main/java/ai/rapids/cudf/JSONOptions.java index c8308ca17ec..17b497be5ee 100644 --- a/java/src/main/java/ai/rapids/cudf/JSONOptions.java +++ b/java/src/main/java/ai/rapids/cudf/JSONOptions.java @@ -38,6 +38,7 @@ public final class JSONOptions extends ColumnFilterOptions { private final boolean allowLeadingZeros; private final boolean allowNonNumericNumbers; private final boolean allowUnquotedControlChars; + private final byte lineDelimiter; private JSONOptions(Builder builder) { super(builder); @@ -52,6 +53,11 @@ private JSONOptions(Builder builder) { allowLeadingZeros = builder.allowLeadingZeros; allowNonNumericNumbers = builder.allowNonNumericNumbers; allowUnquotedControlChars = builder.allowUnquotedControlChars; + lineDelimiter = builder.lineDelimiter; + } + + public byte getLineDelimiter() { + return lineDelimiter; } public boolean isDayFirst() { @@ -123,6 +129,16 @@ public static final class Builder extends ColumnFilterOptions.Builder Byte.MAX_VALUE) { + throw new IllegalArgumentException("Only basic ASCII values are supported as line delimiters " + delimiter); + } + lineDelimiter = (byte)delimiter; + return this; + } + /** * Should json validation be strict or not */ diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 09da43374ae..19c72809cea 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -258,7 +258,8 @@ private static native long readJSON(int[] numChildren, String[] columnNames, boolean strictValidation, boolean allowLeadingZeros, boolean allowNonNumericNumbers, - boolean allowUnquotedControl) throws CudfException; + boolean allowUnquotedControl, + byte lineDelimiter) throws CudfException; private static native long readJSONFromDataSource(int[] numChildren, String[] columnNames, int[] dTypeIds, int[] dTypeScales, @@ -272,6 +273,7 @@ private static native long readJSONFromDataSource(int[] numChildren, String[] co boolean allowLeadingZeros, boolean allowNonNumericNumbers, boolean allowUnquotedControl, + byte lineDelimiter, long dsHandle) throws CudfException; private static native long readAndInferJSONFromDataSource(boolean dayFirst, boolean lines, @@ -284,6 +286,7 @@ private static native long readAndInferJSONFromDataSource(boolean dayFirst, bool boolean allowLeadingZeros, boolean allowNonNumericNumbers, boolean allowUnquotedControl, + byte lineDelimiter, long dsHandle) throws CudfException; private static native long readAndInferJSON(long address, long length, @@ -297,7 +300,8 @@ private static native long 
readAndInferJSON(long address, long length, boolean strictValidation, boolean allowLeadingZeros, boolean allowNonNumericNumbers, - boolean allowUnquotedControl) throws CudfException; + boolean allowUnquotedControl, + byte lineDelimiter) throws CudfException; /** * Read in Parquet formatted data. @@ -1321,7 +1325,8 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) { opts.strictValidation(), opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), - opts.unquotedControlChars()))) { + opts.unquotedControlChars(), + opts.getLineDelimiter()))) { return gatherJSONColumns(schema, twm, -1); } @@ -1404,7 +1409,8 @@ public static TableWithMeta readJSON(JSONOptions opts, HostMemoryBuffer buffer, opts.strictValidation(), opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), - opts.unquotedControlChars())); + opts.unquotedControlChars(), + opts.getLineDelimiter())); } /** @@ -1426,6 +1432,7 @@ public static TableWithMeta readAndInferJSON(JSONOptions opts, DataSource ds) { opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), + opts.getLineDelimiter(), dsHandle)); return twm; } finally { @@ -1479,7 +1486,8 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b opts.strictValidation(), opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), - opts.unquotedControlChars()))) { + opts.unquotedControlChars(), + opts.getLineDelimiter()))) { return gatherJSONColumns(schema, twm, emptyRowCount); } } @@ -1518,6 +1526,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), + opts.getLineDelimiter(), dsHandle))) { return gatherJSONColumns(schema, twm, emptyRowCount); } finally { diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 92e213bcb60..96d4c2c4eeb 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1627,6 +1627,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, jboolean allow_unquoted_control, + jbyte line_delimiter, jlong ds_handle) { JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0); @@ -1646,6 +1647,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env, .normalize_single_quotes(static_cast(normalize_single_quotes)) .normalize_whitespace(static_cast(normalize_whitespace)) .mixed_types_as_string(mixed_types_as_string) + .delimiter(static_cast(line_delimiter)) .strict_validation(strict_validation) .keep_quotes(keep_quotes); if (strict_validation) { @@ -1676,7 +1678,8 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env, jboolean strict_validation, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, - jboolean allow_unquoted_control) + jboolean allow_unquoted_control, + jbyte line_delimiter) { JNI_NULL_CHECK(env, buffer, "buffer cannot be null", 0); if (buffer_length <= 0) { @@ -1700,6 +1703,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env, .normalize_whitespace(static_cast(normalize_whitespace)) .strict_validation(strict_validation) .mixed_types_as_string(mixed_types_as_string) + .delimiter(static_cast(line_delimiter)) .keep_quotes(keep_quotes); if (strict_validation) { opts.numeric_leading_zeros(allow_leading_zeros) @@ -1814,6 +1818,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env, jboolean allow_leading_zeros, jboolean 
allow_nonnumeric_numbers, jboolean allow_unquoted_control, + jbyte line_delimiter, jlong ds_handle) { JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0); @@ -1848,6 +1853,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env, .normalize_single_quotes(static_cast(normalize_single_quotes)) .normalize_whitespace(static_cast(normalize_whitespace)) .mixed_types_as_string(mixed_types_as_string) + .delimiter(static_cast(line_delimiter)) .strict_validation(strict_validation) .keep_quotes(keep_quotes); if (strict_validation) { @@ -1908,7 +1914,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env, jboolean strict_validation, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, - jboolean allow_unquoted_control) + jboolean allow_unquoted_control, + jbyte line_delimiter) { bool read_buffer = true; if (buffer == 0) { @@ -1957,6 +1964,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env, .normalize_single_quotes(static_cast(normalize_single_quotes)) .normalize_whitespace(static_cast(normalize_whitespace)) .mixed_types_as_string(mixed_types_as_string) + .delimiter(static_cast(line_delimiter)) .strict_validation(strict_validation) .keep_quotes(keep_quotes); if (strict_validation) { diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 830f2b33b32..c7fcb1756b6 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -40,7 +40,6 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; -import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import java.io.*; @@ -656,6 +655,24 @@ void testJSONValidationUnquotedControl() { } } + private static final byte[] CR_JSON_TEST_BUFFER = ("{\"a\":\"12\n3\"}\0" + + "{\"a\":\"AB\nC\"}\0").getBytes(StandardCharsets.UTF_8); + + @Test + void testReadJSONDelim() { + Schema schema = Schema.builder().addColumn(DType.STRING, "a").build(); + JSONOptions opts = JSONOptions.builder() + .withLines(true) + .withLineDelimiter('\0') + .build(); + try (Table expected = new Table.TestBuilder() + .column("12\n3", "AB\nC") + .build(); + Table found = Table.readJSON(schema, opts, CR_JSON_TEST_BUFFER)) { + assertTablesAreEqual(expected, found); + } + } + private static final byte[] NESTED_JSON_DATA_BUFFER = ("{\"a\":{\"c\":\"C1\"}}\n" + "{\"a\":{\"c\":\"C2\", \"b\":\"B2\"}}\n" + "{\"d\":[1,2,3]}\n" + From b3518ab7e10f5eabf5ef06a495cc659079e0447c Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 24 Sep 2024 10:15:38 -0500 Subject: [PATCH 4/4] Add in option for Java JSON APIs to do column pruning in CUDF (#16796) This adds in the options to enable column_pruning when reading JSON using the java APIs. This is still in draft because there are test failures if this is turned on for those tests. https://github.com/rapidsai/cudf/issues/16797 That said the performance impact from enabling column pruning on some queries is huge. For one query in particular the current code takes 161.5 seconds and with CUDF column pruning it is just 16.5 seconds. That is a 10x speedup for something that is fairly real world. 
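A minimal usage sketch (not part of this patch's tests) of how a caller could opt in. The builder setter name withCudfPruneSchema(boolean) is an assumption based on the new cudfPruneSchema option; pruning is only applied when a non-empty schema is passed, per the guard added in Table.java below.

    // Hypothetical opt-in: prune unrequested columns on the CUDF side.
    Schema schema = Schema.builder()
        .addColumn(DType.STRING, "a")
        .build();
    JSONOptions opts = JSONOptions.builder()
        .withLines(true)
        .withCudfPruneSchema(true)  // assumed setter name for the new option
        .build();
    try (Table pruned = Table.readJSON(schema, opts, new File("data.jsonl"))) {
      // only the columns named in the schema are materialized
    }
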
Authors: - Robert (Bobby) Evans (https://github.com/revans2) Approvers: - Alessandro Bellina (https://github.com/abellina) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/16796 --- .../main/java/ai/rapids/cudf/JSONOptions.java | 12 ++++++++++++ java/src/main/java/ai/rapids/cudf/Table.java | 17 +++++++++++++++++ java/src/main/native/src/TableJni.cpp | 12 +++++++++--- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/JSONOptions.java b/java/src/main/java/ai/rapids/cudf/JSONOptions.java index 17b497be5ee..2bb74c3e3b1 100644 --- a/java/src/main/java/ai/rapids/cudf/JSONOptions.java +++ b/java/src/main/java/ai/rapids/cudf/JSONOptions.java @@ -38,6 +38,7 @@ public final class JSONOptions extends ColumnFilterOptions { private final boolean allowLeadingZeros; private final boolean allowNonNumericNumbers; private final boolean allowUnquotedControlChars; + private final boolean cudfPruneSchema; private final byte lineDelimiter; private JSONOptions(Builder builder) { @@ -53,9 +54,14 @@ private JSONOptions(Builder builder) { allowLeadingZeros = builder.allowLeadingZeros; allowNonNumericNumbers = builder.allowNonNumericNumbers; allowUnquotedControlChars = builder.allowUnquotedControlChars; + cudfPruneSchema = builder.cudfPruneSchema; lineDelimiter = builder.lineDelimiter; } + public boolean shouldCudfPruneSchema() { + return cudfPruneSchema; + } + public byte getLineDelimiter() { return lineDelimiter; } @@ -129,8 +135,14 @@ public static final class Builder extends ColumnFilterOptions.Builder Byte.MAX_VALUE) { throw new IllegalArgumentException("Only basic ASCII values are supported as line delimiters " + delimiter); diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 19c72809cea..6d370ca27b2 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -259,6 +259,7 @@ private static native long readJSON(int[] numChildren, String[] columnNames, boolean allowLeadingZeros, boolean allowNonNumericNumbers, boolean allowUnquotedControl, + boolean pruneColumns, byte lineDelimiter) throws CudfException; private static native long readJSONFromDataSource(int[] numChildren, String[] columnNames, @@ -273,6 +274,7 @@ private static native long readJSONFromDataSource(int[] numChildren, String[] co boolean allowLeadingZeros, boolean allowNonNumericNumbers, boolean allowUnquotedControl, + boolean pruneColumns, byte lineDelimiter, long dsHandle) throws CudfException; @@ -1312,6 +1314,10 @@ private static Table gatherJSONColumns(Schema schema, TableWithMeta twm, int emp * @return the file parsed as a table on the GPU. 
*/ public static Table readJSON(Schema schema, JSONOptions opts, File path) { + // only prune the schema if one is provided + boolean cudfPruneSchema = schema.getColumnNames() != null && + schema.getColumnNames().length != 0 && + opts.shouldCudfPruneSchema(); try (TableWithMeta twm = new TableWithMeta( readJSON(schema.getFlattenedNumChildren(), schema.getFlattenedColumnNames(), schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(), @@ -1326,6 +1332,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) { opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), + cudfPruneSchema, opts.getLineDelimiter()))) { return gatherJSONColumns(schema, twm, -1); @@ -1472,6 +1479,10 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b assert len > 0; assert len <= buffer.length - offset; assert offset >= 0 && offset < buffer.length; + // only prune the schema if one is provided + boolean cudfPruneSchema = schema.getColumnNames() != null && + schema.getColumnNames().length != 0 && + opts.shouldCudfPruneSchema(); try (TableWithMeta twm = new TableWithMeta(readJSON( schema.getFlattenedNumChildren(), schema.getFlattenedColumnNames(), schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(), null, @@ -1487,6 +1498,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), + cudfPruneSchema, opts.getLineDelimiter()))) { return gatherJSONColumns(schema, twm, emptyRowCount); } @@ -1513,6 +1525,10 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds) { */ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int emptyRowCount) { long dsHandle = DataSourceHelper.createWrapperDataSource(ds); + // only prune the schema if one is provided + boolean cudfPruneSchema = schema.getColumnNames() != null && + schema.getColumnNames().length != 0 && + opts.shouldCudfPruneSchema(); try (TableWithMeta twm = new TableWithMeta(readJSONFromDataSource(schema.getFlattenedNumChildren(), schema.getFlattenedColumnNames(), schema.getFlattenedTypeIds(), schema.getFlattenedTypeScales(), opts.isDayFirst(), @@ -1526,6 +1542,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), + cudfPruneSchema, opts.getLineDelimiter(), dsHandle))) { return gatherJSONColumns(schema, twm, emptyRowCount); diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 96d4c2c4eeb..0f77da54152 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1649,7 +1649,8 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env, .mixed_types_as_string(mixed_types_as_string) .delimiter(static_cast(line_delimiter)) .strict_validation(strict_validation) - .keep_quotes(keep_quotes); + .keep_quotes(keep_quotes) + .prune_columns(false); if (strict_validation) { opts.numeric_leading_zeros(allow_leading_zeros) .nonnumeric_numbers(allow_nonnumeric_numbers) @@ -1703,6 +1704,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env, .normalize_whitespace(static_cast(normalize_whitespace)) .strict_validation(strict_validation) .mixed_types_as_string(mixed_types_as_string) + .prune_columns(false) .delimiter(static_cast(line_delimiter)) .keep_quotes(keep_quotes); if (strict_validation) { @@ -1818,6 +1820,7 @@ 
Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, jboolean allow_unquoted_control, + jboolean prune_columns, jbyte line_delimiter, jlong ds_handle) { @@ -1855,7 +1858,8 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env, .mixed_types_as_string(mixed_types_as_string) .delimiter(static_cast(line_delimiter)) .strict_validation(strict_validation) - .keep_quotes(keep_quotes); + .keep_quotes(keep_quotes) + .prune_columns(prune_columns); if (strict_validation) { opts.numeric_leading_zeros(allow_leading_zeros) .nonnumeric_numbers(allow_nonnumeric_numbers) @@ -1915,6 +1919,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, jboolean allow_unquoted_control, + jboolean prune_columns, jbyte line_delimiter) { bool read_buffer = true; @@ -1966,7 +1971,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env, .mixed_types_as_string(mixed_types_as_string) .delimiter(static_cast(line_delimiter)) .strict_validation(strict_validation) - .keep_quotes(keep_quotes); + .keep_quotes(keep_quotes) + .prune_columns(prune_columns); if (strict_validation) { opts.numeric_leading_zeros(allow_leading_zeros) .nonnumeric_numbers(allow_nonnumeric_numbers)
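
Taken together with the line-delimiter support from #16880, an end-to-end sketch (mirroring the testReadJSONDelim test above; the pruning setter name is assumed as in the previous commit message) would look like:

    // Read NUL-delimited JSON records and keep only the column named in the schema.
    byte[] data = ("{\"a\":\"x\",\"extra\":1}\0" +
                   "{\"a\":\"y\"}\0").getBytes(StandardCharsets.UTF_8);
    Schema schema = Schema.builder().addColumn(DType.STRING, "a").build();
    JSONOptions opts = JSONOptions.builder()
        .withLines(true)
        .withLineDelimiter('\0')        // from #16880
        .withCudfPruneSchema(true)      // assumed setter name, from this patch
        .build();
    try (Table t = Table.readJSON(schema, opts, data)) {
      // column "extra" is dropped during the read when pruning is enabled
    }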