From f0ba8598dd9792e137ca7aa3a1b22dbb84393cc1 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 21 Sep 2023 16:28:29 -0700 Subject: [PATCH 01/76] Pin to numpy<1.25 and numba<0.58 to avoid errors and deprecation warnings-as-errors. (#14156) Closes #14155. Related: #14160. (Will newer numpy support be backported to pandas 1.x? edit: no, see below) Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Benjamin Zaitlen (https://github.com/quasiben) - Ray Douglass (https://github.com/raydouglass) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/14156 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 4 ++-- conda/environments/all_cuda-120_arch-x86_64.yaml | 4 ++-- conda/recipes/cudf/meta.yaml | 6 ++++-- dependencies.yaml | 8 +++++--- python/cudf/pyproject.toml | 6 +++--- python/cudf_kafka/pyproject.toml | 2 +- python/dask_cudf/pyproject.toml | 4 ++-- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 692ba78f317..d4abc28cf13 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -55,8 +55,8 @@ dependencies: - nbsphinx - ninja - notebook -- numba>=0.57 -- numpy>=1.21 +- numba>=0.57,<0.58 +- numpy>=1.21,<1.25 - numpydoc - nvcc_linux-64=11.8 - nvcomp==2.6.1 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index cf1bf4b8733..9a98e400e6d 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -54,8 +54,8 @@ dependencies: - nbsphinx - ninja - notebook -- numba>=0.57 -- numpy>=1.21 +- numba>=0.57,<0.58 +- numpy>=1.21,<1.25 - numpydoc - nvcomp==2.6.1 - nvtx>=0.2.1 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 
a909b72c878..54b687faa69 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -78,8 +78,10 @@ requirements: - typing_extensions >=4.0.0 - pandas >=1.3,<1.6.0dev0 - cupy >=12.0.0 - - numba >=0.57 - - numpy >=1.21 + # TODO: Pin to numba<0.58 until #14160 is resolved + - numba >=0.57,<0.58 + # TODO: Pin to numpy<1.25 until cudf requires pandas 2 + - numpy >=1.21,<1.25 - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} - libcudf ={{ version }} - {{ pin_compatible('rmm', max_pin='x.x') }} diff --git a/dependencies.yaml b/dependencies.yaml index 398ae193fe6..376e43094a7 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -259,7 +259,8 @@ dependencies: # Hard pin the patch version used during the build. This must be kept # in sync with the version pinned in get_arrow.cmake. - pyarrow==12.0.1.* - - numpy>=1.21 + # TODO: Pin to numpy<1.25 until cudf requires pandas 2 + - &numpy numpy>=1.21,<1.25 build_python: common: - output_types: [conda, requirements, pyproject] @@ -425,14 +426,15 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - fsspec>=0.6.0 - - numpy>=1.21 + - *numpy - pandas>=1.3,<1.6.0dev0 run_cudf: common: - output_types: [conda, requirements, pyproject] packages: - cachetools - - &numba numba>=0.57 + # TODO: Pin to numba<0.58 until #14160 is resolved + - &numba numba>=0.57,<0.58 - nvtx>=0.2.1 - packaging - rmm==23.10.* diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 574769f68d1..085d78afc7c 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "cmake>=3.26.4", "cython>=3.0.0", "ninja", - "numpy>=1.21", + "numpy>=1.21,<1.25", "protoc-wheel", "pyarrow==12.0.1.*", "rmm==23.10.*", @@ -31,8 +31,8 @@ dependencies = [ "cuda-python>=11.7.1,<12.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", - "numba>=0.57", - "numpy>=1.21", + "numba>=0.57,<0.58", + "numpy>=1.21,<1.25", "nvtx>=0.2.1", "packaging", "pandas>=1.3,<1.6.0dev0", diff --git 
a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index a6ef867451b..386cdc32ab1 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -4,7 +4,7 @@ requires = [ "cython>=3.0.0", - "numpy>=1.21", + "numpy>=1.21,<1.25", "pyarrow==12.0.1.*", "setuptools", "wheel", diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 2464abca71a..922da366422 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "dask>=2023.7.1", "distributed>=2023.7.1", "fsspec>=0.6.0", - "numpy>=1.21", + "numpy>=1.21,<1.25", "pandas>=1.3,<1.6.0dev0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -40,7 +40,7 @@ dynamic = ["entry-points"] [project.optional-dependencies] test = [ "dask-cuda==23.10.*", - "numba>=0.57", + "numba>=0.57,<0.58", "pytest", "pytest-cov", "pytest-xdist", From dd58dc4e9dae387c878afbe6cb32a311ce76fe68 Mon Sep 17 00:00:00 2001 From: Ben Jarmak <104460670+jarmak-nv@users.noreply.github.com> Date: Fri, 22 Sep 2023 07:58:56 -0500 Subject: [PATCH 02/76] Remove outdated GitHub project actions (#14161) This PR removes two GitHub Actions that are no-longer needed: - `.github/workflows/add_to_project.yml` - This automatically adds issues and PRs to the cuDF/Dask/Numba/UCX project, but this is now a built-in functionality to projects - `.github/workflows/new-issues-to-triage-projects.yml` - This tries to add issues to a now closed project Authors: - Ben Jarmak (https://github.com/jarmak-nv) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) --- .github/workflows/add_to_project.yml | 20 ----------- .../new-issues-to-triage-projects.yml | 35 ------------------- 2 files changed, 55 deletions(-) delete mode 100644 .github/workflows/add_to_project.yml delete mode 100644 
.github/workflows/new-issues-to-triage-projects.yml diff --git a/.github/workflows/add_to_project.yml b/.github/workflows/add_to_project.yml deleted file mode 100644 index b301c56a999..00000000000 --- a/.github/workflows/add_to_project.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Add new issue/PR to project - -on: - issues: - types: - - opened - - pull_request_target: - types: - - opened - -jobs: - add-to-project: - name: Add issue or PR to project - runs-on: ubuntu-latest - steps: - - uses: actions/add-to-project@v0.3.0 - with: - project-url: https://github.com/orgs/rapidsai/projects/51 - github-token: ${{ secrets.ADD_TO_PROJECT_GITHUB_TOKEN }} diff --git a/.github/workflows/new-issues-to-triage-projects.yml b/.github/workflows/new-issues-to-triage-projects.yml deleted file mode 100644 index cf9b0c379f1..00000000000 --- a/.github/workflows/new-issues-to-triage-projects.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Auto Assign New Issues to Triage Project - -on: - issues: - types: [opened] - -env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - -jobs: - assign_one_project: - runs-on: ubuntu-latest - name: Assign to New Issues to Triage Project - steps: - - name: Process bug issues - uses: docker://takanabe/github-actions-automate-projects:v0.0.1 - if: contains(github.event.issue.labels.*.name, 'bug') && contains(github.event.issue.labels.*.name, '? - Needs Triage') - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PROJECT_URL: https://github.com/rapidsai/cudf/projects/1 - GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing' - - name: Process feature issues - uses: docker://takanabe/github-actions-automate-projects:v0.0.1 - if: contains(github.event.issue.labels.*.name, 'feature request') && contains(github.event.issue.labels.*.name, '? 
- Needs Triage') - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PROJECT_URL: https://github.com/rapidsai/cudf/projects/9 - GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing' - - name: Process other issues - uses: docker://takanabe/github-actions-automate-projects:v0.0.1 - if: contains(github.event.issue.labels.*.name, '? - Needs Triage') && (!contains(github.event.issue.labels.*.name, 'bug') && !contains(github.event.issue.labels.*.name, 'feature request')) - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PROJECT_URL: https://github.com/rapidsai/cudf/projects/10 - GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing' From f42231f9193952b45cd9ba4642adcca392a7ce14 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Fri, 22 Sep 2023 10:19:16 -0400 Subject: [PATCH 03/76] v23.12 Updates [skip ci] --- .github/workflows/build.yaml | 16 +++++------ .github/workflows/pr.yaml | 28 +++++++++---------- .github/workflows/test.yaml | 16 +++++------ README.md | 2 +- ci/build_docs.sh | 2 +- ci/check_style.sh | 2 +- ci/test_wheel_dask_cudf.sh | 2 +- .../all_cuda-118_arch-x86_64.yaml | 8 +++--- .../all_cuda-120_arch-x86_64.yaml | 8 +++--- cpp/CMakeLists.txt | 2 +- cpp/doxygen/Doxyfile | 4 +-- cpp/examples/basic/CMakeLists.txt | 2 +- cpp/examples/strings/CMakeLists.txt | 2 +- cpp/libcudf_kafka/CMakeLists.txt | 2 +- dependencies.yaml | 16 +++++------ docs/cudf/source/conf.py | 4 +-- docs/dask_cudf/source/conf.py | 4 +-- fetch_rapids.cmake | 2 +- java/ci/README.md | 4 +-- java/pom.xml | 2 +- java/src/main/native/CMakeLists.txt | 2 +- python/cudf/CMakeLists.txt | 2 +- python/cudf/cudf/__init__.py | 2 +- python/cudf/pyproject.toml | 6 ++-- python/cudf_kafka/pyproject.toml | 4 +-- python/custreamz/pyproject.toml | 6 ++-- python/dask_cudf/dask_cudf/__init__.py | 2 +- python/dask_cudf/pyproject.toml | 6 ++-- 28 files changed, 79 insertions(+), 79 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0e120d34bb1..ab028eb89cc 100644 --- 
a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.10 + uses: 
rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: ${{ inputs.build_type || 'branch' }} @@ -100,7 +100,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 054ea7968c8..214f9c90b41 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -26,34 +26,34 @@ jobs: - wheel-build-dask-cudf - wheel-tests-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.12 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.12 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 with: build_type: pull-request conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: 
rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 with: build_type: pull-request conda-python-cudf-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: pull-request test_script: "ci/test_python_cudf.sh" @@ -61,14 +61,14 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: pull-request test_script: "ci/test_python_other.sh" conda-java-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -78,7 +78,7 @@ jobs: conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -88,7 +88,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: 
rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -98,21 +98,21 @@ jobs: wheel-build-cudf: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: build_type: pull-request script: ci/test_wheel_cudf.sh wheel-build-dask-cudf: needs: wheel-tests-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request @@ -120,7 +120,7 @@ jobs: wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 030f2e41db4..9ca32bcfe03 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10 + 
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -36,7 +36,7 @@ jobs: run_script: "ci/test_cpp_memcheck.sh" conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: test_script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: 
rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: nightly diff --git a/README.md b/README.md index 64c980d0cb3..5f2ce014dba 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ cuDF can be installed with conda (via [miniconda](https://conda.io/miniconda.htm ```bash conda install -c rapidsai -c conda-forge -c nvidia \ - cudf=23.10 python=3.10 cuda-version=11.8 + cudf=23.12 python=3.10 cuda-version=11.8 ``` We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 1ed047a500b..11e7a96b751 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -25,7 +25,7 @@ rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ libcudf cudf dask-cudf -export RAPIDS_VERSION_NUMBER="23.10" +export RAPIDS_VERSION_NUMBER="23.12" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" diff --git a/ci/check_style.sh b/ci/check_style.sh index e96ad8bf1db..a01cf4dcc6b 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -14,7 +14,7 @@ rapids-dependency-file-generator \ rapids-mamba-retry env create --force -f env.yaml -n checks conda activate checks -FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.10/cmake-format-rapids-cmake.json +FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.12/cmake-format-rapids-cmake.json export 
RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index d6e7f4bf65e..050aa4561c7 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -11,7 +11,7 @@ RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from python -m pip install --no-deps ./local-cudf-dep/cudf*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 +python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.12 # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/dask_cudf*.whl)[test] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index d4abc28cf13..151d250d7e9 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -25,7 +25,7 @@ dependencies: - cxx-compiler - cython>=3.0.0 - dask-core>=2023.7.1 -- dask-cuda==23.10.* +- dask-cuda==23.12.* - dask>=2023.7.1 - distributed>=2023.7.1 - dlpack>=0.5,<0.6.0a0 @@ -44,9 +44,9 @@ dependencies: - libcufile=1.4.0.31 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 -- libkvikio==23.10.* +- libkvikio==23.12.* - librdkafka>=1.9.0,<1.10.0a0 -- librmm==23.10.* +- librmm==23.12.* - make - mimesis>=4.1.0 - moto>=4.0.8 @@ -80,7 +80,7 @@ dependencies: - python-snappy>=0.6.0 - python>=3.9,<3.11 - pytorch<1.12.0 -- rmm==23.10.* +- rmm==23.12.* - s3fs>=2022.3.0 - scikit-build>=0.13.1 - scipy diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml 
b/conda/environments/all_cuda-120_arch-x86_64.yaml index 9a98e400e6d..ad52b8f8b97 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -26,7 +26,7 @@ dependencies: - cxx-compiler - cython>=3.0.0 - dask-core>=2023.7.1 -- dask-cuda==23.10.* +- dask-cuda==23.12.* - dask>=2023.7.1 - distributed>=2023.7.1 - dlpack>=0.5,<0.6.0a0 @@ -43,9 +43,9 @@ dependencies: - libarrow==12.0.1.* - libcufile-dev - libcurand-dev -- libkvikio==23.10.* +- libkvikio==23.12.* - librdkafka>=1.9.0,<1.10.0a0 -- librmm==23.10.* +- librmm==23.12.* - make - mimesis>=4.1.0 - moto>=4.0.8 @@ -77,7 +77,7 @@ dependencies: - python-snappy>=0.6.0 - python>=3.9,<3.11 - pytorch<1.12.0 -- rmm==23.10.* +- rmm==23.12.* - s3fs>=2022.3.0 - scikit-build>=0.13.1 - scipy diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a84f7bd5224..38713d99622 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF) project( CUDF - VERSION 23.10.00 + VERSION 23.12.00 LANGUAGES C CXX CUDA ) if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index b072d252881..adefaaa1479 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = libcudf # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 23.10.00 +PROJECT_NUMBER = 23.12.00 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -2226,7 +2226,7 @@ SKIP_FUNCTION_MACROS = YES # the path). If a tag file is not located in the directory in which doxygen is # run, you must also specify the path to the tagfile here. 
-TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/23.10 +TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/23.12 # When a file name is specified after GENERATE_TAGFILE, doxygen will create a # tag file that is based on the input files it reads. See section "Linking to diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt index 1c1952c4616..9ff716f41e4 100644 --- a/cpp/examples/basic/CMakeLists.txt +++ b/cpp/examples/basic/CMakeLists.txt @@ -16,7 +16,7 @@ file( ) include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) -set(CUDF_TAG branch-23.10) +set(CUDF_TAG branch-23.12) CPMFindPackage( NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf GIT_TAG ${CUDF_TAG} diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt index 31a6b12a4bc..4b500d3a92e 100644 --- a/cpp/examples/strings/CMakeLists.txt +++ b/cpp/examples/strings/CMakeLists.txt @@ -16,7 +16,7 @@ file( ) include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) -set(CUDF_TAG branch-23.10) +set(CUDF_TAG branch-23.12) CPMFindPackage( NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf GIT_TAG ${CUDF_TAG} diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 33bd04fffb3..1a15a3ec2cd 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -22,7 +22,7 @@ include(rapids-find) project( CUDA_KAFKA - VERSION 23.10.00 + VERSION 23.12.00 LANGUAGES CXX ) diff --git a/dependencies.yaml b/dependencies.yaml index 376e43094a7..66417b214ff 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -214,8 +214,8 @@ dependencies: common: - output_types: [conda, requirements] packages: - - librmm==23.10.* - - libkvikio==23.10.* + - librmm==23.12.* + - libkvikio==23.12.* - output_types: conda packages: - fmt>=9.1.0,<10 @@ -266,7 +266,7 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - scikit-build>=0.13.1 - - rmm==23.10.* + - rmm==23.12.* - output_types: conda packages: - 
&protobuf protobuf>=4.21,<5 @@ -385,7 +385,7 @@ dependencies: common: - output_types: [conda] packages: - - dask-cuda==23.10.* + - dask-cuda==23.12.* - *doxygen - make - myst-nb @@ -437,7 +437,7 @@ dependencies: - &numba numba>=0.57,<0.58 - nvtx>=0.2.1 - packaging - - rmm==23.10.* + - rmm==23.12.* - typing_extensions>=4.0.0 - *protobuf - output_types: conda @@ -498,7 +498,7 @@ dependencies: - dask-core>=2023.7.1 # dask-core in conda is the actual package & dask is the meta package - output_types: pyproject packages: - - &cudf cudf==23.10.* + - &cudf cudf==23.12.* - *cupy_pip run_cudf_kafka: common: @@ -517,7 +517,7 @@ dependencies: packages: - confluent-kafka>=1.9.0,<1.10.0a0 - *cudf - - cudf_kafka==23.10.* + - cudf_kafka==23.12.* test_cpp: common: - output_types: conda @@ -599,5 +599,5 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask-cuda==23.10.* + - dask-cuda==23.12.* - *numba diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 03b1bb7039b..acb2a5d17f3 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -79,9 +79,9 @@ # built documents. # # The short X.Y version. -version = '23.10' +version = '23.12' # The full version, including alpha/beta/rc tags. -release = '23.10.00' +release = '23.12.00' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/dask_cudf/source/conf.py b/docs/dask_cudf/source/conf.py index ad629b5e949..6861a9b90f6 100644 --- a/docs/dask_cudf/source/conf.py +++ b/docs/dask_cudf/source/conf.py @@ -11,8 +11,8 @@ project = "dask-cudf" copyright = "2018-2023, NVIDIA Corporation" author = "NVIDIA Corporation" -version = '23.10' -release = '23.10.00' +version = '23.12' +release = '23.12.00' language = "en" diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 4a68c7dbc60..e79d9d86fce 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.10/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.12/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake ) endif() diff --git a/java/ci/README.md b/java/ci/README.md index e9599b33bf1..12a2bb2dc51 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:11.8.0-devel-centos7 bash You can download the cuDF repo in the docker container or you can mount it into the container. Here I choose to download again in the container. ```bash -git clone --recursive https://github.com/rapidsai/cudf.git -b branch-23.10 +git clone --recursive https://github.com/rapidsai/cudf.git -b branch-23.12 ``` ### Build cuDF jar with devtoolset @@ -47,4 +47,4 @@ scl enable devtoolset-11 "java/ci/build-in-docker.sh" ### The output -You can find the cuDF jar in java/target/ like cudf-23.10.0-SNAPSHOT-cuda11.jar. +You can find the cuDF jar in java/target/ like cudf-23.12.0-SNAPSHOT-cuda11.jar. 
diff --git a/java/pom.xml b/java/pom.xml index afcc0e15a2c..cc880312d34 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ ai.rapids cudf - 23.10.0-SNAPSHOT + 23.12.0-SNAPSHOT cudfjni diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 128989fe77f..0dcfee2cffe 100644 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -28,7 +28,7 @@ rapids_cuda_init_architectures(CUDF_JNI) project( CUDF_JNI - VERSION 23.10.00 + VERSION 23.12.00 LANGUAGES C CXX CUDA ) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 6f3e428d291..a8b91c27095 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) -set(cudf_version 23.10.00) +set(cudf_version 23.12.00) include(../../fetch_rapids.cmake) include(rapids-cuda) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index e5c78fca893..8d25d478676 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -99,7 +99,7 @@ rmm.register_reinitialize_hook(clear_cache) -__version__ = "23.10.00" +__version__ = "23.12.00" __all__ = [ "BaseIndex", diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 085d78afc7c..39a8dca0267 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -9,7 +9,7 @@ requires = [ "numpy>=1.21,<1.25", "protoc-wheel", "pyarrow==12.0.1.*", - "rmm==23.10.*", + "rmm==23.12.*", "scikit-build>=0.13.1", "setuptools", "wheel", @@ -17,7 +17,7 @@ requires = [ [project] name = "cudf" -version = "23.10.00" +version = "23.12.00" description = "cuDF - GPU Dataframe" readme = { file = "README.md", content-type = "text/markdown" } authors = [ @@ -39,7 +39,7 @@ dependencies = [ "protobuf>=4.21,<5", "ptxcompiler", "pyarrow==12.*", - "rmm==23.10.*", + "rmm==23.12.*", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 386cdc32ab1..78a7a83ac3a 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -12,7 +12,7 @@ requires = [ [project] name = "cudf_kafka" -version = "23.10.00" +version = "23.12.00" description = "cuDF Kafka Datasource" readme = { file = "README.md", content-type = "text/markdown" } authors = [ @@ -21,7 +21,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "cudf==23.10.*", + "cudf==23.12.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.optional-dependencies] diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index 47ade91b5eb..e6328ed045d 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -9,7 +9,7 @@ requires = [ [project] name = "custreamz" -version = "23.10.00" +version = "23.12.00" description = "cuStreamz - GPU Accelerated Streaming" readme = { file = "README.md", content-type = "text/markdown" } authors = [ @@ -19,8 +19,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "confluent-kafka>=1.9.0,<1.10.0a0", - "cudf==23.10.*", - "cudf_kafka==23.10.*", + "cudf==23.12.*", + "cudf_kafka==23.12.*", "streamz", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ diff --git a/python/dask_cudf/dask_cudf/__init__.py b/python/dask_cudf/dask_cudf/__init__.py index 6952c3d5882..7c81f5da481 100644 --- a/python/dask_cudf/dask_cudf/__init__.py +++ b/python/dask_cudf/dask_cudf/__init__.py @@ -14,7 +14,7 @@ except ImportError: pass -__version__ = "23.10.00" +__version__ = "23.12.00" __all__ = [ "DataFrame", diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 922da366422..08441c6b5f7 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -9,7 +9,7 @@ requires = [ [project] name = "dask_cudf" -version = "23.10.00" +version = "23.12.00" description = "Utilities for Dask and cuDF interactions" readme = { file = "README.md", content-type = "text/markdown" } authors = [ @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "cudf==23.10.*", + "cudf==23.12.*", "cupy-cuda11x>=12.0.0", "dask>=2023.7.1", "distributed>=2023.7.1", @@ -39,7 +39,7 @@ dynamic = ["entry-points"] [project.optional-dependencies] test = [ - "dask-cuda==23.10.*", + "dask-cuda==23.12.*", "numba>=0.57,<0.58", "pytest", "pytest-cov", From 98b1bc6c1ef1233a6c71c3b24fc8f88d591a4639 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:07:37 -0400 Subject: [PATCH 04/76] Fix calls to copy_bitmask to pass stream parameter (#14158) Fixes a couple places where `cudf::copy_bitmask` was called instead of `cudf::detail::copy_bitmask` to pass the available stream (and mr) parameters. 
Found while reviewing #14121 Reference: https://github.com/rapidsai/cudf/pull/14121#discussion_r1332332391 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/14158 --- cpp/src/lists/count_elements.cu | 12 ++++++------ cpp/src/replace/clamp.cu | 4 +++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/src/lists/count_elements.cu b/cpp/src/lists/count_elements.cu index f8e7b4c6126..40a14d805e1 100644 --- a/cpp/src/lists/count_elements.cu +++ b/cpp/src/lists/count_elements.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,12 +36,12 @@ namespace cudf { namespace lists { namespace detail { /** - * @brief Returns a numeric column containing lengths of each element. + * @brief Returns a numeric column containing lengths of each element * - * @param input Input lists column. - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param input Input lists column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New INT32 column with lengths. 
+ * @return New size_type column with lengths */ std::unique_ptr count_elements(lists_column_view const& input, rmm::cuda_stream_view stream, @@ -52,7 +52,7 @@ std::unique_ptr count_elements(lists_column_view const& input, // create output column auto output = make_fixed_width_column(data_type{type_to_id()}, input.size(), - copy_bitmask(input.parent()), + cudf::detail::copy_bitmask(input.parent(), stream, mr), input.null_count(), stream, mr); diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index 2b48aed2d29..950cb484ddf 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -163,7 +163,9 @@ std::enable_if_t(), std::unique_ptr> clamp auto output = detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream, mr); // mask will not change - if (input.nullable()) { output->set_null_mask(copy_bitmask(input), input.null_count()); } + if (input.nullable()) { + output->set_null_mask(cudf::detail::copy_bitmask(input, stream, mr), input.null_count()); + } auto output_device_view = cudf::mutable_column_device_view::create(output->mutable_view(), stream); From f865c871cd0f9b9c596476d9d98aafaf9cc46bb1 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:08:11 -0400 Subject: [PATCH 05/76] Expose stream parameter in public nvtext ngram APIs (#14061) Add stream parameter to public APIs: - `nvtext::generate_ngrams()` - `nvtext::generate_character_ngrams()` - `nvtext::hash_character_ngrams()` - `nvtext::ngrams_tokenize()` Also cleaned up some of the doxygen comments. And also fixed a spelling mistake in the jaccard.cu source that was bothering me. 
Reference #13744 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14061 --- cpp/benchmarks/text/ngrams.cpp | 3 +- cpp/benchmarks/text/tokenize.cpp | 7 ++- cpp/include/nvtext/generate_ngrams.hpp | 38 ++++++++------- cpp/include/nvtext/ngrams_tokenize.hpp | 28 +++++------ cpp/src/text/generate_ngrams.cu | 9 ++-- cpp/src/text/jaccard.cu | 4 +- cpp/src/text/ngrams_tokenize.cu | 4 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/text/ngrams_test.cpp | 59 ++++++++++++++++++++++++ cpp/tests/text/ngrams_tests.cpp | 28 ++++++----- cpp/tests/text/ngrams_tokenize_tests.cpp | 11 +++-- 11 files changed, 135 insertions(+), 57 deletions(-) create mode 100644 cpp/tests/streams/text/ngrams_test.cpp diff --git a/cpp/benchmarks/text/ngrams.cpp b/cpp/benchmarks/text/ngrams.cpp index 0319577f6b9..f3fd5cc5729 100644 --- a/cpp/benchmarks/text/ngrams.cpp +++ b/cpp/benchmarks/text/ngrams.cpp @@ -36,11 +36,12 @@ static void BM_ngrams(benchmark::State& state, ngrams_type nt) cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); cudf::strings_column_view input(column->view()); + auto const separator = cudf::string_scalar("_"); for (auto _ : state) { cuda_event_timer raii(state, true); switch (nt) { - case ngrams_type::tokens: nvtext::generate_ngrams(input); break; + case ngrams_type::tokens: nvtext::generate_ngrams(input, 2, separator); break; case ngrams_type::characters: nvtext::generate_character_ngrams(input); break; } } diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp index 423fe667b05..b556a84c541 100644 --- a/cpp/benchmarks/text/tokenize.cpp +++ b/cpp/benchmarks/text/tokenize.cpp @@ -67,8 +67,11 @@ static void bench_tokenize(nvbench::state& state) auto result = nvtext::count_tokens(input, 
cudf::strings_column_view(delimiters)); }); } else if (tokenize_type == "ngrams") { - state.exec(nvbench::exec_tag::sync, - [&](nvbench::launch& launch) { auto result = nvtext::ngrams_tokenize(input); }); + auto const delimiter = cudf::string_scalar(""); + auto const separator = cudf::string_scalar("_"); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::ngrams_tokenize(input, 2, delimiter, separator); + }); } else if (tokenize_type == "characters") { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = nvtext::character_tokenize(input); }); diff --git a/cpp/include/nvtext/generate_ngrams.hpp b/cpp/include/nvtext/generate_ngrams.hpp index 5d66401df9d..46f2c0e7bc9 100644 --- a/cpp/include/nvtext/generate_ngrams.hpp +++ b/cpp/include/nvtext/generate_ngrams.hpp @@ -47,19 +47,19 @@ namespace nvtext { * @throw cudf::logic_error if `separator` is invalid * @throw cudf::logic_error if there are not enough strings to generate any ngrams * - * @param strings Strings column to tokenize and produce ngrams from. - * @param ngrams The ngram number to generate. - * Default is 2 = bigram. - * @param separator The string to use for separating ngram tokens. - * Default is "_" character. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings columns of tokens. 
+ * @param input Strings column to tokenize and produce ngrams from + * @param ngrams The ngram number to generate + * @param separator The string to use for separating ngram tokens + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings columns of tokens */ std::unique_ptr generate_ngrams( - cudf::strings_column_view const& strings, - cudf::size_type ngrams = 2, - cudf::string_scalar const& separator = cudf::string_scalar{"_"}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::strings_column_view const& input, + cudf::size_type ngrams, + cudf::string_scalar const& separator, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Generates ngrams of characters within each string. @@ -79,15 +79,17 @@ std::unique_ptr generate_ngrams( * @throw cudf::logic_error if `ngrams < 2` * @throw cudf::logic_error if there are not enough characters to generate any ngrams * - * @param strings Strings column to produce ngrams from. + * @param input Strings column to produce ngrams from * @param ngrams The ngram number to generate. * Default is 2 = bigram. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings columns of tokens. 
+ * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings columns of tokens */ std::unique_ptr generate_character_ngrams( - cudf::strings_column_view const& strings, + cudf::strings_column_view const& input, cudf::size_type ngrams = 2, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -113,14 +115,16 @@ std::unique_ptr generate_character_ngrams( * @throw cudf::logic_error if `ngrams < 2` * @throw cudf::logic_error if there are not enough characters to generate any ngrams * - * @param strings Strings column to produce ngrams from. + * @param input Strings column to produce ngrams from * @param ngrams The ngram number to generate. Default is 5. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory. * @return A lists column of hash values */ std::unique_ptr hash_character_ngrams( - cudf::strings_column_view const& strings, + cudf::strings_column_view const& input, cudf::size_type ngrams = 5, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/nvtext/ngrams_tokenize.hpp b/cpp/include/nvtext/ngrams_tokenize.hpp index 17f20f7ea4c..9d76ef8689f 100644 --- a/cpp/include/nvtext/ngrams_tokenize.hpp +++ b/cpp/include/nvtext/ngrams_tokenize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -66,22 +66,22 @@ namespace nvtext { * * All null row entries are ignored and the output contains all valid rows. * - * @param strings Strings column to tokenize and produce ngrams from. - * @param ngrams The ngram number to generate. - * Default is 2 = bigram. + * @param input Strings column to tokenize and produce ngrams from + * @param ngrams The ngram number to generate * @param delimiter UTF-8 characters used to separate each string into tokens. - * The default of empty string will separate tokens using whitespace. - * @param separator The string to use for separating ngram tokens. - * Default is "_" character. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings columns of tokens. + * An empty string will separate tokens using whitespace. + * @param separator The string to use for separating ngram tokens + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings columns of tokens */ std::unique_ptr ngrams_tokenize( - cudf::strings_column_view const& strings, - cudf::size_type ngrams = 2, - cudf::string_scalar const& delimiter = cudf::string_scalar{""}, - cudf::string_scalar const& separator = cudf::string_scalar{"_"}, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::strings_column_view const& input, + cudf::size_type ngrams, + cudf::string_scalar const& delimiter, + cudf::string_scalar const& separator, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace nvtext diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 938fd45246d..5f2f4d021a4 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -150,10 +150,11 @@ std::unique_ptr 
generate_ngrams(cudf::strings_column_view const& s std::unique_ptr generate_ngrams(cudf::strings_column_view const& strings, cudf::size_type ngrams, cudf::string_scalar const& separator, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::generate_ngrams(strings, ngrams, separator, cudf::get_default_stream(), mr); + return detail::generate_ngrams(strings, ngrams, separator, stream, mr); } namespace detail { @@ -317,18 +318,20 @@ std::unique_ptr hash_character_ngrams(cudf::strings_column_view co std::unique_ptr generate_character_ngrams(cudf::strings_column_view const& strings, cudf::size_type ngrams, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::generate_character_ngrams(strings, ngrams, cudf::get_default_stream(), mr); + return detail::generate_character_ngrams(strings, ngrams, stream, mr); } std::unique_ptr hash_character_ngrams(cudf::strings_column_view const& strings, cudf::size_type ngrams, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::hash_character_ngrams(strings, ngrams, cudf::get_default_stream(), mr); + return detail::hash_character_ngrams(strings, ngrams, stream, mr); } } // namespace nvtext diff --git a/cpp/src/text/jaccard.cu b/cpp/src/text/jaccard.cu index 5b55745c2c7..95324847ea0 100644 --- a/cpp/src/text/jaccard.cu +++ b/cpp/src/text/jaccard.cu @@ -107,7 +107,7 @@ rmm::device_uvector compute_unique_counts(cudf::column_view con * * This is called with a warp per row */ -struct sorted_interset_fn { +struct sorted_intersect_fn { cudf::column_device_view const d_input1; cudf::column_device_view const d_input2; cudf::size_type* d_results; @@ -151,7 +151,7 @@ rmm::device_uvector compute_intersect_counts(cudf::column_view auto const d_input1 = cudf::column_device_view::create(input1, stream); auto const d_input2 = cudf::column_device_view::create(input2, stream); auto d_results = 
rmm::device_uvector(input1.size(), stream); - sorted_interset_fn fn{*d_input1, *d_input2, d_results.data()}; + sorted_intersect_fn fn{*d_input1, *d_input2, d_results.data()}; thrust::for_each_n(rmm::exec_policy(stream), thrust::counting_iterator(0), input1.size() * cudf::detail::warp_size, diff --git a/cpp/src/text/ngrams_tokenize.cu b/cpp/src/text/ngrams_tokenize.cu index fd1cbf99221..73d85513e95 100644 --- a/cpp/src/text/ngrams_tokenize.cu +++ b/cpp/src/text/ngrams_tokenize.cu @@ -265,11 +265,11 @@ std::unique_ptr ngrams_tokenize(cudf::strings_column_view const& s cudf::size_type ngrams, cudf::string_scalar const& delimiter, cudf::string_scalar const& separator, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ngrams_tokenize( - strings, ngrams, delimiter, separator, cudf::get_default_stream(), mr); + return detail::ngrams_tokenize(strings, ngrams, delimiter, separator, stream, mr); } } // namespace nvtext diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d1e50442058..ba4921848d7 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -632,6 +632,7 @@ ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE testing ) +ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) # ################################################################################################## # Install tests #################################################################################### diff --git a/cpp/tests/streams/text/ngrams_test.cpp b/cpp/tests/streams/text/ngrams_test.cpp new file mode 100644 index 00000000000..bce0d2b680b --- /dev/null +++ b/cpp/tests/streams/text/ngrams_test.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +class TextNGramsTest : public cudf::test::BaseFixture {}; + +TEST_F(TextNGramsTest, GenerateNgrams) +{ + auto const input = + cudf::test::strings_column_wrapper({"the", "fox", "jumped", "over", "thé", "dog"}); + auto const separator = cudf::string_scalar{"_", true, cudf::test::get_default_stream()}; + nvtext::generate_ngrams( + cudf::strings_column_view(input), 3, separator, cudf::test::get_default_stream()); +} + +TEST_F(TextNGramsTest, GenerateCharacterNgrams) +{ + auto const input = + cudf::test::strings_column_wrapper({"the", "fox", "jumped", "over", "thé", "dog"}); + nvtext::generate_character_ngrams( + cudf::strings_column_view(input), 3, cudf::test::get_default_stream()); +} + +TEST_F(TextNGramsTest, HashCharacterNgrams) +{ + auto input = + cudf::test::strings_column_wrapper({"the quick brown fox", "jumped over the lazy dog."}); + nvtext::hash_character_ngrams( + cudf::strings_column_view(input), 5, cudf::test::get_default_stream()); +} + +TEST_F(TextNGramsTest, NgramsTokenize) +{ + auto input = + cudf::test::strings_column_wrapper({"the quick brown fox", "jumped over the lazy dog."}); + auto const delimiter = cudf::string_scalar{" ", true, cudf::test::get_default_stream()}; + auto const separator = cudf::string_scalar{"_", true, cudf::test::get_default_stream()}; + nvtext::ngrams_tokenize( + cudf::strings_column_view(input), 2, delimiter, separator, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/text/ngrams_tests.cpp b/cpp/tests/text/ngrams_tests.cpp index 
323b3eed3e2..7b179588385 100644 --- a/cpp/tests/text/ngrams_tests.cpp +++ b/cpp/tests/text/ngrams_tests.cpp @@ -34,18 +34,19 @@ TEST_F(TextGenerateNgramsTest, Ngrams) { cudf::test::strings_column_wrapper strings{"the", "fox", "jumped", "over", "thé", "dog"}; cudf::strings_column_view strings_view(strings); + auto const separator = cudf::string_scalar("_"); { cudf::test::strings_column_wrapper expected{ "the_fox", "fox_jumped", "jumped_over", "over_thé", "thé_dog"}; - auto const results = nvtext::generate_ngrams(strings_view); + auto const results = nvtext::generate_ngrams(strings_view, 2, separator); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } { cudf::test::strings_column_wrapper expected{ "the_fox_jumped", "fox_jumped_over", "jumped_over_thé", "over_thé_dog"}; - auto const results = nvtext::generate_ngrams(strings_view, 3); + auto const results = nvtext::generate_ngrams(strings_view, 3, separator); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } { @@ -83,10 +84,11 @@ TEST_F(TextGenerateNgramsTest, NgramsWithNulls) h_strings.begin(), h_strings.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); + auto const separator = cudf::string_scalar("_"); cudf::strings_column_view strings_view(strings); { - auto const results = nvtext::generate_ngrams(strings_view, 3); + auto const results = nvtext::generate_ngrams(strings_view, 3, separator); cudf::test::strings_column_wrapper expected{ "the_fox_jumped", "fox_jumped_over", "jumped_over_the", "over_the_dog"}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); @@ -103,7 +105,10 @@ TEST_F(TextGenerateNgramsTest, Empty) { auto const zero_size_strings_column = cudf::make_empty_column(cudf::type_id::STRING)->view(); - auto results = nvtext::generate_ngrams(cudf::strings_column_view(zero_size_strings_column)); + auto const separator = cudf::string_scalar("_"); + + auto results = + nvtext::generate_ngrams(cudf::strings_column_view(zero_size_strings_column), 2, 
separator); cudf::test::expect_column_empty(results->view()); results = nvtext::generate_character_ngrams(cudf::strings_column_view(zero_size_strings_column)); cudf::test::expect_column_empty(results->view()); @@ -112,21 +117,20 @@ TEST_F(TextGenerateNgramsTest, Empty) TEST_F(TextGenerateNgramsTest, Errors) { cudf::test::strings_column_wrapper strings{""}; + auto const separator = cudf::string_scalar("_"); // invalid parameter value - EXPECT_THROW(nvtext::generate_ngrams(cudf::strings_column_view(strings), 1), cudf::logic_error); + EXPECT_THROW(nvtext::generate_ngrams(cudf::strings_column_view(strings), 1, separator), + cudf::logic_error); EXPECT_THROW(nvtext::generate_character_ngrams(cudf::strings_column_view(strings), 1), cudf::logic_error); // not enough strings to generate ngrams - EXPECT_THROW(nvtext::generate_ngrams(cudf::strings_column_view(strings), 3), cudf::logic_error); + EXPECT_THROW(nvtext::generate_ngrams(cudf::strings_column_view(strings), 3, separator), + cudf::logic_error); EXPECT_THROW(nvtext::generate_character_ngrams(cudf::strings_column_view(strings), 3), cudf::logic_error); - std::vector h_strings{"", nullptr, "", nullptr}; - cudf::test::strings_column_wrapper strings_no_tokens( - h_strings.begin(), - h_strings.end(), - thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - EXPECT_THROW(nvtext::generate_ngrams(cudf::strings_column_view(strings_no_tokens)), + cudf::test::strings_column_wrapper strings_no_tokens({"", "", "", ""}, {1, 0, 1, 0}); + EXPECT_THROW(nvtext::generate_ngrams(cudf::strings_column_view(strings_no_tokens), 2, separator), cudf::logic_error); EXPECT_THROW(nvtext::generate_character_ngrams(cudf::strings_column_view(strings_no_tokens)), cudf::logic_error); diff --git a/cpp/tests/text/ngrams_tokenize_tests.cpp b/cpp/tests/text/ngrams_tokenize_tests.cpp index 5879bec3e64..c6fb886f7e5 100644 --- a/cpp/tests/text/ngrams_tokenize_tests.cpp +++ b/cpp/tests/text/ngrams_tokenize_tests.cpp @@ 
-62,7 +62,7 @@ TEST_F(TextNgramsTokenizeTest, Tokenize) "mousé_ate", "ate_the", "the_cheese"}; - auto results = nvtext::ngrams_tokenize(strings_view); + auto results = nvtext::ngrams_tokenize(strings_view, 2, std::string(), std::string("_")); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } { @@ -101,9 +101,10 @@ TEST_F(TextNgramsTokenizeTest, TokenizeOneGram) { cudf::test::strings_column_wrapper strings{"aaa bbb", " ccc ddd ", "eee"}; cudf::strings_column_view strings_view(strings); + auto const empty = cudf::string_scalar(""); cudf::test::strings_column_wrapper expected{"aaa", "bbb", "ccc", "ddd", "eee"}; - auto results = nvtext::ngrams_tokenize(strings_view, 1); + auto results = nvtext::ngrams_tokenize(strings_view, 1, empty, empty); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } @@ -111,7 +112,8 @@ TEST_F(TextNgramsTokenizeTest, TokenizeEmptyTest) { auto strings = cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING}); cudf::strings_column_view strings_view(strings->view()); - auto results = nvtext::ngrams_tokenize(strings_view); + auto const empty = cudf::string_scalar(""); + auto results = nvtext::ngrams_tokenize(strings_view, 2, empty, empty); EXPECT_EQ(results->size(), 0); EXPECT_EQ(results->has_nulls(), false); } @@ -120,5 +122,6 @@ TEST_F(TextNgramsTokenizeTest, TokenizeErrorTest) { cudf::test::strings_column_wrapper strings{"this column intentionally left blank"}; cudf::strings_column_view strings_view(strings); - EXPECT_THROW(nvtext::ngrams_tokenize(strings_view, 0), cudf::logic_error); + auto const empty = cudf::string_scalar(""); + EXPECT_THROW(nvtext::ngrams_tokenize(strings_view, 0, empty, empty), cudf::logic_error); } From a6d014e632ecad86cef486402dbe53acee191a1d Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 22 Sep 2023 16:24:33 +0100 Subject: [PATCH 06/76] Support callables in DataFrame.assign (#14142) While here, change the way the initial copied frame is constructed: callables are allowed to refer to columns 
already in the dataframe, even if they overwrite them. - Closes #12936 Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/14142 --- python/cudf/cudf/core/dataframe.py | 23 ++++++++++++++--------- python/cudf/cudf/tests/test_dataframe.py | 19 +++++++++++++++++++ 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 1a780cc9e9f..8a3dbe77787 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1390,10 +1390,21 @@ def _get_numeric_data(self): return self[columns] @_cudf_nvtx_annotate - def assign(self, **kwargs): + def assign(self, **kwargs: Union[Callable[[Self], Any], Any]): """ Assign columns to DataFrame from keyword arguments. + Parameters + ---------- + **kwargs: dict mapping string column names to values + The value for each key can either be a literal column (or + something that can be converted to a column), or + a callable of one argument that will be given the + dataframe as an argument and should return the new column + (without modifying the input argument). + Columns are added in-order, so callables can refer to + column names constructed in the assignment. 
+ Examples -------- >>> import cudf @@ -1405,15 +1416,9 @@ def assign(self, **kwargs): 1 1 4 2 2 5 """ - new_df = cudf.DataFrame(index=self.index.copy()) - for name, col in self._data.items(): - if name in kwargs: - new_df[name] = kwargs.pop(name) - else: - new_df._data[name] = col.copy() - + new_df = self.copy(deep=False) for k, v in kwargs.items(): - new_df[k] = v + new_df[k] = v(new_df) if callable(v) else v return new_df @classmethod diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 6180162ecdd..2f531afdeb7 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1327,6 +1327,25 @@ def test_assign(): np.testing.assert_equal(gdf2.y.to_numpy(), [2, 3, 4]) +@pytest.mark.parametrize( + "mapping", + [ + {"y": 1, "z": lambda df: df["x"] + df["y"]}, + { + "x": lambda df: df["x"] * 2, + "y": lambda df: 2, + "z": lambda df: df["x"] / df["y"], + }, + ], +) +def test_assign_callable(mapping): + df = pd.DataFrame({"x": [1, 2, 3]}) + cdf = cudf.from_pandas(df) + expect = df.assign(**mapping) + actual = cdf.assign(**mapping) + assert_eq(expect, actual) + + @pytest.mark.parametrize("nrows", [1, 8, 100, 1000]) @pytest.mark.parametrize("method", ["murmur3", "md5"]) @pytest.mark.parametrize("seed", [None, 42]) From 40bdd8ae4d89d2ea1f466c579d56f2c9ca1b014d Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 22 Sep 2023 19:20:18 +0200 Subject: [PATCH 07/76] Pin to `aws-sdk-cpp<1.11` (#14173) Pin conda packages to `aws-sdk-cpp<1.11`. The recent upgrade in version `1.11.*` has caused several issues with cleaning up (more details on changes can be read in [this link](https://github.com/aws/aws-sdk-cpp#version-111-is-now-available)), leading to Distributed and Dask-CUDA processes to segfault. 
The stack for one of those crashes looks like the following: ``` (gdb) bt #0 0x00007f5125359a0c in Aws::Utils::Logging::s_aws_logger_redirect_get_log_level(aws_logger*, unsigned int) () from /opt/conda/envs/dask/lib/python3.9/site-packages/pyarrow/../../.././libaws-cpp-sdk-core.so #1 0x00007f5124968f83 in aws_event_loop_thread () from /opt/conda/envs/dask/lib/python3.9/site-packages/pyarrow/../../../././libaws-c-io.so.1.0.0 #2 0x00007f5124ad9359 in thread_fn () from /opt/conda/envs/dask/lib/python3.9/site-packages/pyarrow/../../../././libaws-c-common.so.1 #3 0x00007f519958f6db in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 #4 0x00007f5198b1361f in clone () from /lib/x86_64-linux-gnu/libc.so.6 ``` Such segfaults now manifest frequently in CI, and in some cases are reproducible with a hit rate of ~30%. Given the approaching release time, it's probably the safest option to just pin to an older version of the package while we don't pinpoint the exact cause for the issue and a patched build is released upstream. The `aws-sdk-cpp` is statically-linked in the `pyarrow` pip package, which prevents us from using the same pinning technique. cuDF is currently pinned to `pyarrow=12.0.1` which seems to be built against `aws-sdk-cpp=1.10.*`, as per [recent build logs](https://github.com/apache/arrow/actions/runs/6276453828/job/17046177335?pr=37792#step:6:1372). 
Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/14173 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 1 + conda/environments/all_cuda-120_arch-x86_64.yaml | 1 + conda/recipes/libcudf/conda_build_config.yaml | 3 +++ conda/recipes/libcudf/meta.yaml | 2 ++ dependencies.yaml | 1 + 5 files changed, 8 insertions(+) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index d4abc28cf13..9fb991f9075 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,6 +9,7 @@ channels: - nvidia dependencies: - aiobotocore>=2.2.0 +- aws-sdk-cpp<1.11 - benchmark==1.8.0 - boto3>=1.21.21 - botocore>=1.24.21 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 9a98e400e6d..9ba0dd8dc38 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -9,6 +9,7 @@ channels: - nvidia dependencies: - aiobotocore>=2.2.0 +- aws-sdk-cpp<1.11 - benchmark==1.8.0 - boto3>=1.21.21 - botocore>=1.24.21 diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index 25b3f19de77..b1f5b083e06 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -22,6 +22,9 @@ gbench_version: gtest_version: - ">=1.13.0" +aws_sdk_cpp_version: + - "<1.11" + libarrow_version: - "=12" diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 627065817ba..28357f0d96d 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -74,6 +74,7 @@ requirements: - gtest {{ gtest_version }} - gmock {{ gtest_version }} - zlib {{ zlib_version }} + - aws-sdk-cpp 
{{ aws_sdk_cpp_version }} outputs: - name: libcudf @@ -107,6 +108,7 @@ outputs: - dlpack {{ dlpack_version }} - gtest {{ gtest_version }} - gmock {{ gtest_version }} + - aws-sdk-cpp {{ aws_sdk_cpp_version }} test: commands: - test -f $PREFIX/lib/libcudf.so diff --git a/dependencies.yaml b/dependencies.yaml index 376e43094a7..5586f54348c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -218,6 +218,7 @@ dependencies: - libkvikio==23.10.* - output_types: conda packages: + - aws-sdk-cpp<1.11 - fmt>=9.1.0,<10 - &gbench benchmark==1.8.0 - >est gtest>=1.13.0 From c7dd6b48684028a65b1d19d5d5b04060f6a4fe19 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 22 Sep 2023 14:15:31 -0400 Subject: [PATCH 08/76] Refactor libcudf indexalator to typed normalator (#14043) Creates generic normalizing-iterator for integer types for use by the `indexalator` and the future offsets normalizing iterator. Mostly code has been moved around or renamed so the normalizing-iterator part can take type template parameter to identify which integer type to normalize to. For the `indexalator`, this type is `cudf::size_type` and for the offsets iterator this type would be `int64`. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/14043 --- cpp/include/cudf/detail/indexalator.cuh | 332 +--------------- .../cudf/detail/normalizing_iterator.cuh | 367 ++++++++++++++++++ 2 files changed, 374 insertions(+), 325 deletions(-) create mode 100644 cpp/include/cudf/detail/normalizing_iterator.cuh diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index 4731c4919e3..6532dae3695 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -16,14 +16,13 @@ #pragma once +#include + #include #include #include #include -#include -#include -#include #include #include #include @@ -32,193 +31,6 @@ namespace cudf { namespace detail { -/** - * @brief The base class for the input or output index normalizing iterator. - * - * This implementation uses CRTP to define the `input_indexalator` and the - * `output_indexalator` classes. This is so this class can manipulate the - * uniquely typed subclass member variable `p_` directly without requiring - * virtual functions since iterator instances will be copied to device memory. - * - * The base class mainly manages updating the `p_` member variable while the - * subclasses handle accessing individual elements in device memory. - * - * @tparam T The derived class type for the iterator. - */ -template -struct base_indexalator { - using difference_type = ptrdiff_t; - using value_type = size_type; - using pointer = size_type*; - using iterator_category = std::random_access_iterator_tag; - - base_indexalator() = default; - base_indexalator(base_indexalator const&) = default; - base_indexalator(base_indexalator&&) = default; - base_indexalator& operator=(base_indexalator const&) = default; - base_indexalator& operator=(base_indexalator&&) = default; - - /** - * @brief Prefix increment operator. 
- */ - CUDF_HOST_DEVICE inline T& operator++() - { - T& derived = static_cast(*this); - derived.p_ += width_; - return derived; - } - - /** - * @brief Postfix increment operator. - */ - CUDF_HOST_DEVICE inline T operator++(int) - { - T tmp{static_cast(*this)}; - operator++(); - return tmp; - } - - /** - * @brief Prefix decrement operator. - */ - CUDF_HOST_DEVICE inline T& operator--() - { - T& derived = static_cast(*this); - derived.p_ -= width_; - return derived; - } - - /** - * @brief Postfix decrement operator. - */ - CUDF_HOST_DEVICE inline T operator--(int) - { - T tmp{static_cast(*this)}; - operator--(); - return tmp; - } - - /** - * @brief Compound assignment by sum operator. - */ - CUDF_HOST_DEVICE inline T& operator+=(difference_type offset) - { - T& derived = static_cast(*this); - derived.p_ += offset * width_; - return derived; - } - - /** - * @brief Increment by offset operator. - */ - CUDF_HOST_DEVICE inline T operator+(difference_type offset) const - { - auto tmp = T{static_cast(*this)}; - tmp.p_ += (offset * width_); - return tmp; - } - - /** - * @brief Addition assignment operator. - */ - CUDF_HOST_DEVICE inline friend T operator+(difference_type offset, T const& rhs) - { - T tmp{rhs}; - tmp.p_ += (offset * rhs.width_); - return tmp; - } - - /** - * @brief Compound assignment by difference operator. - */ - CUDF_HOST_DEVICE inline T& operator-=(difference_type offset) - { - T& derived = static_cast(*this); - derived.p_ -= offset * width_; - return derived; - } - - /** - * @brief Decrement by offset operator. - */ - CUDF_HOST_DEVICE inline T operator-(difference_type offset) const - { - auto tmp = T{static_cast(*this)}; - tmp.p_ -= (offset * width_); - return tmp; - } - - /** - * @brief Subtraction assignment operator. - */ - CUDF_HOST_DEVICE inline friend T operator-(difference_type offset, T const& rhs) - { - T tmp{rhs}; - tmp.p_ -= (offset * rhs.width_); - return tmp; - } - - /** - * @brief Compute offset from iterator difference operator. 
- */ - CUDF_HOST_DEVICE inline difference_type operator-(T const& rhs) const - { - return (static_cast(*this).p_ - rhs.p_) / width_; - } - - /** - * @brief Equals to operator. - */ - CUDF_HOST_DEVICE inline bool operator==(T const& rhs) const - { - return rhs.p_ == static_cast(*this).p_; - } - /** - * @brief Not equals to operator. - */ - CUDF_HOST_DEVICE inline bool operator!=(T const& rhs) const - { - return rhs.p_ != static_cast(*this).p_; - } - /** - * @brief Less than operator. - */ - CUDF_HOST_DEVICE inline bool operator<(T const& rhs) const - { - return static_cast(*this).p_ < rhs.p_; - } - /** - * @brief Greater than operator. - */ - CUDF_HOST_DEVICE inline bool operator>(T const& rhs) const - { - return static_cast(*this).p_ > rhs.p_; - } - /** - * @brief Less than or equals to operator. - */ - CUDF_HOST_DEVICE inline bool operator<=(T const& rhs) const - { - return static_cast(*this).p_ <= rhs.p_; - } - /** - * @brief Greater than or equals to operator. - */ - CUDF_HOST_DEVICE inline bool operator>=(T const& rhs) const - { - return static_cast(*this).p_ >= rhs.p_; - } - - protected: - /** - * @brief Constructor assigns width and type member variables for base class. - */ - base_indexalator(int32_t width, data_type dtype) : width_(width), dtype_(dtype) {} - - int width_; /// integer type width = 1,2,4, or 8 - data_type dtype_; /// for type-dispatcher calls -}; - /** * @brief The index normalizing input iterator. 
* @@ -244,65 +56,7 @@ struct base_indexalator { * auto result = thrust::find(thrust::device, begin, end, size_type{12} ); * @endcode */ -struct input_indexalator : base_indexalator { - friend struct indexalator_factory; - friend struct base_indexalator; // for CRTP - - using reference = size_type const; // this keeps STL and thrust happy - - input_indexalator() = default; - input_indexalator(input_indexalator const&) = default; - input_indexalator(input_indexalator&&) = default; - input_indexalator& operator=(input_indexalator const&) = default; - input_indexalator& operator=(input_indexalator&&) = default; - - /** - * @brief Indirection operator returns the value at the current iterator position. - */ - __device__ inline size_type operator*() const { return operator[](0); } - - /** - * @brief Dispatch functor for resolving a size_type value from any index type. - */ - struct index_as_size_type { - template ()>* = nullptr> - __device__ size_type operator()(void const* tp) - { - return static_cast(*static_cast(tp)); - } - template ()>* = nullptr> - __device__ size_type operator()(void const* tp) - { - CUDF_UNREACHABLE("only index types are supported"); - } - }; - /** - * @brief Array subscript operator returns a value at the input - * `idx` position as a `size_type` value. - */ - __device__ inline size_type operator[](size_type idx) const - { - void const* tp = p_ + (idx * width_); - return type_dispatcher(dtype_, index_as_size_type{}, tp); - } - - protected: - /** - * @brief Create an input index normalizing iterator. - * - * Use the indexalator_factory to create an iterator instance. - * - * @param data Pointer to an integer array in device memory. 
- * @param width The width of the integer type (1, 2, 4, or 8) - * @param data_type Index integer type of width `width` - */ - input_indexalator(void const* data, int width, data_type dtype) - : base_indexalator(width, dtype), p_{static_cast(data)} - { - } - - char const* p_; /// pointer to the integer data in device memory -}; +using input_indexalator = input_normalator; /** * @brief The index normalizing output iterator. @@ -328,79 +82,7 @@ struct input_indexalator : base_indexalator { * thrust::less()); * @endcode */ -struct output_indexalator : base_indexalator { - friend struct indexalator_factory; - friend struct base_indexalator; // for CRTP - - using reference = output_indexalator const&; // required for output iterators - - output_indexalator() = default; - output_indexalator(output_indexalator const&) = default; - output_indexalator(output_indexalator&&) = default; - output_indexalator& operator=(output_indexalator const&) = default; - output_indexalator& operator=(output_indexalator&&) = default; - - /** - * @brief Indirection operator returns this iterator instance in order - * to capture the `operator=(size_type)` calls. - */ - __device__ inline output_indexalator const& operator*() const { return *this; } - - /** - * @brief Array subscript operator returns an iterator instance at the specified `idx` position. - * - * This allows capturing the subsequent `operator=(size_type)` call in this class. - */ - __device__ inline output_indexalator const operator[](size_type idx) const - { - output_indexalator tmp{*this}; - tmp.p_ += (idx * width_); - return tmp; - } - - /** - * @brief Dispatch functor for setting the index value from a size_type value. 
- */ - struct size_type_to_index { - template ()>* = nullptr> - __device__ void operator()(void* tp, size_type const value) - { - (*static_cast(tp)) = static_cast(value); - } - template ()>* = nullptr> - __device__ void operator()(void* tp, size_type const value) - { - CUDF_UNREACHABLE("only index types are supported"); - } - }; - - /** - * @brief Assign a size_type value to the current iterator position. - */ - __device__ inline output_indexalator const& operator=(size_type const value) const - { - void* tp = p_; - type_dispatcher(dtype_, size_type_to_index{}, tp, value); - return *this; - } - - protected: - /** - * @brief Create an output index normalizing iterator. - * - * Use the indexalator_factory to create an iterator instance. - * - * @param data Pointer to an integer array in device memory. - * @param width The width of the integer type (1, 2, 4, or 8) - * @param data_type Index integer type of width `width` - */ - output_indexalator(void* data, int width, data_type dtype) - : base_indexalator(width, dtype), p_{static_cast(data)} - { - } - - char* p_; /// pointer to the integer data in device memory -}; +using output_indexalator = output_normalator; /** * @brief Use this class to create an indexalator instance. 
@@ -413,7 +95,7 @@ struct indexalator_factory { template ()>* = nullptr> input_indexalator operator()(column_view const& indices) { - return input_indexalator(indices.data(), sizeof(IndexType), indices.type()); + return input_indexalator(indices.data(), indices.type()); } template const&>(index) creates a copy auto const scalar_impl = static_cast const*>(&index); - return input_indexalator(scalar_impl->data(), sizeof(IndexType), index.type()); + return input_indexalator(scalar_impl->data(), index.type()); } template ()>* = nullptr> output_indexalator operator()(mutable_column_view const& indices) { - return output_indexalator(indices.data(), sizeof(IndexType), indices.type()); + return output_indexalator(indices.data(), indices.type()); } template + +#include + +namespace cudf { +namespace detail { + +/** + * @brief The base class for the input or output normalizing iterator + * + * The base class mainly manages updating the `p_` member variable while the + * subclasses handle accessing individual elements in device memory. + * + * @tparam Derived The derived class type for the iterator + * @tparam Integer The type the iterator normalizes to + */ +template +struct base_normalator { + static_assert(std::is_integral_v); + using difference_type = std::ptrdiff_t; + using value_type = Integer; + using pointer = Integer*; + using iterator_category = std::random_access_iterator_tag; + + base_normalator() = default; + base_normalator(base_normalator const&) = default; + base_normalator(base_normalator&&) = default; + base_normalator& operator=(base_normalator const&) = default; + base_normalator& operator=(base_normalator&&) = default; + + /** + * @brief Prefix increment operator. + */ + CUDF_HOST_DEVICE inline Derived& operator++() + { + Derived& derived = static_cast(*this); + derived.p_ += width_; + return derived; + } + + /** + * @brief Postfix increment operator. 
+ */ + CUDF_HOST_DEVICE inline Derived operator++(int) + { + Derived tmp{static_cast(*this)}; + operator++(); + return tmp; + } + + /** + * @brief Prefix decrement operator. + */ + CUDF_HOST_DEVICE inline Derived& operator--() + { + Derived& derived = static_cast(*this); + derived.p_ -= width_; + return derived; + } + + /** + * @brief Postfix decrement operator. + */ + CUDF_HOST_DEVICE inline Derived operator--(int) + { + Derived tmp{static_cast(*this)}; + operator--(); + return tmp; + } + + /** + * @brief Compound assignment by sum operator. + */ + CUDF_HOST_DEVICE inline Derived& operator+=(difference_type offset) + { + Derived& derived = static_cast(*this); + derived.p_ += offset * width_; + return derived; + } + + /** + * @brief Increment by offset operator. + */ + CUDF_HOST_DEVICE inline Derived operator+(difference_type offset) const + { + auto tmp = Derived{static_cast(*this)}; + tmp.p_ += (offset * width_); + return tmp; + } + + /** + * @brief Addition assignment operator. + */ + CUDF_HOST_DEVICE inline friend Derived operator+(difference_type offset, Derived const& rhs) + { + Derived tmp{rhs}; + tmp.p_ += (offset * rhs.width_); + return tmp; + } + + /** + * @brief Compound assignment by difference operator. + */ + CUDF_HOST_DEVICE inline Derived& operator-=(difference_type offset) + { + Derived& derived = static_cast(*this); + derived.p_ -= offset * width_; + return derived; + } + + /** + * @brief Decrement by offset operator. + */ + CUDF_HOST_DEVICE inline Derived operator-(difference_type offset) const + { + auto tmp = Derived{static_cast(*this)}; + tmp.p_ -= (offset * width_); + return tmp; + } + + /** + * @brief Subtraction assignment operator. + */ + CUDF_HOST_DEVICE inline friend Derived operator-(difference_type offset, Derived const& rhs) + { + Derived tmp{rhs}; + tmp.p_ -= (offset * rhs.width_); + return tmp; + } + + /** + * @brief Compute offset from iterator difference operator. 
+ */ + CUDF_HOST_DEVICE inline difference_type operator-(Derived const& rhs) const + { + return (static_cast(*this).p_ - rhs.p_) / width_; + } + + /** + * @brief Equals to operator. + */ + CUDF_HOST_DEVICE inline bool operator==(Derived const& rhs) const + { + return rhs.p_ == static_cast(*this).p_; + } + + /** + * @brief Not equals to operator. + */ + CUDF_HOST_DEVICE inline bool operator!=(Derived const& rhs) const + { + return rhs.p_ != static_cast(*this).p_; + } + + /** + * @brief Less than operator. + */ + CUDF_HOST_DEVICE inline bool operator<(Derived const& rhs) const + { + return static_cast(*this).p_ < rhs.p_; + } + + /** + * @brief Greater than operator. + */ + CUDF_HOST_DEVICE inline bool operator>(Derived const& rhs) const + { + return static_cast(*this).p_ > rhs.p_; + } + + /** + * @brief Less than or equals to operator. + */ + CUDF_HOST_DEVICE inline bool operator<=(Derived const& rhs) const + { + return static_cast(*this).p_ <= rhs.p_; + } + + /** + * @brief Greater than or equals to operator. + */ + CUDF_HOST_DEVICE inline bool operator>=(Derived const& rhs) const + { + return static_cast(*this).p_ >= rhs.p_; + } + + protected: + /** + * @brief Constructor assigns width and type member variables for base class. + */ + explicit base_normalator(data_type dtype) : width_(size_of(dtype)), dtype_(dtype) {} + + int width_; /// integer type width = 1,2,4, or 8 + data_type dtype_; /// for type-dispatcher calls +}; + +/** + * @brief The integer normalizing input iterator + * + * This is an iterator that can be used for index types (integers) without + * requiring a type-specific instance. It can be used for any iterator + * interface for reading an array of integer values of type + * int8, int16, int32, int64, uint8, uint16, uint32, or uint64. 
+ * Reading specific elements always return a type of `Integer` + * + * @tparam Integer Type returned by all read functions + */ +template +struct input_normalator : base_normalator, Integer> { + friend struct base_normalator, Integer>; // for CRTP + + using reference = Integer const; // this keeps STL and thrust happy + + input_normalator() = default; + input_normalator(input_normalator const&) = default; + input_normalator(input_normalator&&) = default; + input_normalator& operator=(input_normalator const&) = default; + input_normalator& operator=(input_normalator&&) = default; + + /** + * @brief Indirection operator returns the value at the current iterator position + */ + __device__ inline Integer operator*() const { return operator[](0); } + + /** + * @brief Dispatch functor for resolving a Integer value from any integer type + */ + struct normalize_type { + template >* = nullptr> + __device__ Integer operator()(void const* tp) + { + return static_cast(*static_cast(tp)); + } + template >* = nullptr> + __device__ Integer operator()(void const*) + { + CUDF_UNREACHABLE("only integral types are supported"); + } + }; + + /** + * @brief Array subscript operator returns a value at the input + * `idx` position as a `Integer` value. + */ + __device__ inline Integer operator[](size_type idx) const + { + void const* tp = p_ + (idx * this->width_); + return type_dispatcher(this->dtype_, normalize_type{}, tp); + } + + /** + * @brief Create an input index normalizing iterator. + * + * Use the indexalator_factory to create an iterator instance. + * + * @param data Pointer to an integer array in device memory. 
+ * @param data_type Type of data in data + */ + input_normalator(void const* data, data_type dtype) + : base_normalator, Integer>(dtype), p_{static_cast(data)} + { + } + + char const* p_; /// pointer to the integer data in device memory +}; + +/** + * @brief The integer normalizing output iterator + * + * This is an iterator that can be used for index types (integers) without + * requiring a type-specific instance. It can be used for any iterator + * interface for writing an array of integer values of type + * int8, int16, int32, int64, uint8, uint16, uint32, or uint64. + * Setting specific elements always accept the `Integer` type values. + * + * @tparam Integer The type used for all write functions + */ +template +struct output_normalator : base_normalator, Integer> { + friend struct base_normalator, Integer>; // for CRTP + + using reference = output_normalator const&; // required for output iterators + + output_normalator() = default; + output_normalator(output_normalator const&) = default; + output_normalator(output_normalator&&) = default; + output_normalator& operator=(output_normalator const&) = default; + output_normalator& operator=(output_normalator&&) = default; + + /** + * @brief Indirection operator returns this iterator instance in order + * to capture the `operator=(Integer)` calls. + */ + __device__ inline output_normalator const& operator*() const { return *this; } + + /** + * @brief Array subscript operator returns an iterator instance at the specified `idx` position. + * + * This allows capturing the subsequent `operator=(Integer)` call in this class. + */ + __device__ inline output_normalator const operator[](size_type idx) const + { + output_normalator tmp{*this}; + tmp.p_ += (idx * this->width_); + return tmp; + } + + /** + * @brief Dispatch functor for setting the index value from a size_type value. 
+ */ + struct normalize_type { + template >* = nullptr> + __device__ void operator()(void* tp, Integer const value) + { + (*static_cast(tp)) = static_cast(value); + } + template >* = nullptr> + __device__ void operator()(void*, Integer const) + { + CUDF_UNREACHABLE("only index types are supported"); + } + }; + + /** + * @brief Assign an Integer value to the current iterator position + */ + __device__ inline output_normalator const& operator=(Integer const value) const + { + void* tp = p_; + type_dispatcher(this->dtype_, normalize_type{}, tp, value); + return *this; + } + + /** + * @brief Create an output normalizing iterator + * + * @param data Pointer to an integer array in device memory. + * @param data_type Type of data in data + */ + output_normalator(void* data, data_type dtype) + : base_normalator, Integer>(dtype), p_{static_cast(data)} + { + } + + char* p_; /// pointer to the integer data in device memory +}; + +} // namespace detail +} // namespace cudf From 517d1239c913c86f7c1d9dc6642434e73aa2b14c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 22 Sep 2023 12:40:09 -0700 Subject: [PATCH 09/76] Expose streams in all public sorting APIs (#14146) Contributes to #925 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/14146 --- cpp/include/cudf/sorting.hpp | 44 ++++++--- cpp/src/lists/segmented_sort.cu | 30 +++--- cpp/src/sort/is_sorted.cu | 5 +- cpp/src/sort/rank.cu | 11 +-- cpp/src/sort/segmented_sort.cu | 8 +- cpp/src/sort/segmented_sort_impl.cuh | 2 +- cpp/src/sort/sort.cu | 10 +- cpp/src/sort/stable_segmented_sort.cu | 8 +- cpp/src/sort/stable_sort.cu | 8 +- cpp/tests/CMakeLists.txt | 7 +- cpp/tests/streams/sorting_test.cpp | 132 ++++++++++++++++++++++++++ 11 files changed, 210 insertions(+), 55 deletions(-) create mode 100644 cpp/tests/streams/sorting_test.cpp diff --git 
a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 6924e77ae9b..e4e803b2d3c 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -18,6 +18,7 @@ #include #include +#include #include @@ -43,6 +44,7 @@ namespace cudf { * @param null_precedence The desired order of null compared to other elements * for each column. Size must be equal to `input.num_columns()` or empty. * If empty, all columns will be sorted in `null_order::BEFORE`. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return A non-nullable column of elements containing the permuted row indices of * `input` if it were sorted @@ -51,6 +53,7 @@ std::unique_ptr sorted_order( table_view const& input, std::vector const& column_order = {}, std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -65,27 +68,30 @@ std::unique_ptr stable_sorted_order( table_view const& input, std::vector const& column_order = {}, std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Checks whether the rows of a `table` are sorted in a lexicographical * order. * - * @param[in] table Table whose rows need to be compared for ordering - * @param[in] column_order The expected sort order for each column. Size - * must be equal to `in.num_columns()` or empty. If - * empty, it is expected all columns are in - * ascending order. - * @param[in] null_precedence The desired order of null compared to other - * elements for each column. Size must be equal to - * `input.num_columns()` or empty. If empty, - * `null_order::BEFORE` is assumed for all columns. 
- * - * @returns bool true if sorted as expected, false if not + * @param table Table whose rows need to be compared for ordering + * @param column_order The expected sort order for each column. Size + * must be equal to `in.num_columns()` or empty. If + * empty, it is expected all columns are in + * ascending order. + * @param null_precedence The desired order of null compared to other + * elements for each column. Size must be equal to + * `input.num_columns()` or empty. If empty, + * `null_order::BEFORE` is assumed for all columns. + * + * @param stream CUDA stream used for device memory operations and kernel launches + * @returns true if sorted as expected, false if not */ bool is_sorted(cudf::table_view const& table, std::vector const& column_order, - std::vector const& null_precedence); + std::vector const& null_precedence, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Performs a lexicographic sort of the rows of a table @@ -98,6 +104,7 @@ bool is_sorted(cudf::table_view const& table, * elements for each column in `input`. Size must be equal to * `input.num_columns()` or empty. If empty, all columns will be sorted with * `null_order::BEFORE`. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return New table containing the desired sorted order of `input` */ @@ -105,6 +112,7 @@ std::unique_ptr sort( table_view const& input, std::vector const& column_order = {}, std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -124,6 +132,7 @@ std::unique_ptr
sort( * elements for each column in `keys`. Size must be equal to * `keys.num_columns()` or empty. If empty, all columns will be sorted with * `null_order::BEFORE`. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return The reordering of `values` determined by the lexicographic order of * the rows of `keys`. @@ -133,6 +142,7 @@ std::unique_ptr
sort_by_key( table_view const& keys, std::vector const& column_order = {}, std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -154,6 +164,7 @@ std::unique_ptr
sort_by_key( * elements for each column in `keys`. Size must be equal to * `keys.num_columns()` or empty. If empty, all columns will be sorted with * `null_order::BEFORE`. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return The reordering of `values` determined by the lexicographic order of * the rows of `keys`. @@ -163,6 +174,7 @@ std::unique_ptr
stable_sort_by_key( table_view const& keys, std::vector const& column_order = {}, std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -189,6 +201,7 @@ std::unique_ptr
stable_sort_by_key( * @param null_precedence The desired order of null compared to other elements * for column * @param percentage flag to convert ranks to percentage in range (0,1] + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return A column of containing the rank of the each element of the column of `input`. The output * column type will be `size_type`column by default or else `double` when @@ -201,6 +214,7 @@ std::unique_ptr rank( null_policy null_handling, null_order null_precedence, bool percentage, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -241,6 +255,7 @@ std::unique_ptr rank( * elements for each column in `keys`. Size must be equal to * `keys.num_columns()` or empty. If empty, all columns will be sorted with * `null_order::BEFORE`. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to allocate any returned objects * @return sorted order of the segment sorted table * @@ -250,6 +265,7 @@ std::unique_ptr segmented_sorted_order( column_view const& segment_offsets, std::vector const& column_order = {}, std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -262,6 +278,7 @@ std::unique_ptr stable_segmented_sorted_order( column_view const& segment_offsets, std::vector const& column_order = {}, std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -306,6 +323,7 @@ std::unique_ptr stable_segmented_sorted_order( * elements for each column in `keys`. Size must be equal to * `keys.num_columns()` or empty. 
If empty, all columns will be sorted with * `null_order::BEFORE`. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to allocate any returned objects * @return table with elements in each segment sorted * @@ -316,6 +334,7 @@ std::unique_ptr
segmented_sort_by_key( column_view const& segment_offsets, std::vector const& column_order = {}, std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -329,6 +348,7 @@ std::unique_ptr
stable_segmented_sort_by_key( column_view const& segment_offsets, std::vector const& column_order = {}, std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index 260636a61cf..49054ebb046 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,13 +70,13 @@ std::unique_ptr sort_lists(lists_column_view const& input, auto output_offset = build_output_offsets(input, stream, mr); auto const child = input.get_sliced_child(stream); - auto const sorted_child_table = segmented_sort_by_key(table_view{{child}}, - table_view{{child}}, - output_offset->view(), - {column_order}, - {null_precedence}, - stream, - mr); + auto const sorted_child_table = cudf::detail::segmented_sort_by_key(table_view{{child}}, + table_view{{child}}, + output_offset->view(), + {column_order}, + {null_precedence}, + stream, + mr); return make_lists_column(input.size(), std::move(output_offset), @@ -98,13 +98,13 @@ std::unique_ptr stable_sort_lists(lists_column_view const& input, auto output_offset = build_output_offsets(input, stream, mr); auto const child = input.get_sliced_child(stream); - auto const sorted_child_table = stable_segmented_sort_by_key(table_view{{child}}, - table_view{{child}}, - output_offset->view(), - {column_order}, - {null_precedence}, - stream, - mr); + auto const sorted_child_table = cudf::detail::stable_segmented_sort_by_key(table_view{{child}}, + table_view{{child}}, + output_offset->view(), + {column_order}, + {null_precedence}, + stream, + mr); return 
make_lists_column(input.size(), std::move(output_offset), diff --git a/cpp/src/sort/is_sorted.cu b/cpp/src/sort/is_sorted.cu index 25c594e9e74..39476a2f534 100644 --- a/cpp/src/sort/is_sorted.cu +++ b/cpp/src/sort/is_sorted.cu @@ -73,7 +73,8 @@ bool is_sorted(cudf::table_view const& in, bool is_sorted(cudf::table_view const& in, std::vector const& column_order, - std::vector const& null_precedence) + std::vector const& null_precedence, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); if (in.num_columns() == 0 || in.num_rows() == 0) { return true; } @@ -89,7 +90,7 @@ bool is_sorted(cudf::table_view const& in, "Number of columns in the table doesn't match the vector null_precedence's size .\n"); } - return detail::is_sorted(in, column_order, null_precedence, cudf::get_default_stream()); + return detail::is_sorted(in, column_order, null_precedence, stream); } } // namespace cudf diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index fd65e38d467..3ead8cfcbaa 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -366,16 +366,11 @@ std::unique_ptr rank(column_view const& input, null_policy null_handling, null_order null_precedence, bool percentage, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::rank(input, - method, - column_order, - null_handling, - null_precedence, - percentage, - cudf::get_default_stream(), - mr); + return detail::rank( + input, method, column_order, null_handling, null_precedence, percentage, stream, mr); } } // namespace cudf diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 38d008c120c..d9457341bd2 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -81,11 +81,12 @@ std::unique_ptr segmented_sorted_order(table_view const& keys, column_view const& segment_offsets, std::vector const& column_order, std::vector const& null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::segmented_sorted_order( - keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr); + keys, segment_offsets, column_order, null_precedence, stream, mr); } std::unique_ptr
segmented_sort_by_key(table_view const& values, @@ -93,11 +94,12 @@ std::unique_ptr
segmented_sort_by_key(table_view const& values, column_view const& segment_offsets, std::vector const& column_order, std::vector const& null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::segmented_sort_by_key( - values, keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr); + values, keys, segment_offsets, column_order, null_precedence, stream, mr); } } // namespace cudf diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh index 37664f33762..5d11bf055f1 100644 --- a/cpp/src/sort/segmented_sort_impl.cuh +++ b/cpp/src/sort/segmented_sort_impl.cuh @@ -166,7 +166,7 @@ std::unique_ptr fast_segmented_sorted_order(column_view const& input, // Unfortunately, CUB's segmented sort functions cannot accept iterators. // We have to build a pre-filled sequence of indices as input. auto sorted_indices = - cudf::detail::sequence(input.size(), numeric_scalar{0}, stream, mr); + cudf::detail::sequence(input.size(), numeric_scalar{0, true, stream}, stream, mr); auto indices_view = sorted_indices->mutable_view(); cudf::type_dispatcher(input.type(), diff --git a/cpp/src/sort/sort.cu b/cpp/src/sort/sort.cu index 25b95af4f83..46edae798d4 100644 --- a/cpp/src/sort/sort.cu +++ b/cpp/src/sort/sort.cu @@ -109,30 +109,32 @@ std::unique_ptr
sort(table_view const& input, std::unique_ptr sorted_order(table_view const& input, std::vector const& column_order, std::vector const& null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sorted_order(input, column_order, null_precedence, cudf::get_default_stream(), mr); + return detail::sorted_order(input, column_order, null_precedence, stream, mr); } std::unique_ptr
sort(table_view const& input, std::vector const& column_order, std::vector const& null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sort(input, column_order, null_precedence, cudf::get_default_stream(), mr); + return detail::sort(input, column_order, null_precedence, stream, mr); } std::unique_ptr
sort_by_key(table_view const& values, table_view const& keys, std::vector const& column_order, std::vector const& null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sort_by_key( - values, keys, column_order, null_precedence, cudf::get_default_stream(), mr); + return detail::sort_by_key(values, keys, column_order, null_precedence, stream, mr); } } // namespace cudf diff --git a/cpp/src/sort/stable_segmented_sort.cu b/cpp/src/sort/stable_segmented_sort.cu index 40df1b50279..4725d65e05d 100644 --- a/cpp/src/sort/stable_segmented_sort.cu +++ b/cpp/src/sort/stable_segmented_sort.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,11 +55,12 @@ std::unique_ptr stable_segmented_sorted_order( column_view const& segment_offsets, std::vector const& column_order, std::vector const& null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::stable_segmented_sorted_order( - keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr); + keys, segment_offsets, column_order, null_precedence, stream, mr); } std::unique_ptr
stable_segmented_sort_by_key(table_view const& values, @@ -67,11 +68,12 @@ std::unique_ptr
stable_segmented_sort_by_key(table_view const& values, column_view const& segment_offsets, std::vector const& column_order, std::vector const& null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::stable_segmented_sort_by_key( - values, keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr); + values, keys, segment_offsets, column_order, null_precedence, stream, mr); } } // namespace cudf diff --git a/cpp/src/sort/stable_sort.cu b/cpp/src/sort/stable_sort.cu index 6f5678c4168..cf602dcf1a9 100644 --- a/cpp/src/sort/stable_sort.cu +++ b/cpp/src/sort/stable_sort.cu @@ -62,22 +62,22 @@ std::unique_ptr
stable_sort_by_key(table_view const& values, std::unique_ptr stable_sorted_order(table_view const& input, std::vector const& column_order, std::vector const& null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::stable_sorted_order( - input, column_order, null_precedence, cudf::get_default_stream(), mr); + return detail::stable_sorted_order(input, column_order, null_precedence, stream, mr); } std::unique_ptr
stable_sort_by_key(table_view const& values, table_view const& keys, std::vector const& column_order, std::vector const& null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::stable_sort_by_key( - values, keys, column_order, null_precedence, cudf::get_default_stream(), mr); + return detail::stable_sort_by_key(values, keys, column_order, null_precedence, stream, mr); } } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ba4921848d7..c7d3e2af19f 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -621,17 +621,18 @@ ConfigureTest( STREAM_IDENTIFICATION_TEST identify_stream_usage/test_default_stream_identification.cu ) -ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) -ConfigureTest(STREAM_COPYING_TEST streams/copying_test.cpp STREAM_MODE testing) -ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_COPYING_TEST streams/copying_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE testing ) +ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) # ################################################################################################## diff --git a/cpp/tests/streams/sorting_test.cpp 
b/cpp/tests/streams/sorting_test.cpp new file mode 100644 index 00000000000..e481f95bded --- /dev/null +++ b/cpp/tests/streams/sorting_test.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +class SortingTest : public cudf::test::BaseFixture {}; + +TEST_F(SortingTest, SortedOrder) +{ + cudf::test::fixed_width_column_wrapper const column{10, 20, 30, 40, 50}; + cudf::table_view const tbl{{column}}; + + cudf::sorted_order(tbl, {}, {}, cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, StableSortedOrder) +{ + cudf::test::fixed_width_column_wrapper const column{10, 20, 30, 40, 50}; + cudf::table_view const tbl{{column}}; + + cudf::stable_sorted_order(tbl, {}, {}, cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, IsSorted) +{ + cudf::test::fixed_width_column_wrapper const column{10, 20, 30, 40, 50}; + cudf::table_view const tbl{{column}}; + + cudf::is_sorted(tbl, {}, {}, cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, Sort) +{ + cudf::test::fixed_width_column_wrapper const column{10, 20, 30, 40, 50}; + cudf::table_view const tbl{{column}}; + + cudf::sort(tbl, {}, {}, cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, SortByKey) +{ + cudf::test::fixed_width_column_wrapper const values_col{10, 20, 30, 40, 50}; + cudf::table_view const values{{values_col}}; + 
cudf::test::fixed_width_column_wrapper const keys_col{10, 20, 30, 40, 50}; + cudf::table_view const keys{{keys_col}}; + + cudf::sort_by_key(values, keys, {}, {}, cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, StableSortByKey) +{ + cudf::test::fixed_width_column_wrapper const values_col{10, 20, 30, 40, 50}; + cudf::table_view const values{{values_col}}; + cudf::test::fixed_width_column_wrapper const keys_col{10, 20, 30, 40, 50}; + cudf::table_view const keys{{keys_col}}; + + cudf::stable_sort_by_key(values, keys, {}, {}, cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, Rank) +{ + cudf::test::fixed_width_column_wrapper const column{10, 20, 30, 40, 50}; + + cudf::rank(column, + cudf::rank_method::AVERAGE, + cudf::order::ASCENDING, + cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + false, + cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, SegmentedSortedOrder) +{ + cudf::test::fixed_width_column_wrapper const keys_col{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + cudf::table_view const keys{{keys_col}}; + cudf::test::fixed_width_column_wrapper const segment_offsets{3, 7}; + + cudf::segmented_sorted_order(keys, segment_offsets, {}, {}, cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, StableSegmentedSortedOrder) +{ + cudf::test::fixed_width_column_wrapper const keys_col{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + cudf::table_view const keys{{keys_col}}; + cudf::test::fixed_width_column_wrapper const segment_offsets{3, 7}; + + cudf::stable_segmented_sorted_order( + keys, segment_offsets, {}, {}, cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, SegmentedSortByKey) +{ + cudf::test::fixed_width_column_wrapper const keys_col{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + cudf::table_view const keys{{keys_col}}; + cudf::test::fixed_width_column_wrapper const values_col{7, 6, 9, 3, 4, 5, 1, 2, 0, 4}; + cudf::table_view const values{{values_col}}; + cudf::test::fixed_width_column_wrapper const segment_offsets{0, 3, 7, 10}; + + 
cudf::segmented_sort_by_key( + values, keys, segment_offsets, {}, {}, cudf::test::get_default_stream()); +} + +TEST_F(SortingTest, StableSegmentedSortByKey) +{ + cudf::test::fixed_width_column_wrapper const keys_col{9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + cudf::table_view const keys{{keys_col}}; + cudf::test::fixed_width_column_wrapper const values_col{7, 6, 9, 3, 4, 5, 1, 2, 0, 4}; + cudf::table_view const values{{values_col}}; + cudf::test::fixed_width_column_wrapper const segment_offsets{0, 3, 7, 10}; + + cudf::stable_segmented_sort_by_key( + values, keys, segment_offsets, {}, {}, cudf::test::get_default_stream()); +} From 71f30bec80194e8711156cea90d09b4ee0c940bd Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 22 Sep 2023 17:59:25 -0700 Subject: [PATCH 10/76] Enable direct ingestion and production of Arrow scalars (#14121) This PR adds overloads of `from_arrow` and `to_arrow` for scalars to enable interoperability on par with Arrow Arrays. The new public APIs accept streams, and for consistency streams have also been added to the corresponding column APIs, so this PR contributes to #925. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - David Wendt (https://github.com/davidwendt) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14121 --- cpp/include/cudf/detail/interop.hpp | 80 ++++++++++++++++++-- cpp/include/cudf/interop.hpp | 35 ++++++++- cpp/src/interop/from_arrow.cu | 88 +++++++++++++++++++++- cpp/src/interop/to_arrow.cu | 99 +++++++++++++++++++------ cpp/tests/CMakeLists.txt | 1 + cpp/tests/interop/from_arrow_test.cpp | 95 ++++++++++++++++++++++++ cpp/tests/interop/to_arrow_test.cpp | 103 ++++++++++++++++++++++++++ cpp/tests/streams/interop_test.cpp | 68 +++++++++++++++++ 8 files changed, 540 insertions(+), 29 deletions(-) create mode 100644 cpp/tests/streams/interop_test.cpp diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 3d4832c8d17..44024333239 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -104,13 +104,67 @@ std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... args) } } +/** + * @brief Invokes an `operator()` template with the type instantiation based on + * the specified `arrow::DataType`'s `id()`. + * + * This function is analogous to libcudf's type_dispatcher, but instead applies + * to Arrow functions. Its primary use case is to leverage Arrow's + * metaprogramming facilities like arrow::TypeTraits that require translating + * the runtime dtype information into compile-time types. + */ +template +constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype, + Functor f, + Ts&&... 
args) +{ + switch (dtype.id()) { + case arrow::Type::INT8: + return f.template operator()(std::forward(args)...); + case arrow::Type::INT16: + return f.template operator()(std::forward(args)...); + case arrow::Type::INT32: + return f.template operator()(std::forward(args)...); + case arrow::Type::INT64: + return f.template operator()(std::forward(args)...); + case arrow::Type::UINT8: + return f.template operator()(std::forward(args)...); + case arrow::Type::UINT16: + return f.template operator()(std::forward(args)...); + case arrow::Type::UINT32: + return f.template operator()(std::forward(args)...); + case arrow::Type::UINT64: + return f.template operator()(std::forward(args)...); + case arrow::Type::FLOAT: + return f.template operator()(std::forward(args)...); + case arrow::Type::DOUBLE: + return f.template operator()(std::forward(args)...); + case arrow::Type::BOOL: + return f.template operator()(std::forward(args)...); + case arrow::Type::TIMESTAMP: + return f.template operator()(std::forward(args)...); + case arrow::Type::DURATION: + return f.template operator()(std::forward(args)...); + case arrow::Type::STRING: + return f.template operator()(std::forward(args)...); + case arrow::Type::LIST: + return f.template operator()(std::forward(args)...); + case arrow::Type::DECIMAL128: + return f.template operator()(std::forward(args)...); + case arrow::Type::STRUCT: + return f.template operator()(std::forward(args)...); + default: { + CUDF_FAIL("Invalid type."); + } + } +} + // Converting arrow type to cudf type data_type arrow_to_cudf_type(arrow::DataType const& arrow_type); /** - * @copydoc cudf::to_arrow - * - * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @copydoc cudf::to_arrow(table_view input, std::vector const& metadata, + * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr) */ std::shared_ptr to_arrow(table_view input, std::vector const& metadata, @@ -118,13 +172,27 @@ std::shared_ptr to_arrow(table_view input, arrow::MemoryPool* ar_mr); /** - * @copydoc cudf::arrow_to_cudf - * - * @param stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::to_arrow(cudf::scalar const& input, column_metadata const& metadata, + * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr) + */ +std::shared_ptr to_arrow(cudf::scalar const& input, + column_metadata const& metadata, + rmm::cuda_stream_view stream, + arrow::MemoryPool* ar_mr); +/** + * @copydoc cudf::from_arrow(arrow::Table const& input_table, rmm::cuda_stream_view stream, + * rmm::mr::device_memory_resource* mr) */ std::unique_ptr
from_arrow(arrow::Table const& input_table, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @copydoc cudf::from_arrow(arrow::Scalar const& input, rmm::cuda_stream_view stream, + * rmm::mr::device_memory_resource* mr) + */ +std::unique_ptr from_arrow(arrow::Scalar const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index e210179b147..865cc004107 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -126,23 +126,56 @@ struct column_metadata { * * @param input table_view that needs to be converted to arrow Table * @param metadata Contains hierarchy of names of columns and children + * @param stream CUDA stream used for device memory operations and kernel launches * @param ar_mr arrow memory pool to allocate memory for arrow Table * @return arrow Table generated from `input` */ std::shared_ptr to_arrow(table_view input, std::vector const& metadata = {}, - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); +/** + * @brief Create `arrow::Scalar` from cudf scalar `input` + * + * Converts the `cudf::scalar` to `arrow::Scalar`. 
+ * + * @param input scalar that needs to be converted to arrow Scalar + * @param metadata Contains hierarchy of names of columns and children + * @param stream CUDA stream used for device memory operations and kernel launches + * @param ar_mr arrow memory pool to allocate memory for arrow Scalar + * @return arrow Scalar generated from `input` + */ +std::shared_ptr to_arrow(cudf::scalar const& input, + column_metadata const& metadata = {}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); /** * @brief Create `cudf::table` from given arrow Table input * * @param input arrow:Table that needs to be converted to `cudf::table` + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate `cudf::table` * @return cudf table generated from given arrow Table */ std::unique_ptr
from_arrow( arrow::Table const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Create `cudf::scalar` from given arrow Scalar input + * + * @param input `arrow::Scalar` that needs to be converted to `cudf::scalar` + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate `cudf::scalar` + * @return cudf scalar generated from given arrow Scalar + */ + +std::unique_ptr from_arrow( + arrow::Scalar const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu index 30cfee97fd8..e39625c92e7 100644 --- a/cpp/src/interop/from_arrow.cu +++ b/cpp/src/interop/from_arrow.cu @@ -419,6 +419,52 @@ std::unique_ptr get_column(arrow::Array const& array, : get_empty_type_column(array.length()); } +struct BuilderGenerator { + template && + !std::is_same_v)> + std::shared_ptr operator()(std::shared_ptr const& type) + { + return std::make_shared::BuilderType>( + type, arrow::default_memory_pool()); + } + + template || + std::is_same_v)> + std::shared_ptr operator()(std::shared_ptr const& type) + { + CUDF_FAIL("Type not supported by BuilderGenerator"); + } +}; + +std::shared_ptr make_builder(std::shared_ptr const& type) +{ + switch (type->id()) { + case arrow::Type::STRUCT: { + std::vector> field_builders; + + for (auto field : type->fields()) { + auto const vt = field->type(); + if (vt->id() == arrow::Type::STRUCT || vt->id() == arrow::Type::LIST) { + field_builders.push_back(make_builder(vt)); + } else { + field_builders.push_back(arrow_type_dispatcher(*vt, BuilderGenerator{}, vt)); + } + } + return std::make_shared( + type, arrow::default_memory_pool(), field_builders); + } + case arrow::Type::LIST: 
{ + return std::make_shared(arrow::default_memory_pool(), + make_builder(type->field(0)->type())); + } + default: { + return arrow_type_dispatcher(*type, BuilderGenerator{}, type); + } + } +} + } // namespace std::unique_ptr
from_arrow(arrow::Table const& input_table, @@ -462,14 +508,54 @@ std::unique_ptr
from_arrow(arrow::Table const& input_table, return std::make_unique
(std::move(columns)); } +std::unique_ptr from_arrow(arrow::Scalar const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Get a builder for the scalar type + auto builder = detail::make_builder(input.type); + + auto status = builder->AppendScalar(input); + if (status != arrow::Status::OK()) { + if (status.IsNotImplemented()) { + // The only known failure case here is for nulls + CUDF_FAIL("Cannot create untyped null scalars or nested types with untyped null leaf nodes", + std::invalid_argument); + } + CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); + } + + auto maybe_array = builder->Finish(); + if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); } + auto array = *maybe_array; + + auto field = arrow::field("", input.type); + + auto table = arrow::Table::Make(arrow::schema({field}), {array}); + + auto cudf_table = detail::from_arrow(*table, stream, mr); + + auto cv = cudf_table->view().column(0); + return get_element(cv, 0, stream); +} + } // namespace detail std::unique_ptr
from_arrow(arrow::Table const& input_table, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_arrow(input_table, cudf::get_default_stream(), mr); + return detail::from_arrow(input_table, stream, mr); } +std::unique_ptr from_arrow(arrow::Scalar const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + + return detail::from_arrow(input, stream, mr); +} } // namespace cudf diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 958a2fcb95f..0cd750bc947 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -15,14 +15,16 @@ */ #include +#include #include +#include #include #include +#include #include #include #include #include -#include #include #include #include @@ -77,7 +79,10 @@ std::shared_ptr fetch_mask_buffer(column_view input_view, auto mask_buffer = allocate_arrow_bitmap(static_cast(input_view.size()), ar_mr); CUDF_CUDA_TRY(cudaMemcpyAsync( mask_buffer->mutable_data(), - (input_view.offset() > 0) ? cudf::copy_bitmask(input_view).data() : input_view.null_mask(), + (input_view.offset() > 0) + ? cudf::detail::copy_bitmask(input_view, stream, rmm::mr::get_current_device_resource()) + .data() + : input_view.null_mask(), mask_size_in_bytes, cudaMemcpyDefault, stream.value())); @@ -139,29 +144,36 @@ struct dispatch_to_arrow { } }; -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) +// Convert decimal types from libcudf to arrow where those types are not +// directly supported by Arrow. These types must be fit into 128 bits, the +// smallest decimal resolution supported by Arrow. 
+template +std::shared_ptr unsupported_decimals_to_arrow(column_view input, + int32_t precision, + arrow::MemoryPool* ar_mr, + rmm::cuda_stream_view stream) { - using DeviceType = int64_t; - size_type const BIT_WIDTH_RATIO = 2; // Array::Type:type::DECIMAL (128) / int64_t + constexpr size_type BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DeviceType); rmm::device_uvector buf(input.size() * BIT_WIDTH_RATIO, stream); auto count = thrust::make_counting_iterator(0); - thrust::for_each(rmm::exec_policy(cudf::get_default_stream()), - count, - count + input.size(), - [in = input.begin(), out = buf.data()] __device__(auto in_idx) { - auto const out_idx = in_idx * 2; - out[out_idx] = in[in_idx]; - out[out_idx + 1] = in[in_idx] < 0 ? -1 : 0; - }); + thrust::for_each( + rmm::exec_policy(cudf::get_default_stream()), + count, + count + input.size(), + [in = input.begin(), out = buf.data(), BIT_WIDTH_RATIO] __device__(auto in_idx) { + auto const out_idx = in_idx * BIT_WIDTH_RATIO; + // The lowest order bits are the value, the remainder + // simply matches the sign bit to satisfy the two's + // complement integer representation of negative numbers. + out[out_idx] = in[in_idx]; +#pragma unroll BIT_WIDTH_RATIO - 1 + for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) { + out[out_idx + i] = in[in_idx] < 0 ? 
-1 : 0; + } + }); auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType); auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); @@ -169,7 +181,7 @@ std::shared_ptr dispatch_to_arrow::operator()( CUDF_CUDA_TRY(cudaMemcpyAsync( data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value())); - auto type = arrow::decimal(18, -input.type().scale()); + auto type = arrow::decimal(precision, -input.type().scale()); auto mask = fetch_mask_buffer(input, ar_mr, stream); auto buffers = std::vector>{mask, std::move(data_buffer)}; auto data = std::make_shared(type, input.size(), buffers); @@ -177,6 +189,28 @@ std::shared_ptr dispatch_to_arrow::operator()( return std::make_shared(data); } +template <> +std::shared_ptr dispatch_to_arrow::operator()( + column_view input, + cudf::type_id, + column_metadata const&, + arrow::MemoryPool* ar_mr, + rmm::cuda_stream_view stream) +{ + return unsupported_decimals_to_arrow(input, 9, ar_mr, stream); +} + +template <> +std::shared_ptr dispatch_to_arrow::operator()( + column_view input, + cudf::type_id, + column_metadata const&, + arrow::MemoryPool* ar_mr, + rmm::cuda_stream_view stream) +{ + return unsupported_decimals_to_arrow(input, 18, ar_mr, stream); +} + template <> std::shared_ptr dispatch_to_arrow::operator()( column_view input, @@ -403,14 +437,37 @@ std::shared_ptr to_arrow(table_view input, return result; } + +std::shared_ptr to_arrow(cudf::scalar const& input, + column_metadata const& metadata, + rmm::cuda_stream_view stream, + arrow::MemoryPool* ar_mr) +{ + auto const column = cudf::make_column_from_scalar(input, 1, stream); + cudf::table_view const tv{{column->view()}}; + auto const arrow_table = cudf::to_arrow(tv, {metadata}, stream); + auto const ac = arrow_table->column(0); + auto const maybe_scalar = ac->GetScalar(0); + if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); } + return maybe_scalar.ValueOrDie(); +} } // namespace detail std::shared_ptr 
to_arrow(table_view input, std::vector const& metadata, + rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr) { CUDF_FUNC_RANGE(); - return detail::to_arrow(input, metadata, cudf::get_default_stream(), ar_mr); + return detail::to_arrow(input, metadata, stream, ar_mr); } +std::shared_ptr to_arrow(cudf::scalar const& input, + column_metadata const& metadata, + rmm::cuda_stream_view stream, + arrow::MemoryPool* ar_mr) +{ + CUDF_FUNC_RANGE(); + return detail::to_arrow(input, metadata, stream, ar_mr); +} } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c7d3e2af19f..956bfc7c27d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -626,6 +626,7 @@ ConfigureTest(STREAM_COPYING_TEST streams/copying_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest( diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 9a5cc3733af..a898106a5b2 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -456,3 +456,98 @@ INSTANTIATE_TEST_CASE_P(FromArrowTest, std::make_tuple(0, 0), std::make_tuple(0, 3000), std::make_tuple(10000, 10000))); + +template +struct FromArrowNumericScalarTest : public cudf::test::BaseFixture {}; + +using NumericTypesNotBool = + cudf::test::Concat; +TYPED_TEST_SUITE(FromArrowNumericScalarTest, NumericTypesNotBool); + +TYPED_TEST(FromArrowNumericScalarTest, Basic) +{ + TypeParam const value{42}; + auto const arrow_scalar = arrow::MakeScalar(value); + auto const 
cudf_scalar = cudf::from_arrow(*arrow_scalar); + auto const cudf_numeric_scalar = + dynamic_cast*>(cudf_scalar.get()); + if (cudf_numeric_scalar == nullptr) { CUDF_FAIL("Attempted to test with a non-numeric type."); } + EXPECT_EQ(cudf_numeric_scalar->type(), cudf::data_type(cudf::type_to_id())); + EXPECT_EQ(cudf_numeric_scalar->value(), value); +} + +struct FromArrowDecimalScalarTest : public cudf::test::BaseFixture {}; + +// Only testing Decimal128 because that's the only size cudf and arrow have in common. +TEST_F(FromArrowDecimalScalarTest, Basic) +{ + auto const value{42}; + auto const precision{8}; + auto const scale{4}; + auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale)); + auto cudf_scalar = cudf::from_arrow(arrow_scalar); + + // Arrow offers a minimum of 128 bits for the Decimal type. + auto const cudf_decimal_scalar = + dynamic_cast*>(cudf_scalar.get()); + EXPECT_EQ(cudf_decimal_scalar->type(), + cudf::data_type(cudf::type_to_id(), scale)); + EXPECT_EQ(cudf_decimal_scalar->value(), value); +} + +struct FromArrowStringScalarTest : public cudf::test::BaseFixture {}; + +TEST_F(FromArrowStringScalarTest, Basic) +{ + auto const value = std::string("hello world"); + auto const arrow_scalar = arrow::StringScalar(value); + auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + + auto const cudf_string_scalar = dynamic_cast(cudf_scalar.get()); + EXPECT_EQ(cudf_string_scalar->type(), cudf::data_type(cudf::type_id::STRING)); + EXPECT_EQ(cudf_string_scalar->to_string(), value); +} + +struct FromArrowListScalarTest : public cudf::test::BaseFixture {}; + +TEST_F(FromArrowListScalarTest, Basic) +{ + std::vector host_values = {1, 2, 3, 5, 6, 7, 8}; + std::vector host_validity = {true, true, true, false, true, true, true}; + + arrow::Int64Builder builder; + auto const status = builder.AppendValues(host_values, host_validity); + auto const maybe_array = builder.Finish(); + auto const array = *maybe_array; + + auto const 
arrow_scalar = arrow::ListScalar(array); + auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + + auto const cudf_list_scalar = dynamic_cast(cudf_scalar.get()); + EXPECT_EQ(cudf_list_scalar->type(), cudf::data_type(cudf::type_id::LIST)); + + cudf::test::fixed_width_column_wrapper const lhs( + host_values.begin(), host_values.end(), host_validity.begin()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(lhs, cudf_list_scalar->view()); +} + +struct FromArrowStructScalarTest : public cudf::test::BaseFixture {}; + +TEST_F(FromArrowStructScalarTest, Basic) +{ + int64_t const value{42}; + auto const underlying_arrow_scalar = arrow::MakeScalar(value); + + auto const field = arrow::field("", underlying_arrow_scalar->type); + auto const arrow_type = arrow::struct_({field}); + auto const arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type); + auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + + auto const cudf_struct_scalar = dynamic_cast(cudf_scalar.get()); + EXPECT_EQ(cudf_struct_scalar->type(), cudf::data_type(cudf::type_id::STRUCT)); + + cudf::test::fixed_width_column_wrapper const col({value}); + cudf::table_view const lhs({col}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(lhs, cudf_struct_scalar->view()); +} diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 97d80984272..6bb4cdfd747 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -578,4 +579,106 @@ INSTANTIATE_TEST_CASE_P(ToArrowTest, std::make_tuple(0, 0), std::make_tuple(0, 3000))); +template +struct ToArrowNumericScalarTest : public cudf::test::BaseFixture {}; + +using NumericTypesNotBool = + cudf::test::Concat; +TYPED_TEST_SUITE(ToArrowNumericScalarTest, NumericTypesNotBool); + +TYPED_TEST(ToArrowNumericScalarTest, Basic) +{ + TypeParam const value{42}; + auto const cudf_scalar = cudf::make_fixed_width_scalar(value); + + 
cudf::column_metadata const metadata{""}; + auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + + auto const ref_arrow_scalar = arrow::MakeScalar(value); + EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); +} + +struct ToArrowDecimalScalarTest : public cudf::test::BaseFixture {}; + +// Only testing Decimal128 because that's the only size cudf and arrow have in common. +TEST_F(ToArrowDecimalScalarTest, Basic) +{ + auto const value{42}; + auto const precision{18}; // cudf will convert to the widest-precision Arrow scalar of the type + int32_t const scale{4}; + + auto const cudf_scalar = + cudf::make_fixed_point_scalar(value, numeric::scale_type{scale}); + + cudf::column_metadata const metadata{""}; + auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + + auto const maybe_ref_arrow_scalar = + arrow::MakeScalar(arrow::decimal128(precision, -scale), value); + if (!maybe_ref_arrow_scalar.ok()) { CUDF_FAIL("Failed to construct reference scalar"); } + auto const ref_arrow_scalar = *maybe_ref_arrow_scalar; + EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); +} + +struct ToArrowStringScalarTest : public cudf::test::BaseFixture {}; + +TEST_F(ToArrowStringScalarTest, Basic) +{ + std::string const value{"hello world"}; + auto const cudf_scalar = cudf::make_string_scalar(value); + cudf::column_metadata const metadata{""}; + auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + + auto const ref_arrow_scalar = arrow::MakeScalar(value); + EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); +} + +struct ToArrowListScalarTest : public cudf::test::BaseFixture {}; + +TEST_F(ToArrowListScalarTest, Basic) +{ + std::vector const host_values = {1, 2, 3, 5, 6, 7, 8}; + std::vector const host_validity = {true, true, true, false, true, true, true}; + + cudf::test::fixed_width_column_wrapper const col( + host_values.begin(), host_values.end(), host_validity.begin()); + + auto const cudf_scalar = cudf::make_list_scalar(col); + + 
cudf::column_metadata const metadata{""}; + auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + + arrow::Int64Builder builder; + auto const status = builder.AppendValues(host_values, host_validity); + auto const maybe_array = builder.Finish(); + auto const array = *maybe_array; + + auto const ref_arrow_scalar = arrow::ListScalar(array); + + EXPECT_TRUE(arrow_scalar->Equals(ref_arrow_scalar)); +} + +struct ToArrowStructScalarTest : public cudf::test::BaseFixture {}; + +TEST_F(ToArrowStructScalarTest, Basic) +{ + int64_t const value{42}; + auto const field_name{"a"}; + + cudf::test::fixed_width_column_wrapper const col{value}; + cudf::table_view const tbl({col}); + auto const cudf_scalar = cudf::make_struct_scalar(tbl); + + cudf::column_metadata metadata{""}; + metadata.children_meta.emplace_back(field_name); + auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + + auto const underlying_arrow_scalar = arrow::MakeScalar(value); + auto const field = arrow::field(field_name, underlying_arrow_scalar->type, false); + auto const arrow_type = arrow::struct_({field}); + auto const ref_arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type); + + EXPECT_TRUE(arrow_scalar->Equals(ref_arrow_scalar)); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/streams/interop_test.cpp b/cpp/tests/streams/interop_test.cpp new file mode 100644 index 00000000000..7eac9e016eb --- /dev/null +++ b/cpp/tests/streams/interop_test.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +struct ArrowTest : public cudf::test::BaseFixture {}; + +TEST_F(ArrowTest, ToArrow) +{ + int32_t const value{42}; + auto col = cudf::test::fixed_width_column_wrapper{{value}}; + cudf::table_view tbl{{col}}; + + std::vector metadata{{""}}; + cudf::to_arrow(tbl, metadata, cudf::test::get_default_stream()); +} + +TEST_F(ArrowTest, FromArrow) +{ + std::vector host_values = {1, 2, 3, 5, 6, 7, 8}; + std::vector host_validity = {true, true, true, false, true, true, true}; + + arrow::Int64Builder builder; + auto status = builder.AppendValues(host_values, host_validity); + auto maybe_array = builder.Finish(); + auto array = *maybe_array; + + auto field = arrow::field("", arrow::int32()); + auto schema = arrow::schema({field}); + auto table = arrow::Table::Make(schema, {array}); + cudf::from_arrow(*table, cudf::test::get_default_stream()); +} + +TEST_F(ArrowTest, ToArrowScalar) +{ + int32_t const value{42}; + auto cudf_scalar = + cudf::make_fixed_width_scalar(value, cudf::test::get_default_stream()); + + cudf::column_metadata metadata{""}; + cudf::to_arrow(*cudf_scalar, metadata, cudf::test::get_default_stream()); +} + +TEST_F(ArrowTest, FromArrowScalar) +{ + int32_t const value{42}; + auto arrow_scalar = arrow::MakeScalar(value); + cudf::from_arrow(*arrow_scalar, cudf::test::get_default_stream()); +} From d67cc5d05a6c18dd832f7b63421296fb66ae56f1 Mon Sep 17 00:00:00 2001 From: MithunR Date: Fri, 22 Sep 2023 22:01:40 -0700 Subject: [PATCH 11/76] Fix assert failure for range window 
functions (#14168) Authors: - MithunR (https://github.com/mythrocks) - Yunsong Wang (https://github.com/PointKernel) Approvers: - Divye Gala (https://github.com/divyegala) - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/14168 --- cpp/src/rolling/grouped_rolling.cu | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index 6e69b5157c2..7ac784bef43 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -357,6 +357,16 @@ template struct device_value_accessor { column_device_view const col; ///< column view of column in device + /// Checks that the type used to access device values matches the rep-type + /// of the order-by column. + struct is_correct_range_rep { + template /// Order-by type. + constexpr bool operator()() const + { + return std::is_same_v>; + } + }; + /** * @brief constructor * @@ -364,8 +374,11 @@ struct device_value_accessor { */ explicit __device__ device_value_accessor(column_device_view const& col_) : col{col_} { - cudf_assert(type_id_matches_device_storage_type(col.type().id()) && - "the data type mismatch"); + // For non-timestamp types, T must match the order-by column's type. + // For timestamp types, T must match the range rep type for the order-by column. + cudf_assert((type_id_matches_device_storage_type(col.type().id()) or + cudf::type_dispatcher(col.type(), is_correct_range_rep{})) && + "data type mismatch when accessing the order-by column"); } /** From fe3cab5595337300345573d7e64fa52cba78a6c5 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Mon, 25 Sep 2023 10:15:44 +0530 Subject: [PATCH 12/76] Fix Memcheck error found in JSON_TEST JsonReaderTest.ErrorStrings (#14164) Fix missing null mask in string column names parsing. For parsing error, the row is made null. 
To write output properly, the nulls need to be passed so that they can be skipped during the output-writing stage in `parse_data`. Fixes #14141 Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - David Wendt (https://github.com/davidwendt) - Elias Stehle (https://github.com/elstehle) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14164 --- cpp/src/io/utilities/data_casting.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu index 1772e5e43fa..d16237d7afe 100644 --- a/cpp/src/io/utilities/data_casting.cu +++ b/cpp/src/io/utilities/data_casting.cu @@ -924,6 +924,9 @@ std::unique_ptr parse_data( if (col_size == 0) { return make_empty_column(col_type); } auto d_null_count = rmm::device_scalar(null_count, stream); auto null_count_data = d_null_count.data(); + if (null_mask.is_empty()) { + null_mask = cudf::detail::create_null_mask(col_size, mask_state::ALL_VALID, stream, mr); + } // Prepare iterator that returns (string_ptr, string_length)-pairs needed by type conversion auto str_tuples = thrust::make_transform_iterator(offset_length_begin, to_string_view_pair{data}); From 3f47b5d463445faa9f95b1cc57c46fb5b41f60a7 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 25 Sep 2023 11:28:33 -0400 Subject: [PATCH 13/76] Move cpp/src/hash/hash_allocator.cuh to include/cudf/hashing/detail (#14163) Moves `cpp/src/hash/hash_allocator.cuh` to `include/cudf/hashing/detail` so it may be more accessible from non-src/hash source files. Also, found `cpp/src/hash/helper_functions.cuh` used in the same way and moved that one as well. No functional changes, just headers moved and includes fixed up.
Reference: https://github.com/rapidsai/cudf/pull/13930#discussion_r1330118935 Closes #14143 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14163 --- .../hash => include/cudf/hashing/detail}/hash_allocator.cuh | 0 .../cudf/hashing/detail}/helper_functions.cuh | 0 cpp/src/hash/concurrent_unordered_map.cuh | 4 ++-- cpp/src/hash/unordered_multiset.cuh | 3 +-- cpp/src/io/json/json_tree.cu | 4 ++-- cpp/src/join/join_common_utils.hpp | 5 ++--- cpp/src/stream_compaction/stream_compaction_common.hpp | 5 ++--- cpp/src/text/subword/bpe_tokenizer.cuh | 3 +-- 8 files changed, 10 insertions(+), 14 deletions(-) rename cpp/{src/hash => include/cudf/hashing/detail}/hash_allocator.cuh (100%) rename cpp/{src/hash => include/cudf/hashing/detail}/helper_functions.cuh (100%) diff --git a/cpp/src/hash/hash_allocator.cuh b/cpp/include/cudf/hashing/detail/hash_allocator.cuh similarity index 100% rename from cpp/src/hash/hash_allocator.cuh rename to cpp/include/cudf/hashing/detail/hash_allocator.cuh diff --git a/cpp/src/hash/helper_functions.cuh b/cpp/include/cudf/hashing/detail/helper_functions.cuh similarity index 100% rename from cpp/src/hash/helper_functions.cuh rename to cpp/include/cudf/hashing/detail/helper_functions.cuh diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh index 439b1c2d066..d773c2763df 100644 --- a/cpp/src/hash/concurrent_unordered_map.cuh +++ b/cpp/src/hash/concurrent_unordered_map.cuh @@ -16,12 +16,12 @@ #pragma once -#include -#include #include #include #include +#include +#include #include #include diff --git a/cpp/src/hash/unordered_multiset.cuh b/cpp/src/hash/unordered_multiset.cuh index 87075a39ea3..183042fc0f4 100644 --- a/cpp/src/hash/unordered_multiset.cuh +++ b/cpp/src/hash/unordered_multiset.cuh @@ -16,11 +16,10 @@ #pragma once -#include - #include 
#include #include +#include #include #include diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index 9231040eb70..da5b0eedfbd 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -15,8 +15,6 @@ */ #include "nested_json.hpp" -#include -#include #include #include @@ -24,7 +22,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 4c1b1ed98b1..e96505e5ed6 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -17,13 +17,12 @@ #include #include +#include +#include #include #include #include -#include -#include - #include #include diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp index 58d958d2ff4..18c531e3e69 100644 --- a/cpp/src/stream_compaction/stream_compaction_common.hpp +++ b/cpp/src/stream_compaction/stream_compaction_common.hpp @@ -15,12 +15,11 @@ */ #pragma once +#include +#include #include #include -#include -#include - #include #include diff --git a/cpp/src/text/subword/bpe_tokenizer.cuh b/cpp/src/text/subword/bpe_tokenizer.cuh index 83aa22aaae9..2fa879ea734 100644 --- a/cpp/src/text/subword/bpe_tokenizer.cuh +++ b/cpp/src/text/subword/bpe_tokenizer.cuh @@ -18,10 +18,9 @@ #include -#include - #include #include +#include #include #include From 036c07d363406da9e500c3d6be9a3edca28fd6c2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Sep 2023 06:36:26 -1000 Subject: [PATCH 14/76] Fix DataFrame from Series with different CategoricalIndexes (#14157) closes #14130 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/14157 --- python/cudf/cudf/core/indexed_frame.py | 7 +++++++ python/cudf/cudf/tests/test_dataframe.py | 13 
+++++++++++++ 2 files changed, 20 insertions(+) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 62e091b29b5..aacf1fa8dae 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -5438,6 +5438,13 @@ def _is_same_dtype(lhs_dtype, rhs_dtype): # for matching column dtype. if lhs_dtype == rhs_dtype: return True + elif ( + is_categorical_dtype(lhs_dtype) + and is_categorical_dtype(rhs_dtype) + and lhs_dtype.categories.dtype == rhs_dtype.categories.dtype + ): + # OK if categories are not all the same + return True elif ( is_categorical_dtype(lhs_dtype) and not is_categorical_dtype(rhs_dtype) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 2f531afdeb7..67b63028fab 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -10408,6 +10408,19 @@ def test_dataframe_init_from_nested_dict(): assert_eq(pdf, gdf) +def test_init_from_2_categoricalindex_series_diff_categories(): + s1 = cudf.Series( + [39, 6, 4], index=cudf.CategoricalIndex(["female", "male", "unknown"]) + ) + s2 = cudf.Series( + [2, 152, 2, 242, 150], + index=cudf.CategoricalIndex(["f", "female", "m", "male", "unknown"]), + ) + result = cudf.DataFrame([s1, s2]) + expected = pd.DataFrame([s1.to_pandas(), s2.to_pandas()]) + assert_eq(result, expected, check_dtype=False) + + def test_data_frame_values_no_cols_but_index(): result = cudf.DataFrame(index=range(5)).values expected = pd.DataFrame(index=range(5)).values From ddd2b0dfac0903c5f17d581eca5d6b945ede9451 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Mon, 25 Sep 2023 13:14:18 -0500 Subject: [PATCH 15/76] Allow explicit `shuffle="p2p"` within dask-cudf API (#13893) This PR allows explicit `shuffle="p2p"` usage within the dask-cudf API now that https://github.com/dask/distributed/pull/7743 is in. 
Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) - Ray Douglass (https://github.com/raydouglass) - gpuCI (https://github.com/GPUtester) - Mike Wendt (https://github.com/mike-wendt) - AJ Schmidt (https://github.com/ajschmidt8) - GALI PREM SAGAR (https://github.com/galipremsagar) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/13893 --- python/dask_cudf/dask_cudf/backends.py | 31 ++++++++++++++++--- python/dask_cudf/dask_cudf/sorting.py | 26 +++++++++++----- .../dask_cudf/tests/test_dispatch.py | 11 +++++-- .../dask_cudf/tests/test_distributed.py | 22 ++++++++++++- 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index e3f4f04eb85..344b03c631d 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -373,22 +373,37 @@ def percentile_cudf(a, q, interpolation="linear"): @pyarrow_schema_dispatch.register((cudf.DataFrame,)) -def _get_pyarrow_schema_cudf(obj, preserve_index=True, **kwargs): +def _get_pyarrow_schema_cudf(obj, preserve_index=None, **kwargs): if kwargs: warnings.warn( "Ignoring the following arguments to " f"`pyarrow_schema_dispatch`: {list(kwargs)}" ) - return meta_nonempty(obj).to_arrow(preserve_index=preserve_index).schema + + return _cudf_to_table( + meta_nonempty(obj), preserve_index=preserve_index + ).schema @to_pyarrow_table_dispatch.register(cudf.DataFrame) -def _cudf_to_table(obj, preserve_index=True, **kwargs): +def _cudf_to_table(obj, preserve_index=None, **kwargs): if kwargs: warnings.warn( "Ignoring the following arguments to " f"`to_pyarrow_table_dispatch`: {list(kwargs)}" ) + + # TODO: Remove this logic when cudf#14159 is resolved + # (see: https://github.com/rapidsai/cudf/issues/14159) + if preserve_index and isinstance(obj.index, cudf.RangeIndex): + obj = obj.copy() + obj.index.name = ( + 
obj.index.name + if obj.index.name is not None + else "__index_level_0__" + ) + obj.index = obj.index._as_int_index() + return obj.to_arrow(preserve_index=preserve_index) @@ -401,7 +416,15 @@ def _table_to_cudf(obj, table, self_destruct=None, **kwargs): f"Ignoring the following arguments to " f"`from_pyarrow_table_dispatch`: {list(kwargs)}" ) - return obj.from_arrow(table) + result = obj.from_arrow(table) + + # TODO: Remove this logic when cudf#14159 is resolved + # (see: https://github.com/rapidsai/cudf/issues/14159) + if "__index_level_0__" in result.index.names: + assert len(result.index.names) == 1 + result.index.name = None + + return result @union_categoricals_dispatch.register((cudf.Series, cudf.BaseIndex)) diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py index e841f2d8830..d6c9c1be73c 100644 --- a/python/dask_cudf/dask_cudf/sorting.py +++ b/python/dask_cudf/dask_cudf/sorting.py @@ -6,7 +6,7 @@ import numpy as np import tlz as toolz -import dask +from dask import config from dask.base import tokenize from dask.dataframe import methods from dask.dataframe.core import DataFrame, Index, Series @@ -18,6 +18,8 @@ from cudf.api.types import is_categorical_dtype from cudf.utils.utils import _dask_cudf_nvtx_annotate +_SHUFFLE_SUPPORT = ("tasks", "p2p") # "disk" not supported + @_dask_cudf_nvtx_annotate def set_index_post(df, index_name, drop, column_dtype): @@ -307,15 +309,25 @@ def sort_values( return df4 +def get_default_shuffle_method(): + # Note that `dask.utils.get_default_shuffle_method` + # will return "p2p" by default when a distributed + # client is present. Dask-cudf supports "p2p", but + # will not use it by default (yet) + default = config.get("dataframe.shuffle.method", "tasks") + if default not in _SHUFFLE_SUPPORT: + default = "tasks" + return default + + def _get_shuffle_type(shuffle): # Utility to set the shuffle-kwarg default - # and to validate user-specified options. 
- # The only supported options is currently "tasks" - shuffle = shuffle or dask.config.get("shuffle", "tasks") - if shuffle != "tasks": + # and to validate user-specified options + shuffle = shuffle or get_default_shuffle_method() + if shuffle not in _SHUFFLE_SUPPORT: raise ValueError( - f"Dask-cudf only supports in-memory shuffling with " - f"'tasks'. Got shuffle={shuffle}" + "Dask-cudf only supports the following shuffle " + f"methods: {_SHUFFLE_SUPPORT}. Got shuffle={shuffle}" ) return shuffle diff --git a/python/dask_cudf/dask_cudf/tests/test_dispatch.py b/python/dask_cudf/dask_cudf/tests/test_dispatch.py index cf49b1df4f4..c64e25fd437 100644 --- a/python/dask_cudf/dask_cudf/tests/test_dispatch.py +++ b/python/dask_cudf/dask_cudf/tests/test_dispatch.py @@ -22,18 +22,25 @@ def test_is_categorical_dispatch(): assert is_categorical_dtype(cudf.Index([1, 2, 3], dtype="category")) -def test_pyarrow_conversion_dispatch(): +@pytest.mark.parametrize("preserve_index", [True, False]) +def test_pyarrow_conversion_dispatch(preserve_index): from dask.dataframe.dispatch import ( from_pyarrow_table_dispatch, to_pyarrow_table_dispatch, ) df1 = cudf.DataFrame(np.random.randn(10, 3), columns=list("abc")) - df2 = from_pyarrow_table_dispatch(df1, to_pyarrow_table_dispatch(df1)) + df2 = from_pyarrow_table_dispatch( + df1, to_pyarrow_table_dispatch(df1, preserve_index=preserve_index) + ) assert type(df1) == type(df2) assert_eq(df1, df2) + # Check that preserve_index does not produce a RangeIndex + if preserve_index: + assert not isinstance(df2.index, cudf.RangeIndex) + @pytest.mark.parametrize("index", [None, [1, 2] * 5]) def test_deterministic_tokenize(index): diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py index e24feaa2ea4..db3f3695648 100644 --- a/python/dask_cudf/dask_cudf/tests/test_distributed.py +++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA 
CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import numba.cuda import pytest @@ -77,3 +77,23 @@ def test_str_series_roundtrip(): actual = dask_series.compute() assert_eq(actual, expected) + + +def test_p2p_shuffle(): + # Check that we can use `shuffle="p2p"` + with dask_cuda.LocalCUDACluster(n_workers=1) as cluster: + with Client(cluster): + ddf = ( + dask.datasets.timeseries( + start="2000-01-01", + end="2000-01-08", + dtypes={"x": int}, + ) + .reset_index(drop=True) + .to_backend("cudf") + ) + dd.assert_eq( + ddf.sort_values("x", shuffle="p2p").compute(), + ddf.compute().sort_values("x"), + check_index=False, + ) From 1b925bfc7741eb22fed0a978fa0e1d0d5dfee601 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 25 Sep 2023 13:09:16 -0700 Subject: [PATCH 16/76] Add Parquet reader benchmarks for row selection (#14147) Re-enabled the group of benchmarks that compares row selection options in Parquet reader. Use `read_parquet_metadata` to get the column names and number of row groups. Clean up read chunk computation for ORC and Parquet benchmarks. 
Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - https://github.com/nvdbaranec - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14147 --- cpp/benchmarks/io/cuio_common.cpp | 18 ++--- cpp/benchmarks/io/orc/orc_reader_options.cpp | 12 ++-- .../io/parquet/parquet_reader_options.cpp | 65 +++++++++++-------- 3 files changed, 53 insertions(+), 42 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 6b8af91b842..b1aaef41340 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -141,17 +142,18 @@ std::vector select_column_names(std::vector const& col return col_names_to_read; } -std::vector segments_in_chunk(int num_segments, int num_chunks, int chunk) +std::vector segments_in_chunk(int num_segments, int num_chunks, int chunk_idx) { CUDF_EXPECTS(num_segments >= num_chunks, "Number of chunks cannot be greater than the number of segments in the file"); - auto start_segment = [num_segments, num_chunks](int chunk) { - return num_segments * chunk / num_chunks; - }; - std::vector selected_segments; - for (auto segment = start_segment(chunk); segment < start_segment(chunk + 1); ++segment) { - selected_segments.push_back(segment); - } + CUDF_EXPECTS(chunk_idx < num_chunks, + "Chunk index must be smaller than the number of chunks in the file"); + + auto const segments_in_chunk = cudf::util::div_rounding_up_unsafe(num_segments, num_chunks); + auto const begin_segment = std::min(chunk_idx * segments_in_chunk, num_segments); + auto const end_segment = std::min(begin_segment + segments_in_chunk, num_segments); + std::vector selected_segments(end_segment - begin_segment); + std::iota(selected_segments.begin(), selected_segments.end(), begin_segment); return selected_segments; } diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp index 
647a411c89d..1f656f7ea70 100644 --- a/cpp/benchmarks/io/orc/orc_reader_options.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_options.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -30,7 +31,7 @@ constexpr int64_t data_size = 512 << 20; // The number of separate read calls to use when reading files in multiple chunks // Each call reads roughly equal amounts of data -constexpr int32_t chunked_read_num_chunks = 8; +constexpr int32_t chunked_read_num_chunks = 4; std::vector get_top_level_col_names(cudf::io::source_info const& source) { @@ -88,7 +89,7 @@ void BM_orc_read_varying_options(nvbench::state& state, auto const num_stripes = cudf::io::read_orc_metadata(source_sink.make_source_info()).num_stripes(); - cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; + auto const chunk_row_cnt = cudf::util::div_rounding_up_unsafe(view.num_rows(), num_chunks); auto mem_stats_logger = cudf::memory_stats_logger(); state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); @@ -99,7 +100,6 @@ void BM_orc_read_varying_options(nvbench::state& state, timer.start(); cudf::size_type rows_read = 0; for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { - auto const is_last_chunk = chunk == (num_chunks - 1); switch (RowSelection) { case row_selection::ALL: break; case row_selection::STRIPES: @@ -108,7 +108,6 @@ void BM_orc_read_varying_options(nvbench::state& state, case row_selection::NROWS: read_options.set_skip_rows(chunk * chunk_row_cnt); read_options.set_num_rows(chunk_row_cnt); - if (is_last_chunk) read_options.set_num_rows(-1); break; default: CUDF_FAIL("Unsupported row selection method"); } @@ -132,9 +131,6 @@ using col_selections = nvbench::enum_type_list; -using row_selections = - nvbench::enum_type_list; - NVBENCH_BENCH_TYPES(BM_orc_read_varying_options, NVBENCH_TYPE_AXES(col_selections, nvbench::enum_type_list, @@ -146,6 +142,8 @@ NVBENCH_BENCH_TYPES(BM_orc_read_varying_options, {"column_selection", 
"row_selection", "uses_index", "uses_numpy_dtype", "timestamp_type"}) .set_min_samples(4); +using row_selections = + nvbench::enum_type_list; NVBENCH_BENCH_TYPES(BM_orc_read_varying_options, NVBENCH_TYPE_AXES(nvbench::enum_type_list, row_selections, diff --git a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp index 4105f2182d7..9f221de7da2 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -26,21 +27,21 @@ // Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to // run on most GPUs, but large enough to allow highest throughput -constexpr std::size_t data_size = 512 << 20; -constexpr std::size_t row_group_size = 128 << 20; +constexpr std::size_t data_size = 512 << 20; +// The number of separate read calls to use when reading files in multiple chunks +// Each call reads roughly equal amounts of data +constexpr int32_t chunked_read_num_chunks = 4; std::vector get_top_level_col_names(cudf::io::source_info const& source) { - cudf::io::parquet_reader_options const read_options = - cudf::io::parquet_reader_options::builder(source); - auto const schema = cudf::io::read_parquet(read_options).metadata.schema_info; - - std::vector names; - names.reserve(schema.size()); - std::transform(schema.cbegin(), schema.cend(), std::back_inserter(names), [](auto const& c) { - return c.name; - }); - return names; + auto const top_lvl_cols = cudf::io::read_parquet_metadata(source).schema().root().children(); + std::vector col_names; + std::transform(top_lvl_cols.cbegin(), + top_lvl_cols.cend(), + std::back_inserter(col_names), + [](auto const& col_meta) { return col_meta.name(); }); + + return col_names; } template , nvbench::enum_type>) { + auto const num_chunks = RowSelection == row_selection::ALL ? 
1 : chunked_read_num_chunks; + auto constexpr str_to_categories = ConvertsStrings == converts_strings::YES; auto constexpr uses_pd_metadata = UsesPandasMetadata == uses_pandas_metadata::YES; @@ -87,9 +90,8 @@ void BM_parquet_read_options(nvbench::state& state, .use_pandas_metadata(uses_pd_metadata) .timestamp_type(ts_type); - // TODO: add read_parquet_metadata to properly calculate #row_groups - auto constexpr num_row_groups = data_size / row_group_size; - auto constexpr num_chunks = 1; + auto const num_row_groups = read_parquet_metadata(source_sink.make_source_info()).num_rowgroups(); + auto const chunk_row_cnt = cudf::util::div_rounding_up_unsafe(view.num_rows(), num_chunks); auto mem_stats_logger = cudf::memory_stats_logger(); state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); @@ -100,18 +102,15 @@ void BM_parquet_read_options(nvbench::state& state, timer.start(); cudf::size_type rows_read = 0; for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { - auto const is_last_chunk = chunk == (num_chunks - 1); switch (RowSelection) { case row_selection::ALL: break; case row_selection::ROW_GROUPS: { - auto row_groups_to_read = segments_in_chunk(num_row_groups, num_chunks, chunk); - if (is_last_chunk) { - // Need to assume that an additional "overflow" row group is present - row_groups_to_read.push_back(num_row_groups); - } - read_options.set_row_groups({row_groups_to_read}); + read_options.set_row_groups({segments_in_chunk(num_row_groups, num_chunks, chunk)}); } break; - case row_selection::NROWS: [[fallthrough]]; + case row_selection::NROWS: + read_options.set_skip_rows(chunk * chunk_row_cnt); + read_options.set_num_rows(chunk_row_cnt); + break; default: CUDF_FAIL("Unsupported row selection method"); } @@ -130,14 +129,26 @@ void BM_parquet_read_options(nvbench::state& state, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } +using row_selections = + nvbench::enum_type_list; 
+NVBENCH_BENCH_TYPES(BM_parquet_read_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list, + row_selections, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("parquet_read_row_selection") + .set_type_axes_names({"column_selection", + "row_selection", + "str_to_categories", + "uses_pandas_metadata", + "timestamp_type"}) + .set_min_samples(4); + using col_selections = nvbench::enum_type_list; - -// TODO: row_selection::ROW_GROUPS disabled until we add an API to read metadata from a parquet file -// and determine num row groups. https://github.com/rapidsai/cudf/pull/9963#issuecomment-1004832863 - NVBENCH_BENCH_TYPES(BM_parquet_read_options, NVBENCH_TYPE_AXES(col_selections, nvbench::enum_type_list, From f3402c402c2d0be54a6f2060e1bd74e284c1e687 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Mon, 25 Sep 2023 14:10:44 -0700 Subject: [PATCH 17/76] Add stream parameter to external dict APIs (#14115) This PR adds stream parameter to public dictionary APIs, which include: 1. `cudf::dictionary::encode` 2. `cudf::dictionary::decode` 3. `cudf::dictionary::get_index` 4. `cudf::dictionary::add_keys` 5. `cudf::dictionary::remove_keys` 6. `cudf::dictionary::remove_unused_keys` 7. `cudf::dictionary::set_keys` 8. 
`cudf::dictionary::match_dictionaries` Reference [13744](https://github.com/rapidsai/cudf/issues/13744) Authors: - Suraj Aralihalli (https://github.com/SurajAralihalli) - Yunsong Wang (https://github.com/PointKernel) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14115 --- cpp/include/cudf/dictionary/encode.hpp | 6 +- cpp/include/cudf/dictionary/search.hpp | 6 +- cpp/include/cudf/dictionary/update_keys.hpp | 16 ++- cpp/include/cudf_test/column_wrapper.hpp | 18 +++- cpp/src/dictionary/add_keys.cu | 3 +- cpp/src/dictionary/decode.cu | 5 +- cpp/src/dictionary/encode.cu | 5 +- cpp/src/dictionary/remove_keys.cu | 6 +- cpp/src/dictionary/search.cu | 11 +- cpp/src/dictionary/set_keys.cu | 9 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/dictionary_test.cpp | 105 ++++++++++++++++++++ 12 files changed, 164 insertions(+), 27 deletions(-) create mode 100644 cpp/tests/streams/dictionary_test.cpp diff --git a/cpp/include/cudf/dictionary/encode.hpp b/cpp/include/cudf/dictionary/encode.hpp index fb13eabe11a..959b785bf87 100644 --- a/cpp/include/cudf/dictionary/encode.hpp +++ b/cpp/include/cudf/dictionary/encode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -53,12 +53,14 @@ namespace dictionary { * * @param column The column to dictionary encode * @param indices_type The integer type to use for the indices + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Returns a dictionary column */ std::unique_ptr encode( column_view const& column, data_type indices_type = data_type{type_id::UINT32}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -72,11 +74,13 @@ std::unique_ptr encode( * @endcode * * @param dictionary_column Existing dictionary column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New column with type matching the dictionary_column's keys */ std::unique_ptr decode( dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/dictionary/search.hpp b/cpp/include/cudf/dictionary/search.hpp index ed7a9c84693..1b72cf42acd 100644 --- a/cpp/include/cudf/dictionary/search.hpp +++ b/cpp/include/cudf/dictionary/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,12 +37,14 @@ namespace dictionary { * * @param dictionary The dictionary to search for the key. * @param key The value to search for in the dictionary keyset. + * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Numeric scalar index value of the key within the dictionary + * @return Numeric scalar index value of the key within the dictionary. */ std::unique_ptr get_index( dictionary_column_view const& dictionary, scalar const& key, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/dictionary/update_keys.hpp b/cpp/include/cudf/dictionary/update_keys.hpp index 2fcfb5e1f7c..81728e1ff73 100644 --- a/cpp/include/cudf/dictionary/update_keys.hpp +++ b/cpp/include/cudf/dictionary/update_keys.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,13 +51,15 @@ namespace dictionary { * @throw cudf_logic_error if the new_keys contain nulls. * * @param dictionary_column Existing dictionary column. - * @param new_keys New keys to incorporate into the dictionary_column + * @param new_keys New keys to incorporate into the dictionary_column. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr add_keys( dictionary_column_view const& dictionary_column, column_view const& new_keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -81,13 +83,15 @@ std::unique_ptr add_keys( * @throw cudf_logic_error if the keys_to_remove contain nulls. * * @param dictionary_column Existing dictionary column. 
- * @param keys_to_remove The keys to remove from the dictionary_column + * @param keys_to_remove The keys to remove from the dictionary_column. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr remove_keys( dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -103,11 +107,13 @@ std::unique_ptr remove_keys( * @endcode * * @param dictionary_column Existing dictionary column. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr remove_unused_keys( dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -134,12 +140,14 @@ std::unique_ptr remove_unused_keys( * * @param dictionary_column Existing dictionary column. * @param keys New keys to use for the output column. Must not contain nulls. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr set_keys( dictionary_column_view const& dictionary_column, column_view const& keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -149,11 +157,13 @@ std::unique_ptr set_keys( * The result is a vector of new dictionaries with a common set of keys. * * @param input Dictionary columns to match keys. 
+ * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary columns. */ std::vector> match_dictionaries( cudf::host_span input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index cc8cac35ef4..c0932b81dc3 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -944,8 +944,10 @@ class dictionary_column_wrapper : public detail::column_wrapper { template dictionary_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{} { - wrapped = cudf::dictionary::encode( - fixed_width_column_wrapper(begin, end)); + wrapped = + cudf::dictionary::encode(fixed_width_column_wrapper(begin, end), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** @@ -978,7 +980,9 @@ class dictionary_column_wrapper : public detail::column_wrapper { : column_wrapper{} { wrapped = cudf::dictionary::encode( - fixed_width_column_wrapper(begin, end, v)); + fixed_width_column_wrapper(begin, end, v), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** @@ -1134,7 +1138,9 @@ class dictionary_column_wrapper : public detail::column_wrapper { template dictionary_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{} { - wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end)); + wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** @@ -1169,7 +1175,9 @@ class dictionary_column_wrapper : public detail::column_wrapper { dictionary_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v) : 
column_wrapper{} { - wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v)); + wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index ab22c07e4d5..3973100aced 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -130,10 +130,11 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column std::unique_ptr add_keys(dictionary_column_view const& dictionary_column, column_view const& keys, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::add_keys(dictionary_column, keys, cudf::get_default_stream(), mr); + return detail::add_keys(dictionary_column, keys, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/decode.cu b/cpp/src/dictionary/decode.cu index 01411d06b62..fdf546b5875 100644 --- a/cpp/src/dictionary/decode.cu +++ b/cpp/src/dictionary/decode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,10 +65,11 @@ std::unique_ptr decode(dictionary_column_view const& source, } // namespace detail std::unique_ptr decode(dictionary_column_view const& source, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::decode(source, cudf::get_default_stream(), mr); + return detail::decode(source, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/encode.cu b/cpp/src/dictionary/encode.cu index fe8e777b694..c92b57f0cac 100644 --- a/cpp/src/dictionary/encode.cu +++ b/cpp/src/dictionary/encode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,10 +89,11 @@ data_type get_indices_type_for_size(size_type keys_size) std::unique_ptr encode(column_view const& input_column, data_type indices_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::encode(input_column, indices_type, cudf::get_default_stream(), mr); + return detail::encode(input_column, indices_type, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 9fe4a63373b..86b70f1119b 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -195,17 +195,19 @@ std::unique_ptr remove_unused_keys(dictionary_column_view const& diction std::unique_ptr remove_keys(dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::remove_keys(dictionary_column, keys_to_remove, cudf::get_default_stream(), mr); + return detail::remove_keys(dictionary_column, keys_to_remove, stream, mr); } std::unique_ptr remove_unused_keys(dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::remove_unused_keys(dictionary_column, cudf::get_default_stream(), mr); + return detail::remove_unused_keys(dictionary_column, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index 8e97a387780..e35aded1984 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,10 +79,8 @@ struct find_index_fn { using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); auto keys_view = column_device_view::create(input.keys(), stream); - auto iter = thrust::equal_range(rmm::exec_policy(cudf::get_default_stream()), - keys_view->begin(), - keys_view->end(), - find_key); + auto iter = thrust::equal_range( + rmm::exec_policy(stream), keys_view->begin(), keys_view->end(), find_key); return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, thrust::distance(keys_view->begin(), iter.first), @@ -176,10 +174,11 @@ std::unique_ptr get_insert_index(dictionary_column_view const& dictionar std::unique_ptr get_index(dictionary_column_view const& dictionary, scalar const& key, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::get_index(dictionary, key, cudf::get_default_stream(), mr); + return detail::get_index(dictionary, key, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 36f5021d305..b49cf7850b1 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -241,17 +241,20 @@ std::pair>, std::vector> match_d std::unique_ptr set_keys(dictionary_column_view const& dictionary_column, column_view const& keys, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::set_keys(dictionary_column, keys, cudf::get_default_stream(), mr); + return detail::set_keys(dictionary_column, keys, stream, mr); } std::vector> match_dictionaries( - cudf::host_span input, rmm::mr::device_memory_resource* mr) + cudf::host_span input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::match_dictionaries(input, 
cudf::get_default_stream(), mr); + return detail::match_dictionaries(input, stream, mr); } } // namespace dictionary diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 956bfc7c27d..68ff6c54c99 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -629,6 +629,7 @@ ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing) ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE testing diff --git a/cpp/tests/streams/dictionary_test.cpp b/cpp/tests/streams/dictionary_test.cpp new file mode 100644 index 00000000000..f48e64c078e --- /dev/null +++ b/cpp/tests/streams/dictionary_test.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +class DictionaryTest : public cudf::test::BaseFixture {}; + +TEST_F(DictionaryTest, Encode) +{ + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4, 5}); + cudf::data_type int32_type(cudf::type_id::UINT32); + cudf::column_view col_view = col; + cudf::dictionary::encode(col_view, int32_type, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, Decode) +{ + // keys = {0, 2, 6}, indices = {0, 1, 1, 2, 2} + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::dictionary::decode(dict_col_view, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, GetIndex) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::numeric_scalar key_scalar(2, true, cudf::test::get_default_stream()); + cudf::dictionary::get_index(dict_col_view, key_scalar, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, AddKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::test::fixed_width_column_wrapper new_keys_col({8, 9}); + cudf::dictionary::add_keys(dict_col_view, new_keys_col, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, RemoveKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::test::fixed_width_column_wrapper keys_to_remove_col({2}); + cudf::dictionary::remove_keys( + dict_col_view, keys_to_remove_col, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, RemoveUnsedKeys) +{ + std::vector 
elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::dictionary::remove_unused_keys(dict_col_view, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, SetKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::test::fixed_width_column_wrapper keys_col({2, 6}); + cudf::dictionary::set_keys(dict_col_view, keys_col, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, MatchDictionaries) +{ + std::vector elements_a{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col_a(elements_a.begin(), elements_a.end()); + cudf::dictionary_column_view dict_col_view_a = dict_col_a; + + std::vector elements_b{1, 3, 4, 5, 5}; + cudf::test::dictionary_column_wrapper dict_col_b(elements_b.begin(), elements_b.end()); + cudf::dictionary_column_view dict_col_view_b = dict_col_b; + + std::vector dicts = {dict_col_view_a, dict_col_view_b}; + + cudf::test::fixed_width_column_wrapper keys_col({2, 6}); + cudf::dictionary::match_dictionaries(dicts, cudf::test::get_default_stream()); +} From 2e1a17d6519ea018921e35075306e01b4fdddf72 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 25 Sep 2023 15:53:55 -0700 Subject: [PATCH 18/76] Replace Python scalar conversions with libcudf (#14124) This PR replaces the various Cython converters for different libcudf scalar types by using the new libcudf `[to|from]_arrow` overloads for scalars introduced in #14121. This change dramatically simplifies the Cython code and paves the way for implementation of a pylibcudf.Scalar object. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/14124 --- python/cudf/cudf/_lib/cpp/interop.pxd | 11 +- python/cudf/cudf/_lib/interop.pyx | 95 +++++- python/cudf/cudf/_lib/scalar.pyx | 448 +++++--------------------- python/cudf/cudf/tests/test_list.py | 4 +- python/cudf/cudf/tests/test_struct.py | 35 +- python/cudf/cudf/utils/dtypes.py | 18 -- 6 files changed, 210 insertions(+), 401 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/interop.pxd b/python/cudf/cudf/_lib/cpp/interop.pxd index e81f0d617fb..88e9d83ee98 100644 --- a/python/cudf/cudf/_lib/cpp/interop.pxd +++ b/python/cudf/cudf/_lib/cpp/interop.pxd @@ -1,12 +1,13 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from libcpp.memory cimport shared_ptr, unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector -from pyarrow.lib cimport CTable +from pyarrow.lib cimport CScalar, CTable from cudf._lib.types import cudf_to_np_types, np_to_cudf_types +from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view @@ -24,6 +25,7 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \ ) except + cdef unique_ptr[table] from_arrow(CTable input) except + + cdef unique_ptr[scalar] from_arrow(CScalar input) except + cdef cppclass column_metadata: column_metadata() except + @@ -35,3 +37,8 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \ table_view input, vector[column_metadata] metadata, ) except + + + cdef shared_ptr[CScalar] to_arrow( + const scalar& input, + column_metadata metadata, + ) except + diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 8fd2a409d90..639754fc54f 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -4,7 +4,14 @@ from cpython cimport pycapsule from libcpp.memory 
cimport shared_ptr, unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector -from pyarrow.lib cimport CTable, pyarrow_unwrap_table, pyarrow_wrap_table +from pyarrow.lib cimport ( + CScalar, + CTable, + pyarrow_unwrap_scalar, + pyarrow_unwrap_table, + pyarrow_wrap_scalar, + pyarrow_wrap_table, +) from cudf._lib.cpp.interop cimport ( DLManagedTensor, @@ -14,12 +21,22 @@ from cudf._lib.cpp.interop cimport ( to_arrow as cpp_to_arrow, to_dlpack as cpp_to_dlpack, ) +from cudf._lib.cpp.scalar.scalar cimport fixed_point_scalar, scalar from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view +from cudf._lib.cpp.types cimport type_id +from cudf._lib.cpp.wrappers.decimals cimport ( + decimal32, + decimal64, + decimal128, + scale_type, +) +from cudf._lib.scalar cimport DeviceScalar from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns from cudf.api.types import is_list_dtype, is_struct_dtype from cudf.core.buffer import acquire_spill_lock +from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype def from_dlpack(dlpack_capsule): @@ -182,3 +199,79 @@ def from_arrow(object input_table): c_result = move(cpp_from_arrow(cpp_arrow_table.get()[0])) return columns_from_unique_ptr(move(c_result)) + + +@acquire_spill_lock() +def to_arrow_scalar(DeviceScalar source_scalar): + """Convert a scalar to a PyArrow scalar. 
+ + Parameters + ---------- + source_scalar : the scalar to convert + + Returns + ------- + pyarrow.lib.Scalar + """ + cdef vector[column_metadata] cpp_metadata = gather_metadata( + [("", source_scalar.dtype)] + ) + cdef const scalar* source_scalar_ptr = source_scalar.get_raw_ptr() + + cdef shared_ptr[CScalar] cpp_arrow_scalar + with nogil: + cpp_arrow_scalar = cpp_to_arrow( + source_scalar_ptr[0], cpp_metadata[0] + ) + + return pyarrow_wrap_scalar(cpp_arrow_scalar) + + +@acquire_spill_lock() +def from_arrow_scalar(object input_scalar, output_dtype=None): + """Convert from PyArrow scalar to a cudf scalar. + + Parameters + ---------- + input_scalar : PyArrow scalar + output_dtype : output type to cast to, ignored except for decimals + + Returns + ------- + cudf._lib.DeviceScalar + """ + cdef shared_ptr[CScalar] cpp_arrow_scalar = ( + pyarrow_unwrap_scalar(input_scalar) + ) + cdef unique_ptr[scalar] c_result + + with nogil: + c_result = move(cpp_from_arrow(cpp_arrow_scalar.get()[0])) + + cdef type_id ctype = c_result.get().type().id() + if ctype == type_id.DECIMAL128: + if output_dtype is None: + # Decimals must be cast to the cudf dtype of the right width + raise ValueError( + "Decimal scalars must be constructed with a dtype" + ) + + if isinstance(output_dtype, Decimal32Dtype): + c_result.reset( + new fixed_point_scalar[decimal32]( + ( c_result.get()).value(), + scale_type(-input_scalar.type.scale), + c_result.get().is_valid() + ) + ) + elif isinstance(output_dtype, Decimal64Dtype): + c_result.reset( + new fixed_point_scalar[decimal64]( + ( c_result.get()).value(), + scale_type(-input_scalar.type.scale), + c_result.get().is_valid() + ) + ) + # Decimal128Dtype is a no-op, no conversion needed. 
+ + return DeviceScalar.from_unique_ptr(move(c_result), output_dtype) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 0407785b2d8..5ab286c5701 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -2,22 +2,13 @@ cimport cython -import decimal +import copy import numpy as np import pandas as pd import pyarrow as pa -from libc.stdint cimport ( - int8_t, - int16_t, - int32_t, - int64_t, - uint8_t, - uint16_t, - uint32_t, - uint64_t, -) +from libc.stdint cimport int64_t from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -25,38 +16,22 @@ from libcpp.utility cimport move from rmm._lib.memory_resource cimport get_current_device_resource import cudf -from cudf._lib.types import ( - LIBCUDF_TO_SUPPORTED_NUMPY_TYPES, - datetime_unit_map, - duration_unit_map, -) +from cudf._lib.types import LIBCUDF_TO_SUPPORTED_NUMPY_TYPES from cudf.core.dtypes import ListDtype, StructDtype from cudf.core.missing import NA, NaT -from cudf._lib.column cimport Column -from cudf._lib.cpp.column.column_view cimport column_view -from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id -from cudf._lib.interop import from_arrow, to_arrow +from cudf._lib.interop import from_arrow_scalar, to_arrow_scalar cimport cudf._lib.cpp.types as libcudf_types from cudf._lib.cpp.scalar.scalar cimport ( duration_scalar, - fixed_point_scalar, list_scalar, - numeric_scalar, scalar, - string_scalar, struct_scalar, timestamp_scalar, ) -from cudf._lib.cpp.wrappers.decimals cimport ( - decimal32, - decimal64, - decimal128, - scale_type, -) from cudf._lib.cpp.wrappers.durations cimport ( duration_ms, duration_ns, @@ -69,7 +44,21 @@ from cudf._lib.cpp.wrappers.timestamps cimport ( timestamp_s, timestamp_us, ) -from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns + + +def _replace_nested(obj, check, 
replacement): + if isinstance(obj, list): + for i, item in enumerate(obj): + if check(item): + obj[i] = replacement + elif isinstance(item, (dict, list)): + _replace_nested(item, check, replacement) + elif isinstance(obj, dict): + for k, v in obj.items(): + if check(v): + obj[k] = replacement + elif isinstance(v, (dict, list)): + _replace_nested(v, check, replacement) # The DeviceMemoryResource attribute could be released prematurely @@ -97,61 +86,61 @@ cdef class DeviceScalar: A NumPy dtype. """ self._dtype = dtype if dtype.kind != 'U' else cudf.dtype('object') - self._set_value(value, self._dtype) - - def _set_value(self, value, dtype): - # IMPORTANT: this should only ever be called from __init__ - valid = not _is_null_host_scalar(value) - - if isinstance(dtype, cudf.core.dtypes.DecimalDtype): - _set_decimal_from_scalar( - self.c_value, value, dtype, valid) - elif isinstance(dtype, cudf.ListDtype): - _set_list_from_pylist( - self.c_value, value, dtype, valid) - elif isinstance(dtype, cudf.StructDtype): - _set_struct_from_pydict(self.c_value, value, dtype, valid) + + if cudf.utils.utils.is_na_like(value): + value = None + else: + # TODO: For now we always deepcopy the input value to avoid + # overwriting the input values when replacing nulls. Since it's + # just host values it's not that expensive, but we could consider + # alternatives. 
+ value = copy.deepcopy(value) + _replace_nested(value, cudf.utils.utils.is_na_like, None) + + if isinstance(dtype, cudf.core.dtypes._BaseDtype): + pa_type = dtype.to_arrow() elif pd.api.types.is_string_dtype(dtype): - _set_string_from_np_string(self.c_value, value, valid) - elif pd.api.types.is_numeric_dtype(dtype): - _set_numeric_from_np_scalar(self.c_value, - value, - dtype, - valid) - elif pd.api.types.is_datetime64_dtype(dtype): - _set_datetime64_from_np_scalar( - self.c_value, value, dtype, valid - ) - elif pd.api.types.is_timedelta64_dtype(dtype): - _set_timedelta64_from_np_scalar( - self.c_value, value, dtype, valid - ) + # Have to manually convert object types, which we use internally + # for strings but pyarrow only supports as unicode 'U' + pa_type = pa.string() else: - raise ValueError( - f"Cannot convert value of type " - f"{type(value).__name__} to cudf scalar" - ) + pa_type = pa.from_numpy_dtype(dtype) + + pa_scalar = pa.scalar(value, type=pa_type) + + # Note: This factory-like behavior in __init__ will be removed when + # migrating to pylibcudf. 
+ cdef DeviceScalar obj = from_arrow_scalar(pa_scalar, self._dtype) + self.c_value.swap(obj.c_value) def _to_host_scalar(self): - if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype): - result = _get_py_decimal_from_fixed_point(self.c_value) - elif cudf.api.types.is_struct_dtype(self.dtype): - result = _get_py_dict_from_struct(self.c_value, self.dtype) - elif cudf.api.types.is_list_dtype(self.dtype): - result = _get_py_list_from_list(self.c_value, self.dtype) - elif pd.api.types.is_string_dtype(self.dtype): - result = _get_py_string_from_string(self.c_value) - elif pd.api.types.is_numeric_dtype(self.dtype): - result = _get_np_scalar_from_numeric(self.c_value) - elif pd.api.types.is_datetime64_dtype(self.dtype): - result = _get_np_scalar_from_timestamp64(self.c_value) - elif pd.api.types.is_timedelta64_dtype(self.dtype): - result = _get_np_scalar_from_timedelta64(self.c_value) + is_datetime = self.dtype.kind == "M" + is_timedelta = self.dtype.kind == "m" + + null_type = NaT if is_datetime or is_timedelta else NA + + ps = to_arrow_scalar(self) + if not ps.is_valid: + return null_type + + # TODO: The special handling of specific types below does not currently + # extend to nested types containing those types (e.g. List[timedelta] + # where the timedelta would overflow). We should eventually account for + # those cases, but that will require more careful consideration of how + # to traverse the contents of the nested data. 
+ if is_datetime or is_timedelta: + time_unit, _ = np.datetime_data(self.dtype) + # Cast to int64 to avoid overflow + ps_cast = ps.cast('int64').as_py() + out_type = np.datetime64 if is_datetime else np.timedelta64 + ret = out_type(ps_cast, time_unit) + elif cudf.api.types.is_numeric_dtype(self.dtype): + ret = ps.type.to_pandas_dtype()(ps.as_py()) else: - raise ValueError( - "Could not convert cudf::scalar to a Python value" - ) - return result + ret = ps.as_py() + + _replace_nested(ret, lambda item: item is None, NA) + return ret @property def dtype(self): @@ -236,42 +225,9 @@ cdef class DeviceScalar: return s -cdef _set_string_from_np_string(unique_ptr[scalar]& s, value, bool valid=True): - value = value if valid else "" - s.reset(new string_scalar(value.encode(), valid)) - - -cdef _set_numeric_from_np_scalar(unique_ptr[scalar]& s, - object value, - object dtype, - bool valid=True): - value = value if valid else 0 - if dtype == "int8": - s.reset(new numeric_scalar[int8_t](value, valid)) - elif dtype == "int16": - s.reset(new numeric_scalar[int16_t](value, valid)) - elif dtype == "int32": - s.reset(new numeric_scalar[int32_t](value, valid)) - elif dtype == "int64": - s.reset(new numeric_scalar[int64_t](value, valid)) - elif dtype == "uint8": - s.reset(new numeric_scalar[uint8_t](value, valid)) - elif dtype == "uint16": - s.reset(new numeric_scalar[uint16_t](value, valid)) - elif dtype == "uint32": - s.reset(new numeric_scalar[uint32_t](value, valid)) - elif dtype == "uint64": - s.reset(new numeric_scalar[uint64_t](value, valid)) - elif dtype == "float32": - s.reset(new numeric_scalar[float](value, valid)) - elif dtype == "float64": - s.reset(new numeric_scalar[double](value, valid)) - elif dtype == "bool": - s.reset(new numeric_scalar[bool](value, valid)) - else: - raise ValueError(f"dtype not supported: {dtype}") - - +# TODO: Currently the only uses of this function and the one below are in +# _create_proxy_nat_scalar. 
See if that code path can be simplified to excise +# or at least simplify these implementations. cdef _set_datetime64_from_np_scalar(unique_ptr[scalar]& s, object value, object dtype, @@ -324,253 +280,6 @@ cdef _set_timedelta64_from_np_scalar(unique_ptr[scalar]& s, else: raise ValueError(f"dtype not supported: {dtype}") -cdef _set_decimal_from_scalar(unique_ptr[scalar]& s, - object value, - object dtype, - bool valid=True): - value = cudf.utils.dtypes._decimal_to_int64(value) if valid else 0 - if isinstance(dtype, cudf.Decimal64Dtype): - s.reset( - new fixed_point_scalar[decimal64]( - np.int64(value), scale_type(-dtype.scale), valid - ) - ) - elif isinstance(dtype, cudf.Decimal32Dtype): - s.reset( - new fixed_point_scalar[decimal32]( - np.int32(value), scale_type(-dtype.scale), valid - ) - ) - elif isinstance(dtype, cudf.Decimal128Dtype): - s.reset( - new fixed_point_scalar[decimal128]( - value, scale_type(-dtype.scale), valid - ) - ) - else: - raise ValueError(f"dtype not supported: {dtype}") - -cdef _set_struct_from_pydict(unique_ptr[scalar]& s, - object value, - object dtype, - bool valid=True): - arrow_schema = dtype.to_arrow() - columns = [str(i) for i in range(len(arrow_schema))] - if valid: - pyarrow_table = pa.Table.from_arrays( - [ - pa.array([value[f.name]], from_pandas=True, type=f.type) - for f in arrow_schema - ], - names=columns - ) - else: - pyarrow_table = pa.Table.from_arrays( - [ - pa.array([NA], from_pandas=True, type=f.type) - for f in arrow_schema - ], - names=columns - ) - - data = from_arrow(pyarrow_table) - cdef table_view struct_view = table_view_from_columns(data) - - s.reset( - new struct_scalar(struct_view, valid) - ) - -cdef _get_py_dict_from_struct(unique_ptr[scalar]& s, dtype): - if not s.get()[0].is_valid(): - return NA - - cdef table_view struct_table_view = (s.get()).view() - columns = columns_from_table_view(struct_table_view, None) - struct_col = cudf.core.column.build_struct_column( - names=dtype.fields.keys(), - 
children=tuple(columns), - size=1, - ) - table = to_arrow([struct_col], [("None", dtype)]) - python_dict = table.to_pydict()["None"][0] - return {k: _nested_na_replace([python_dict[k]])[0] for k in python_dict} - -cdef _set_list_from_pylist(unique_ptr[scalar]& s, - object value, - object dtype, - bool valid=True): - - value = value if valid else [NA] - cdef Column col - if isinstance(dtype.element_type, ListDtype): - pa_type = dtype.element_type.to_arrow() - else: - pa_type = dtype.to_arrow().value_type - col = cudf.core.column.as_column( - pa.array(value, from_pandas=True, type=pa_type) - ) - cdef column_view col_view = col.view() - s.reset( - new list_scalar(col_view, valid) - ) - - -cdef _get_py_list_from_list(unique_ptr[scalar]& s, dtype): - - if not s.get()[0].is_valid(): - return NA - - cdef column_view list_col_view = (s.get()).view() - cdef Column element_col = Column.from_column_view(list_col_view, None) - - arrow_obj = to_arrow([element_col], [("None", dtype.element_type)])["None"] - - result = arrow_obj.to_pylist() - return _nested_na_replace(result) - - -cdef _get_py_string_from_string(unique_ptr[scalar]& s): - if not s.get()[0].is_valid(): - return NA - return (s.get())[0].to_string().decode() - - -cdef _get_np_scalar_from_numeric(unique_ptr[scalar]& s): - cdef scalar* s_ptr = s.get() - if not s_ptr[0].is_valid(): - return NA - - cdef libcudf_types.data_type cdtype = s_ptr[0].type() - - if cdtype.id() == libcudf_types.type_id.INT8: - return np.int8((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.type_id.INT16: - return np.int16((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.type_id.INT32: - return np.int32((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.type_id.INT64: - return np.int64((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.type_id.UINT8: - return np.uint8((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.type_id.UINT16: - return np.uint16((s_ptr)[0].value()) - elif cdtype.id() == 
libcudf_types.type_id.UINT32: - return np.uint32((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.type_id.UINT64: - return np.uint64((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.type_id.FLOAT32: - return np.float32((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.type_id.FLOAT64: - return np.float64((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.type_id.BOOL8: - return np.bool_((s_ptr)[0].value()) - else: - raise ValueError("Could not convert cudf::scalar to numpy scalar") - - -cdef _get_py_decimal_from_fixed_point(unique_ptr[scalar]& s): - cdef scalar* s_ptr = s.get() - if not s_ptr[0].is_valid(): - return NA - - cdef libcudf_types.data_type cdtype = s_ptr[0].type() - - if cdtype.id() == libcudf_types.type_id.DECIMAL64: - rep_val = int((s_ptr)[0].value()) - scale = int((s_ptr)[0].type().scale()) - return decimal.Decimal(rep_val).scaleb(scale) - elif cdtype.id() == libcudf_types.type_id.DECIMAL32: - rep_val = int((s_ptr)[0].value()) - scale = int((s_ptr)[0].type().scale()) - return decimal.Decimal(rep_val).scaleb(scale) - elif cdtype.id() == libcudf_types.type_id.DECIMAL128: - rep_val = int((s_ptr)[0].value()) - scale = int((s_ptr)[0].type().scale()) - return decimal.Decimal(rep_val).scaleb(scale) - else: - raise ValueError("Could not convert cudf::scalar to numpy scalar") - -cdef _get_np_scalar_from_timestamp64(unique_ptr[scalar]& s): - - cdef scalar* s_ptr = s.get() - - if not s_ptr[0].is_valid(): - return NaT - - cdef libcudf_types.data_type cdtype = s_ptr[0].type() - - if cdtype.id() == libcudf_types.type_id.TIMESTAMP_SECONDS: - return np.datetime64( - ( - s_ptr - )[0].ticks_since_epoch_64(), - datetime_unit_map[(cdtype.id())] - ) - elif cdtype.id() == libcudf_types.type_id.TIMESTAMP_MILLISECONDS: - return np.datetime64( - ( - s_ptr - )[0].ticks_since_epoch_64(), - datetime_unit_map[(cdtype.id())] - ) - elif cdtype.id() == libcudf_types.type_id.TIMESTAMP_MICROSECONDS: - return np.datetime64( - ( - s_ptr - 
)[0].ticks_since_epoch_64(), - datetime_unit_map[(cdtype.id())] - ) - elif cdtype.id() == libcudf_types.type_id.TIMESTAMP_NANOSECONDS: - return np.datetime64( - ( - s_ptr - )[0].ticks_since_epoch_64(), - datetime_unit_map[(cdtype.id())] - ) - else: - raise ValueError("Could not convert cudf::scalar to numpy scalar") - - -cdef _get_np_scalar_from_timedelta64(unique_ptr[scalar]& s): - - cdef scalar* s_ptr = s.get() - - if not s_ptr[0].is_valid(): - return NaT - - cdef libcudf_types.data_type cdtype = s_ptr[0].type() - - if cdtype.id() == libcudf_types.type_id.DURATION_SECONDS: - return np.timedelta64( - ( - s_ptr - )[0].ticks(), - duration_unit_map[(cdtype.id())] - ) - elif cdtype.id() == libcudf_types.type_id.DURATION_MILLISECONDS: - return np.timedelta64( - ( - s_ptr - )[0].ticks(), - duration_unit_map[(cdtype.id())] - ) - elif cdtype.id() == libcudf_types.type_id.DURATION_MICROSECONDS: - return np.timedelta64( - ( - s_ptr - )[0].ticks(), - duration_unit_map[(cdtype.id())] - ) - elif cdtype.id() == libcudf_types.type_id.DURATION_NANOSECONDS: - return np.timedelta64( - ( - s_ptr - )[0].ticks(), - duration_unit_map[(cdtype.id())] - ) - else: - raise ValueError("Could not convert cudf::scalar to numpy scalar") - def as_device_scalar(val, dtype=None): if isinstance(val, (cudf.Scalar, DeviceScalar)): @@ -607,16 +316,3 @@ def _create_proxy_nat_scalar(dtype): return result else: raise TypeError('NAT only valid for datetime and timedelta') - - -def _nested_na_replace(input_list): - ''' - Replace `None` with `cudf.NA` in the result of - `__getitem__` calls to list type columns - ''' - for idx, value in enumerate(input_list): - if isinstance(value, list): - _nested_na_replace(value) - elif value is None: - input_list[idx] = NA - return input_list diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 5dd58d8a875..ac10dd97c56 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -895,14 +895,14 @@ 
def test_memory_usage(): "data, idx", [ ( - [[{"f2": {"a": 100}, "f1": "a"}, {"f1": "sf12", "f2": None}]], + [[{"f2": {"a": 100}, "f1": "a"}, {"f1": "sf12", "f2": NA}]], 0, ), ( [ [ {"f2": {"a": 100, "c": 90, "f2": 10}, "f1": "a"}, - {"f1": "sf12", "f2": None}, + {"f1": "sf12", "f2": NA}, ] ], 0, diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py index a3593e55b97..ce6dc587320 100644 --- a/python/cudf/cudf/tests/test_struct.py +++ b/python/cudf/cudf/tests/test_struct.py @@ -150,9 +150,7 @@ def test_struct_setitem(data, item): "data", [ {"a": 1, "b": "rapids", "c": [1, 2, 3, 4]}, - {"a": 1, "b": "rapids", "c": [1, 2, 3, 4], "d": cudf.NA}, {"a": "Hello"}, - {"b": [], "c": [1, 2, 3]}, ], ) def test_struct_scalar_host_construction(data): @@ -161,6 +159,39 @@ def test_struct_scalar_host_construction(data): assert list(slr.device_value.value.values()) == list(data.values()) +@pytest.mark.parametrize( + ("data", "dtype"), + [ + ( + {"a": 1, "b": "rapids", "c": [1, 2, 3, 4], "d": cudf.NA}, + cudf.StructDtype( + { + "a": np.dtype(np.int64), + "b": np.dtype(np.str_), + "c": cudf.ListDtype(np.dtype(np.int64)), + "d": np.dtype(np.int64), + } + ), + ), + ( + {"b": [], "c": [1, 2, 3]}, + cudf.StructDtype( + { + "b": cudf.ListDtype(np.dtype(np.int64)), + "c": cudf.ListDtype(np.dtype(np.int64)), + } + ), + ), + ], +) +def test_struct_scalar_host_construction_no_dtype_inference(data, dtype): + # cudf cannot infer the dtype of the scalar when it contains only nulls or + # is empty. 
+ slr = cudf.Scalar(data, dtype=dtype) + assert slr.value == data + assert list(slr.device_value.value.values()) == list(data.values()) + + def test_struct_scalar_null(): slr = cudf.Scalar(cudf.NA, dtype=StructDtype) assert slr.device_value.value is cudf.NA diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 1b94db75340..73ea8e2cfc4 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -463,24 +463,6 @@ def _get_nan_for_dtype(dtype): return np.float64("nan") -def _decimal_to_int64(decimal: Decimal) -> int: - """ - Scale a Decimal such that the result is the integer - that would result from removing the decimal point. - - Examples - -------- - >>> _decimal_to_int64(Decimal('1.42')) - 142 - >>> _decimal_to_int64(Decimal('0.0042')) - 42 - >>> _decimal_to_int64(Decimal('-1.004201')) - -1004201 - - """ - return int(f"{decimal:0f}".replace(".", "")) - - def get_allowed_combinations_for_operator(dtype_l, dtype_r, op): error = TypeError( f"{op} not supported between {dtype_l} and {dtype_r} scalars" From daea8c8bc37ec53b7347857a3b6795bcb0ad86ff Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Tue, 26 Sep 2023 09:11:31 -0400 Subject: [PATCH 19/76] Disable `Recently Updated` Check (#14193) This check occasionally hangs for `cudf` for unknown reasons. Upon checking the application logs, the GitHub API seems to be returning responses that aren't helpful in troubleshooting the problem. Therefore, it's probably best to just remove the check to avoid confusion. 
[skip ci] Authors: - AJ Schmidt (https://github.com/ajschmidt8) --- .github/ops-bot.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml index 9a0b4155035..d2ca78924e1 100644 --- a/.github/ops-bot.yaml +++ b/.github/ops-bot.yaml @@ -5,4 +5,3 @@ auto_merger: true branch_checker: true label_checker: true release_drafter: true -recently_updated: true From 3196f6c36140962818aa8d12fe4fbd0dc522e31e Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Tue, 26 Sep 2023 11:54:18 -0500 Subject: [PATCH 20/76] update rmm tag path (#14195) PR updates the download path of the `rmm` tag used in `build_docs.sh` following the re-arrangement of the docs directories. Authors: - Jake Awe (https://github.com/AyodeAwe) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/14195 --- ci/build_docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 1ed047a500b..9149b5e6bfe 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -30,7 +30,7 @@ export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" pushd cpp/doxygen -aws s3 cp s3://rapidsai-docs/librmm/${RAPIDS_VERSION_NUMBER}/html/rmm.tag . || echo "Failed to download rmm Doxygen tag" +aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_NUMBER}/rmm.tag . || echo "Failed to download rmm Doxygen tag" doxygen Doxyfile mkdir -p "${RAPIDS_DOCS_DIR}/libcudf/html" mv html/* "${RAPIDS_DOCS_DIR}/libcudf/html" From a9ec350217331979359c50ea1da9457e9973f719 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 26 Sep 2023 14:32:04 -0500 Subject: [PATCH 21/76] Fix pytorch related pytest (#14198) Calling `cudf.Index([])` results in `str` dtype `Index`. This PR fixes an issue with a pytorch related pytest by explicitly passing a `float64` dtype. 
xref: https://github.com/rapidsai/cudf/pull/14116 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/14198 --- python/cudf/cudf/tests/test_cuda_array_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py index e81f4ec795a..848c77206b2 100644 --- a/python/cudf/cudf/tests/test_cuda_array_interface.py +++ b/python/cudf/cudf/tests/test_cuda_array_interface.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. import types from contextlib import ExitStack as does_not_raise @@ -193,7 +193,7 @@ def test_cuda_array_interface_pytorch(): assert_eq(got, cudf.Series(buffer, dtype=np.bool_)) - index = cudf.Index([]) + index = cudf.Index([], dtype="float64") tensor = torch.tensor(index) got = cudf.Index(tensor) assert_eq(got, index) From 030c0f4995ec458fcfc00a4ebb3aa8bccb2b27a0 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 26 Sep 2023 12:42:12 -0700 Subject: [PATCH 22/76] Refactor `contains_table` with cuco::static_set (#14064) Contributes to #12261 This PR refactors `contains_table` to use the new `cuco::static_set` data structure. It also adds a `contains_table` benchmark to track the performance before and after this work. 
Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14064 --- cpp/benchmarks/CMakeLists.txt | 2 +- .../{contains.cpp => contains_scalar.cpp} | 0 cpp/benchmarks/search/contains_table.cpp | 73 ++++ cpp/include/cudf/detail/search.hpp | 2 + cpp/src/search/contains_table.cu | 319 +++++++++--------- 5 files changed, 229 insertions(+), 167 deletions(-) rename cpp/benchmarks/search/{contains.cpp => contains_scalar.cpp} (100%) create mode 100644 cpp/benchmarks/search/contains_table.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 5e7862f4b3b..cd6b3cfdc03 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -173,7 +173,7 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator.cu) # ################################################################################################## # * search benchmark ------------------------------------------------------------------------------ ConfigureBench(SEARCH_BENCH search/search.cpp) -ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp) +ConfigureNVBench(SEARCH_NVBENCH search/contains_scalar.cpp search/contains_table.cpp) # ################################################################################################## # * sort benchmark -------------------------------------------------------------------------------- diff --git a/cpp/benchmarks/search/contains.cpp b/cpp/benchmarks/search/contains_scalar.cpp similarity index 100% rename from cpp/benchmarks/search/contains.cpp rename to cpp/benchmarks/search/contains_scalar.cpp diff --git a/cpp/benchmarks/search/contains_table.cpp b/cpp/benchmarks/search/contains_table.cpp new file mode 100644 index 00000000000..17702d0741c --- /dev/null +++ b/cpp/benchmarks/search/contains_table.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include + +#include + +auto constexpr num_unique_elements = 1000; + +template +static void nvbench_contains_table(nvbench::state& state, nvbench::type_list) +{ + auto const size = state.get_int64("table_size"); + auto const dtype = cudf::type_to_id(); + double const null_probability = state.get_float64("null_probability"); + + auto builder = data_profile_builder().null_probability(null_probability); + if (dtype == cudf::type_id::LIST) { + builder.distribution(dtype, distribution_id::UNIFORM, 0, num_unique_elements) + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, num_unique_elements) + .list_depth(1); + } else { + builder.distribution(dtype, distribution_id::UNIFORM, 0, num_unique_elements); + } + + auto const haystack = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 0); + auto const needles = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 1); + + auto mem_stats_logger = cudf::memory_stats_logger(); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto const stream_view = rmm::cuda_stream_view{launch.get_stream()}; + [[maybe_unused]] auto const result = + cudf::detail::contains(haystack->view(), + needles->view(), + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, + stream_view, + 
rmm::mr::get_current_device_resource()); + }); + + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +NVBENCH_BENCH_TYPES(nvbench_contains_table, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("contains_table") + .set_type_axes_names({"type"}) + .add_float64_axis("null_probability", {0.0, 0.1}) + .add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000}); diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp index 4c4ad7834f4..4277baf3edd 100644 --- a/cpp/include/cudf/detail/search.hpp +++ b/cpp/include/cudf/detail/search.hpp @@ -81,6 +81,8 @@ std::unique_ptr contains(column_view const& haystack, * output = { false, true, true } * @endcode * + * @throws cudf::logic_error If column types of haystack and needles don't match + * * @param haystack The table containing the search space * @param needles A table of rows whose existence to check in the search space * @param compare_nulls Control whether nulls should be compared as equal or not diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index e37f0686ac3..43624ba691d 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -26,7 +26,7 @@ #include -#include +#include #include @@ -37,69 +37,59 @@ namespace { using cudf::experimental::row::lhs_index_type; using cudf::experimental::row::rhs_index_type; -using static_map = cuco::static_map>>; - /** - * @brief Check if the given type `T` is a strong index type (i.e., `lhs_index_type` or - * `rhs_index_type`). 
- * - * @return A boolean value indicating if `T` is a strong index type + * @brief An hasher adapter wrapping both haystack hasher and needles hasher */ -template -constexpr auto is_strong_index_type() -{ - return std::is_same_v || std::is_same_v; -} +template +struct hasher_adapter { + hasher_adapter(HaystackHasher const& haystack_hasher, NeedleHasher const& needle_hasher) + : _haystack_hasher{haystack_hasher}, _needle_hasher{needle_hasher} + { + } -/** - * @brief An adapter functor to support strong index types for row hasher that must be operating on - * `cudf::size_type`. - */ -template -struct strong_index_hasher_adapter { - strong_index_hasher_adapter(Hasher const& hasher) : _hasher{hasher} {} + __device__ constexpr auto operator()(lhs_index_type idx) const noexcept + { + return _haystack_hasher(static_cast(idx)); + } - template ())> - __device__ constexpr auto operator()(T const idx) const noexcept + __device__ constexpr auto operator()(rhs_index_type idx) const noexcept { - return _hasher(static_cast(idx)); + return _needle_hasher(static_cast(idx)); } private: - Hasher const _hasher; + HaystackHasher const _haystack_hasher; + NeedleHasher const _needle_hasher; }; /** - * @brief An adapter functor to support strong index type for table row comparator that must be - * operating on `cudf::size_type`. 
+ * @brief An comparator adapter wrapping both self comparator and two table comparator */ -template -struct strong_index_comparator_adapter { - strong_index_comparator_adapter(Comparator const& comparator) : _comparator{comparator} {} - - template () && is_strong_index_type())> - __device__ constexpr auto operator()(T const lhs_index, U const rhs_index) const noexcept +template +struct comparator_adapter { + comparator_adapter(SelfEqual const& self_equal, TwoTableEqual const& two_table_equal) + : _self_equal{self_equal}, _two_table_equal{two_table_equal} + { + } + + __device__ constexpr auto operator()(lhs_index_type lhs_index, + lhs_index_type rhs_index) const noexcept { auto const lhs = static_cast(lhs_index); auto const rhs = static_cast(rhs_index); - if constexpr (std::is_same_v || std::is_same_v) { - return _comparator(lhs, rhs); - } else { - // Here we have T == rhs_index_type. - // This is when the indices are provided in wrong order for two table comparator, so we need - // to switch them back to the right order before calling the underlying comparator. - return _comparator(rhs, lhs); - } + return _self_equal(lhs, rhs); + } + + __device__ constexpr auto operator()(lhs_index_type lhs_index, + rhs_index_type rhs_index) const noexcept + { + return _two_table_equal(lhs_index, rhs_index); } private: - Comparator const _comparator; + SelfEqual const _self_equal; + TwoTableEqual const _two_table_equal; }; /** @@ -134,38 +124,62 @@ std::pair build_row_bitmask(table_view } /** - * @brief Invoke an `operator()` template with a row equality comparator based on the specified - * `compare_nans` parameter. 
+ * @brief Invokes the given `func` with desired comparators based on the specified `compare_nans` + * parameter + * + * @tparam HasNested Flag indicating whether there are nested columns in haystack or needles + * @tparam Hasher Type of device hash function + * @tparam Func Type of the helper function doing `contains` check * - * @param compare_nans The flag to specify whether NaNs should be compared equal or not + * @param compare_nulls Control whether nulls should be compared as equal or not + * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not + * @param haystack_has_nulls Flag indicating whether haystack has nulls or not + * @param has_any_nulls Flag indicating whether there are nested nulls is either haystack or needles + * @param self_equal Self table comparator + * @param two_table_equal Two table comparator + * @param d_hasher Device hash functor * @param func The input functor to invoke */ -template -void dispatch_nan_comparator(nan_equality compare_nans, Func&& func) +template +void dispatch_nan_comparator( + null_equality compare_nulls, + nan_equality compare_nans, + bool haystack_has_nulls, + bool has_any_nulls, + cudf::experimental::row::equality::self_comparator self_equal, + cudf::experimental::row::equality::two_table_comparator two_table_equal, + Hasher const& d_hasher, + Func&& func) { + // Distinguish probing scheme CG sizes between nested and flat types for better performance + auto const probing_scheme = [&]() { + if constexpr (HasNested) { + return cuco::experimental::linear_probing<4, Hasher>{d_hasher}; + } else { + return cuco::experimental::linear_probing<1, Hasher>{d_hasher}; + } + }(); + if (compare_nans == nan_equality::ALL_EQUAL) { using nan_equal_comparator = cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - func(nan_equal_comparator{}); + auto const d_self_equal = self_equal.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, 
nan_equal_comparator{}); + auto const d_two_table_equal = two_table_equal.equal_to( + nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_equal_comparator{}); + func(d_self_equal, d_two_table_equal, probing_scheme); } else { using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - func(nan_unequal_comparator{}); + auto const d_self_equal = self_equal.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_unequal_comparator{}); + auto const d_two_table_equal = two_table_equal.equal_to( + nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_unequal_comparator{}); + func(d_self_equal, d_two_table_equal, probing_scheme); } } } // namespace -/** - * @brief Check if rows in the given `needles` table exist in the `haystack` table. - * - * @param haystack The table containing the search space - * @param needles A table of rows whose existence to check in the search space - * @param compare_nulls Control whether nulls should be compared as equal or not - * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned vector - * @return A vector of bools indicating if each row in `needles` has matching rows in `haystack` - */ rmm::device_uvector contains(table_view const& haystack, table_view const& needles, null_equality compare_nulls, @@ -173,124 +187,97 @@ rmm::device_uvector contains(table_view const& haystack, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto map = static_map(compute_hash_table_size(haystack.num_rows()), - cuco::empty_key{lhs_index_type{std::numeric_limits::max()}}, - cuco::empty_value{detail::JoinNoneValue}, - detail::hash_table_allocator_type{default_allocator{}, stream}, - stream.value()); + CUDF_EXPECTS(cudf::have_same_types(haystack, needles), "Column types mismatch"); auto const 
haystack_has_nulls = has_nested_nulls(haystack); auto const needles_has_nulls = has_nested_nulls(needles); auto const has_any_nulls = haystack_has_nulls || needles_has_nulls; + auto const preprocessed_needles = + cudf::experimental::row::equality::preprocessed_table::create(needles, stream); auto const preprocessed_haystack = cudf::experimental::row::equality::preprocessed_table::create(haystack, stream); - // Insert row indices of the haystack table as map keys. - { - auto const haystack_it = cudf::detail::make_counting_transform_iterator( - size_type{0}, - [] __device__(auto const idx) { return cuco::make_pair(lhs_index_type{idx}, 0); }); - - auto const hasher = cudf::experimental::row::hash::row_hasher(preprocessed_haystack); - auto const d_hasher = - strong_index_hasher_adapter{hasher.device_hasher(nullate::DYNAMIC{has_any_nulls})}; - - auto const comparator = - cudf::experimental::row::equality::self_comparator(preprocessed_haystack); - - // If the haystack table has nulls but they are compared unequal, don't insert them. 
- // Otherwise, it was known to cause performance issue: - // - https://github.com/rapidsai/cudf/pull/6943 - // - https://github.com/rapidsai/cudf/pull/8277 - if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); - auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - - auto const insert_map = [&](auto const value_comp) { - if (cudf::detail::has_nested_columns(haystack)) { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert_if(haystack_it, - haystack_it + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - d_hasher, - d_eqcomp, - stream.value()); - } else { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert_if(haystack_it, - haystack_it + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - d_hasher, - d_eqcomp, - stream.value()); - } - }; - - // Insert only rows that do not have any null at any level. 
- dispatch_nan_comparator(compare_nans, insert_map); - } else { // haystack_doesn't_have_nulls || compare_nulls == null_equality::EQUAL - auto const insert_map = [&](auto const value_comp) { - if (cudf::detail::has_nested_columns(haystack)) { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert( - haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - } else { - auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert( - haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - } - }; - - dispatch_nan_comparator(compare_nans, insert_map); - } - } + + auto const haystack_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_haystack); + auto const d_haystack_hasher = haystack_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); + auto const needle_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_needles); + auto const d_needle_hasher = needle_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); + auto const d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; + + auto const self_equal = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + preprocessed_haystack, preprocessed_needles); // The output vector. auto contained = rmm::device_uvector(needles.num_rows(), stream, mr); - auto const preprocessed_needles = - cudf::experimental::row::equality::preprocessed_table::create(needles, stream); - // Check existence for each row of the needles table in the haystack table. 
- { - auto const needles_it = cudf::detail::make_counting_transform_iterator( - size_type{0}, [] __device__(auto const idx) { return rhs_index_type{idx}; }); - - auto const hasher = cudf::experimental::row::hash::row_hasher(preprocessed_needles); - auto const d_hasher = - strong_index_hasher_adapter{hasher.device_hasher(nullate::DYNAMIC{has_any_nulls})}; - - auto const comparator = cudf::experimental::row::equality::two_table_comparator( - preprocessed_haystack, preprocessed_needles); - - auto const check_contains = [&](auto const value_comp) { - if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { - auto const d_eqcomp = - comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); - map.contains(needles_it, - needles_it + needles.num_rows(), - contained.begin(), - d_hasher, - d_eqcomp, - stream.value()); + auto const haystack_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, [] __device__(auto idx) { return lhs_index_type{idx}; }); + auto const needles_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, [] __device__(auto idx) { return rhs_index_type{idx}; }); + + auto const helper_func = + [&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) { + auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; + + auto set = cuco::experimental::static_set{ + cuco::experimental::extent{compute_hash_table_size(haystack.num_rows())}, + cuco::empty_key{lhs_index_type{-1}}, + d_equal, + probing_scheme, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + + if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { + auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + + // If the haystack table has nulls but they are compared unequal, don't insert them. 
+ // Otherwise, it was known to cause performance issue: + // - https://github.com/rapidsai/cudf/pull/6943 + // - https://github.com/rapidsai/cudf/pull/8277 + set.insert_if_async(haystack_iter, + haystack_iter + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + stream.value()); } else { - auto const d_eqcomp = - comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); - map.contains(needles_it, - needles_it + needles.num_rows(), - contained.begin(), - d_hasher, - d_eqcomp, - stream.value()); + set.insert_async(haystack_iter, haystack_iter + haystack.num_rows(), stream.value()); + } + + if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { + auto const bitmask_buffer_and_ptr = build_row_bitmask(needles, stream); + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + set.contains_if_async(needles_iter, + needles_iter + needles.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + contained.begin(), + stream.value()); + } else { + set.contains_async( + needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); } }; - dispatch_nan_comparator(compare_nans, check_contains); + if (cudf::detail::has_nested_columns(haystack)) { + dispatch_nan_comparator(compare_nulls, + compare_nans, + haystack_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + d_hasher, + helper_func); + } else { + dispatch_nan_comparator(compare_nulls, + compare_nans, + haystack_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + d_hasher, + helper_func); } return contained; From b25b292f7f97cbb681f0244e1a20b30a925145a1 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 26 Sep 2023 18:53:43 -0400 Subject: [PATCH 23/76] Add nvtext::tokenize_with_vocabulary API (#13930) Adds tokenize with vocabulary APIs to libcudf. ``` struct tokenize_vocabulary{ ... 
}; std::unique_ptr load_vocabulary( cudf::strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); std::unique_ptr tokenize_with_vocabulary( cudf::strings_column_view const& input, tokenize_vocabulary const& vocabulary, cudf::string_scalar const& delimiter, cudf::size_type default_id, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); ``` Returns an integer lists column replacing individual tokens as resolved from the `input` using `delimiter` with id values which are the row indices of the input `vocabulary` column. If a token is not found in the `vocabulary` it is assigned `default_id`. The vocabulary can be loaded once using the `nvtext::load_vocabulary()` API and then used in repeated calls to `nvtext::tokenize_with_vocabulary()` with different input columns. Python interface is new class `TokenizeVocabulary` which can be used like the following: ``` >>> import cudf >>> from cudf.core.tokenize_vocabulary import TokenizeVocabulary >>> words = cudf.Series( ['brown', 'the', 'dog', 'jumps'] ) >>> vocab = TokenizeVocabulary(words) >>> s = cudf.Series( ['the brown dog jumps over the brown cat'] ) >>> print(vocab(s)) 0 [1, 0, 2, 3, -1, 1, 0, -1] dtype: list ``` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Robert Maynard (https://github.com/robertmaynard) - https://github.com/nvdbaranec - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/13930 --- cpp/CMakeLists.txt | 1 + cpp/include/nvtext/tokenize.hpp | 78 ++++++ cpp/src/text/vocabulary_tokenize.cu | 257 ++++++++++++++++++ cpp/tests/text/tokenize_tests.cpp | 93 +++++-- python/cudf/cudf/_lib/cpp/nvtext/tokenize.pxd | 17 +- python/cudf/cudf/_lib/nvtext/tokenize.pyx | 40 ++- python/cudf/cudf/_lib/strings/__init__.py | 1 + python/cudf/cudf/core/tokenize_vocabulary.py | 48 ++++ .../cudf/cudf/tests/text/test_text_methods.py | 59 ++++ 9 files changed, 574 insertions(+), 20 deletions(-) create mode 100644 
cpp/src/text/vocabulary_tokenize.cu create mode 100644 python/cudf/cudf/core/tokenize_vocabulary.py diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a84f7bd5224..9656bc40fd7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -618,6 +618,7 @@ add_library( src/text/subword/subword_tokenize.cu src/text/subword/wordpiece_tokenizer.cu src/text/tokenize.cu + src/text/vocabulary_tokenize.cu src/transform/bools_to_mask.cu src/transform/compute_column.cu src/transform/encode.cu diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp index a72f7dcfa59..44f8f44557c 100644 --- a/cpp/include/nvtext/tokenize.hpp +++ b/cpp/include/nvtext/tokenize.hpp @@ -215,5 +215,83 @@ std::unique_ptr detokenize( cudf::string_scalar const& separator = cudf::string_scalar(" "), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Vocabulary object to be used with nvtext::tokenize_with_vocabulary + * + * Use nvtext::load_vocabulary to create this object. + */ +struct tokenize_vocabulary { + /** + * @brief Vocabulary object constructor + * + * Token ids are the row indices within the vocabulary column. + * Each vocabulary entry is expected to be unique otherwise the behavior is undefined. 
+ * + * @throw cudf::logic_error if `vocabulary` contains nulls or is empty + * + * @param input Strings for the vocabulary + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ + tokenize_vocabulary(cudf::strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + ~tokenize_vocabulary(); + + struct tokenize_vocabulary_impl; + tokenize_vocabulary_impl* _impl{}; +}; + +/** + * @brief Create a tokenize_vocabulary object from a strings column + * + * Token ids are the row indices within the vocabulary column. + * Each vocabulary entry is expected to be unique otherwise the behavior is undefined. + * + * @throw cudf::logic_error if `vocabulary` contains nulls or is empty + * + * @param input Strings for the vocabulary + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Object to be used with nvtext::tokenize_with_vocabulary + */ +std::unique_ptr load_vocabulary( + cudf::strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Returns the token ids for the input string by looking up each delimited + * token in the given vocabulary + * + * @code{.pseudo} + * Example: + * s = ["hello world", "hello there", "there there world", "watch out world"] + * v = load_vocabulary(["hello", "there", "world"]) + * r = tokenize_with_vocabulary(s,v) + * r is now [[0,2], [0,1], [1,1,2], [-1,-1,2]] + * @endcode + * + * Any null row entry results in a corresponding null entry in the output + * + * @throw cudf::logic_error if `delimiter` is invalid + * + * @param input Strings column to 
tokenize + * @param vocabulary Used to lookup tokens within + * @param delimiter Used to identify tokens within `input` + * @param default_id The token id to be used for tokens not found in the `vocabulary`; + * Default is -1 + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Lists column of token ids + */ +std::unique_ptr tokenize_with_vocabulary( + cudf::strings_column_view const& input, + tokenize_vocabulary const& vocabulary, + cudf::string_scalar const& delimiter, + cudf::size_type default_id = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of tokenize group } // namespace nvtext diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu new file mode 100644 index 00000000000..f998c9ec239 --- /dev/null +++ b/cpp/src/text/vocabulary_tokenize.cu @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace nvtext { +namespace detail { +namespace { + +using string_hasher_type = cudf::hashing::detail::MurmurHash3_x86_32; +using hash_value_type = string_hasher_type::result_type; + +/** + * @brief Hasher function used for building and using the cuco static-map + * + * This takes advantage of heterogeneous lookup feature in cuco static-map which + * allows inserting with one type (index) and looking up with a different type (string). + */ +struct vocab_hasher { + cudf::column_device_view const d_strings; + string_hasher_type hasher{}; + // used by insert + __device__ hash_value_type operator()(cudf::size_type index) const + { + return hasher(d_strings.element(index)); + } + // used by find + __device__ hash_value_type operator()(cudf::string_view const& s) const { return hasher(s); } +}; + +/** + * @brief Equal function used for building and using the cuco static-map + * + * This takes advantage of heterogeneous lookup feature in cuco static-map which + * allows inserting with one type (index) and looking up with a different type (string). 
+ */ +struct vocab_equal { + cudf::column_device_view const d_strings; + // used by insert + __device__ bool operator()(cudf::size_type lhs, cudf::size_type rhs) const noexcept + { + return lhs == rhs; // all rows are expected to be unique + } + // used by find + __device__ bool operator()(cudf::size_type lhs, cudf::string_view const& rhs) const noexcept + { + return d_strings.element(lhs) == rhs; + } +}; + +using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; +using probe_scheme = cuco::experimental::linear_probing<1, vocab_hasher>; +using vocabulary_map_type = cuco::experimental::static_map, + cuda::thread_scope_device, + vocab_equal, + probe_scheme, + hash_table_allocator_type>; +} // namespace +} // namespace detail + +// since column_device_view::create returns is a little more than +// std::unique_ptr this helper simplifies the return type in a maintainable way +using col_device_view = std::invoke_result_t; + +struct tokenize_vocabulary::tokenize_vocabulary_impl { + std::unique_ptr const vocabulary; + col_device_view const d_vocabulary; + std::unique_ptr vocabulary_map; + + auto get_map_ref() const { return vocabulary_map->ref(cuco::experimental::op::find); } + + tokenize_vocabulary_impl(std::unique_ptr&& vocab, + col_device_view&& d_vocab, + std::unique_ptr&& map) + : vocabulary(std::move(vocab)), d_vocabulary(std::move(d_vocab)), vocabulary_map(std::move(map)) + { + } +}; + +struct key_pair { + __device__ auto operator()(cudf::size_type idx) const noexcept + { + return cuco::make_pair(idx, idx); + } +}; + +tokenize_vocabulary::tokenize_vocabulary(cudf::strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(not input.is_empty(), "vocabulary must not be empty"); + CUDF_EXPECTS(not input.has_nulls(), "vocabulary must not have nulls"); + + // need to hold a copy of the input + auto vocabulary = std::make_unique(input.parent(), stream, mr); + auto d_vocabulary = 
cudf::column_device_view::create(vocabulary->view(), stream); + + auto vocab_map = std::make_unique( + static_cast(vocabulary->size() * 2), + cuco::empty_key{-1}, + cuco::empty_value{-1}, + detail::vocab_equal{*d_vocabulary}, + detail::probe_scheme{detail::vocab_hasher{*d_vocabulary}}, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()); + + // the row index is the token id (value for each key in the map) + auto iter = cudf::detail::make_counting_transform_iterator(0, key_pair{}); + vocab_map->insert_async(iter, iter + vocabulary->size(), stream.value()); + + _impl = new tokenize_vocabulary_impl( + std::move(vocabulary), std::move(d_vocabulary), std::move(vocab_map)); +} +tokenize_vocabulary::~tokenize_vocabulary() { delete _impl; } + +std::unique_ptr load_vocabulary(cudf::strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return std::make_unique(input, stream, mr); +} + +namespace detail { +namespace { + +/** + * @brief Tokenizes each string and uses the map to assign token id values + * + * @tparam MapRefType Type of the static_map reference for calling find() + */ +template +struct vocabulary_tokenizer_fn { + cudf::column_device_view const d_strings; + cudf::string_view const d_delimiter; + MapRefType d_map; + cudf::size_type const default_id; + cudf::size_type const* d_offsets; + cudf::size_type* d_results; + + __device__ void operator()(cudf::size_type idx) const + { + if (d_strings.is_null(idx)) { return; } + + auto const d_str = d_strings.element(idx); + characters_tokenizer tokenizer(d_str, d_delimiter); + auto d_tokens = d_results + d_offsets[idx]; + + cudf::size_type token_idx = 0; + while (tokenizer.next_token()) { + auto const pos = tokenizer.token_byte_positions(); + auto const token = cudf::string_view{d_str.data() + pos.first, (pos.second - pos.first)}; + // lookup token in map + auto const itr = d_map.find(token); + auto const id = (itr != 
d_map.end()) ? itr->second : default_id; + // set value into the output + d_tokens[token_idx++] = id; + } + } +}; + +} // namespace + +std::unique_ptr tokenize_with_vocabulary(cudf::strings_column_view const& input, + tokenize_vocabulary const& vocabulary, + cudf::string_scalar const& delimiter, + cudf::size_type default_id, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid"); + + auto const output_type = cudf::data_type{cudf::type_to_id()}; + if (input.is_empty()) { return cudf::make_empty_column(output_type); } + + // count the tokens per string and build the offsets from the counts + auto const d_strings = cudf::column_device_view::create(input.parent(), stream); + auto const d_delimiter = delimiter.value(stream); + auto const sizes_itr = + cudf::detail::make_counting_transform_iterator(0, strings_tokenizer{*d_strings, d_delimiter}); + auto [token_offsets, total_count] = + cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + input.size(), stream, mr); + + // build the output column to hold all the token ids + auto tokens = + cudf::make_numeric_column(output_type, total_count, cudf::mask_state::UNALLOCATED, stream, mr); + auto map_ref = vocabulary._impl->get_map_ref(); + auto d_offsets = token_offsets->view().data(); + auto d_tokens = tokens->mutable_view().data(); + vocabulary_tokenizer_fn tokenizer{ + *d_strings, d_delimiter, map_ref, default_id, d_offsets, d_tokens}; + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + input.size(), + tokenizer); + + return cudf::make_lists_column(input.size(), + std::move(token_offsets), + std::move(tokens), + input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + stream, + mr); +} + +} // namespace detail + +std::unique_ptr tokenize_with_vocabulary(cudf::strings_column_view const& input, + tokenize_vocabulary const& vocabulary, + cudf::string_scalar const& 
delimiter, + cudf::size_type default_id, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::tokenize_with_vocabulary(input, vocabulary, delimiter, default_id, stream, mr); +} + +} // namespace nvtext diff --git a/cpp/tests/text/tokenize_tests.cpp b/cpp/tests/text/tokenize_tests.cpp index 14fc4f8c6db..d78f2dfbdf3 100644 --- a/cpp/tests/text/tokenize_tests.cpp +++ b/cpp/tests/text/tokenize_tests.cpp @@ -14,14 +14,16 @@ * limitations under the License. */ -#include -#include -#include -#include - #include #include #include +#include + +#include + +#include +#include +#include #include @@ -125,29 +127,37 @@ TEST_F(TextTokenizeTest, CharacterTokenize) TEST_F(TextTokenizeTest, TokenizeEmptyTest) { - auto strings = cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING}); - cudf::test::strings_column_wrapper all_empty({"", "", ""}); - cudf::test::strings_column_wrapper all_null({"", "", ""}, {0, 0, 0}); - cudf::test::fixed_width_column_wrapper expected({0, 0, 0}); - - auto results = nvtext::tokenize(cudf::strings_column_view(strings->view())); + auto input = cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING}); + auto view = cudf::strings_column_view(input->view()); + cudf::test::strings_column_wrapper all_empty_wrapper({"", "", ""}); + auto all_empty = cudf::strings_column_view(all_empty_wrapper); + cudf::test::strings_column_wrapper all_null_wrapper({"", "", ""}, {0, 0, 0}); + auto all_null = cudf::strings_column_view(all_null_wrapper); + cudf::test::fixed_width_column_wrapper expected({0, 0, 0}); + + auto results = nvtext::tokenize(view); EXPECT_EQ(results->size(), 0); - results = nvtext::tokenize(cudf::strings_column_view(all_empty)); + results = nvtext::tokenize(all_empty); EXPECT_EQ(results->size(), 0); - results = nvtext::tokenize(cudf::strings_column_view(all_null)); + results = nvtext::tokenize(all_null); EXPECT_EQ(results->size(), 0); - results = 
nvtext::count_tokens(cudf::strings_column_view(strings->view())); + results = nvtext::count_tokens(view); EXPECT_EQ(results->size(), 0); - results = nvtext::count_tokens(cudf::strings_column_view(all_empty)); + results = nvtext::count_tokens(all_empty); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); results = nvtext::count_tokens(cudf::strings_column_view(all_null)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - results = nvtext::character_tokenize(cudf::strings_column_view(strings->view())); + results = nvtext::character_tokenize(view); EXPECT_EQ(results->size(), 0); - results = nvtext::character_tokenize(cudf::strings_column_view(all_empty)); + results = nvtext::character_tokenize(all_empty); EXPECT_EQ(results->size(), 0); - results = nvtext::character_tokenize(cudf::strings_column_view(all_null)); + results = nvtext::character_tokenize(all_null); EXPECT_EQ(results->size(), 0); + auto const delimiter = cudf::string_scalar{""}; + results = nvtext::tokenize_with_vocabulary(view, all_empty, delimiter); + EXPECT_EQ(results->size(), 0); + results = nvtext::tokenize_with_vocabulary(all_null, all_empty, delimiter); + EXPECT_EQ(results->size(), results->null_count()); } TEST_F(TextTokenizeTest, Detokenize) @@ -191,3 +201,50 @@ TEST_F(TextTokenizeTest, DetokenizeErrors) EXPECT_THROW(nvtext::detokenize(strings_view, one, cudf::string_scalar("", false)), cudf::logic_error); } + +TEST_F(TextTokenizeTest, Vocabulary) +{ + cudf::test::strings_column_wrapper vocabulary( // leaving out 'cat' on purpose + {"ate", "chased", "cheese", "dog", "fox", "jumped", "mouse", "mousé", "over", "the"}); + auto vocab = nvtext::load_vocabulary(cudf::strings_column_view(vocabulary)); + + auto validity = cudf::test::iterators::null_at(1); + cudf::test::strings_column_wrapper input({"the fox jumped over the dog", + "the dog chased the cat", + "the cat chased the mouse", + "the mousé ate cheese", + "", + ""}, + validity); + auto input_view = cudf::strings_column_view(input); + auto 
delimiter = cudf::string_scalar(" "); + auto default_id = -7; // should be the token for the missing 'cat' + auto results = nvtext::tokenize_with_vocabulary(input_view, *vocab, delimiter, default_id); + + using LCW = cudf::test::lists_column_wrapper; + // clang-format off + LCW expected({LCW{ 9, 4, 5, 8, 9, 3}, + LCW{ 9, 3, 1, 9,-7}, + LCW{ 9,-7, 1, 9, 6}, + LCW{ 9, 7, 0, 2}, + LCW{}, LCW{}}, + validity); + // clang-format on + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); +} + +TEST_F(TextTokenizeTest, TokenizeErrors) +{ + cudf::test::strings_column_wrapper empty{}; + cudf::strings_column_view view(empty); + EXPECT_THROW(nvtext::load_vocabulary(view), cudf::logic_error); + + cudf::test::strings_column_wrapper vocab_nulls({""}, {0}); + cudf::strings_column_view nulls(vocab_nulls); + EXPECT_THROW(nvtext::load_vocabulary(nulls), cudf::logic_error); + + cudf::test::strings_column_wrapper some{"hello"}; + auto vocab = nvtext::load_vocabulary(cudf::strings_column_view(some)); + EXPECT_THROW(nvtext::tokenize_with_vocabulary(view, *vocab, cudf::string_scalar("", false)), + cudf::logic_error); +} diff --git a/python/cudf/cudf/_lib/cpp/nvtext/tokenize.pxd b/python/cudf/cudf/_lib/cpp/nvtext/tokenize.pxd index 8b80f50e381..3cc3fd6251a 100644 --- a/python/cudf/cudf/_lib/cpp/nvtext/tokenize.pxd +++ b/python/cudf/cudf/_lib/cpp/nvtext/tokenize.pxd @@ -1,10 +1,11 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.scalar.scalar cimport string_scalar +from cudf._lib.cpp.types cimport size_type cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: @@ -38,3 +39,17 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: const column_view & row_indices, const string_scalar & separator ) except + + + cdef struct tokenize_vocabulary "nvtext::tokenize_vocabulary": + pass + + cdef unique_ptr[tokenize_vocabulary] load_vocabulary( + const column_view & strings + ) except + + + cdef unique_ptr[column] tokenize_with_vocabulary( + const column_view & strings, + const tokenize_vocabulary & vocabulary, + const string_scalar & delimiter, + size_type default_id + ) except + diff --git a/python/cudf/cudf/_lib/nvtext/tokenize.pyx b/python/cudf/cudf/_lib/nvtext/tokenize.pyx index 2bb4fa8e108..bee9d6f6c4d 100644 --- a/python/cudf/cudf/_lib/nvtext/tokenize.pyx +++ b/python/cudf/cudf/_lib/nvtext/tokenize.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. 
from cudf.core.buffer import acquire_spill_lock @@ -12,9 +12,13 @@ from cudf._lib.cpp.nvtext.tokenize cimport ( character_tokenize as cpp_character_tokenize, count_tokens as cpp_count_tokens, detokenize as cpp_detokenize, + load_vocabulary as cpp_load_vocabulary, tokenize as cpp_tokenize, + tokenize_vocabulary as cpp_tokenize_vocabulary, + tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, ) from cudf._lib.cpp.scalar.scalar cimport string_scalar +from cudf._lib.cpp.types cimport size_type from cudf._lib.scalar cimport DeviceScalar @@ -122,3 +126,37 @@ def detokenize(Column strings, Column indices, object py_separator): ) return Column.from_unique_ptr(move(c_result)) + + +cdef class TokenizeVocabulary: + cdef unique_ptr[cpp_tokenize_vocabulary] c_obj + + def __cinit__(self, Column vocab): + cdef column_view c_vocab = vocab.view() + with nogil: + self.c_obj = move(cpp_load_vocabulary(c_vocab)) + + +@acquire_spill_lock() +def tokenize_with_vocabulary(Column strings, + TokenizeVocabulary vocabulary, + object py_delimiter, + size_type default_id): + + cdef DeviceScalar delimiter = py_delimiter.device_value + cdef column_view c_strings = strings.view() + cdef const string_scalar* c_delimiter = delimiter\ + .get_raw_ptr() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_tokenize_with_vocabulary( + c_strings, + vocabulary.c_obj.get()[0], + c_delimiter[0], + default_id + ) + ) + + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py index 16875e4397e..47a194c4fda 100644 --- a/python/cudf/cudf/_lib/strings/__init__.py +++ b/python/cudf/cudf/_lib/strings/__init__.py @@ -23,6 +23,7 @@ _tokenize_scalar, character_tokenize, detokenize, + tokenize_with_vocabulary, ) from cudf._lib.strings.attributes import ( code_points, diff --git a/python/cudf/cudf/core/tokenize_vocabulary.py b/python/cudf/cudf/core/tokenize_vocabulary.py new file mode 100644 index 
00000000000..afb3496311b --- /dev/null +++ b/python/cudf/cudf/core/tokenize_vocabulary.py @@ -0,0 +1,48 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from __future__ import annotations + +import cudf +from cudf._lib.nvtext.tokenize import ( + TokenizeVocabulary as cpp_tokenize_vocabulary, + tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, +) + + +class TokenizeVocabulary: + """ + A vocabulary object used to tokenize input text. + + Parameters + ---------- + vocabulary : str + Strings column of vocabulary terms + """ + + def __init__(self, vocabulary: "cudf.Series"): + self.vocabulary = cpp_tokenize_vocabulary(vocabulary._column) + + def tokenize(self, text, delimiter: str = "", default_id: int = -1): + """ + Parameters + ---------- + text : cudf string series + The strings to be tokenized. + delimiter : str + Delimiter to identify tokens. Default is whitespace. + default_id : int + Value to use for tokens not found in the vocabulary. + Default is -1. + + Returns + ------- + Tokenized strings + """ + if delimiter is None: + delimiter = "" + delim = cudf.Scalar(delimiter, dtype="str") + result = cpp_tokenize_with_vocabulary( + text._column, self.vocabulary, delim, default_id + ) + + return cudf.Series(result) diff --git a/python/cudf/cudf/tests/text/test_text_methods.py b/python/cudf/cudf/tests/text/test_text_methods.py index 8cda15e4acc..2241390a531 100644 --- a/python/cudf/cudf/tests/text/test_text_methods.py +++ b/python/cudf/cudf/tests/text/test_text_methods.py @@ -7,6 +7,7 @@ import pytest import cudf +from cudf.core.tokenize_vocabulary import TokenizeVocabulary from cudf.testing._utils import assert_eq @@ -156,6 +157,64 @@ def test_token_count(delimiter, expected_token_counts): assert_eq(expected, actual, check_dtype=False) +@pytest.mark.parametrize( + "delimiter, input, default_id, results", + [ + ( + "", + "the quick brown fox jumps over the lazy brown dog", + 99, + [0, 1, 2, 3, 4, 5, 0, 99, 2, 6], + ), + ( + " ", + " the sable siamésé cat jumps 
under the brown sofa ", + -1, + [0, 7, 8, 9, 4, 10, 0, 2, 11], + ), + ( + "_", + "the_quick_brown_fox_jumped__over_the_lazy_brown_dog", + -99, + [0, 1, 2, 3, -99, 5, 0, -99, 2, 6], + ), + ], +) +def test_tokenize_with_vocabulary(delimiter, input, default_id, results): + vocabulary = cudf.Series( + [ + "the", + "quick", + "brown", + "fox", + "jumps", + "over", + "dog", + "sable", + "siamésé", + "cat", + "under", + "sofa", + ] + ) + tokenizer = TokenizeVocabulary(vocabulary) + + strings = cudf.Series([input, None, "", input]) + + expected = cudf.Series( + [ + cudf.Series(results, dtype=np.int32), + None, + cudf.Series([], dtype=np.int32), + cudf.Series(results, dtype=np.int32), + ] + ) + + actual = tokenizer.tokenize(strings, delimiter, default_id) + assert type(expected) == type(actual) + assert_eq(expected, actual) + + def test_normalize_spaces(): strings = cudf.Series( [ From 31e56702fe15f44b3e849207d31d0bb79c307367 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Wed, 27 Sep 2023 09:29:35 +0530 Subject: [PATCH 24/76] Workaround for illegal instruction error in sm90 for warp instrinsics with mask (#14201) Workaround for illegal instruction error in sm90 for warp instrinsics with non `0xffffffff` mask Removed the mask, and used ~0u (`0xffffffff`) as MASK because - all threads in warp has correct data on error since is_within_bounds==true thread update error. - init_state is not required at last iteration only where MASK is not ~0u. 
Fixes #14183 Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Divye Gala (https://github.com/divyegala) - Elias Stehle (https://github.com/elstehle) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/14201 --- cpp/src/io/utilities/data_casting.cu | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu index d16237d7afe..9e5c5c76392 100644 --- a/cpp/src/io/utilities/data_casting.cu +++ b/cpp/src/io/utilities/data_casting.cu @@ -534,8 +534,7 @@ __global__ void parse_fn_string_parallel(str_tuple_it str_tuples, char_index < cudf::util::round_up_safe(in_end - in_begin, static_cast(BLOCK_SIZE)); char_index += BLOCK_SIZE) { bool const is_within_bounds = char_index < (in_end - in_begin); - auto const MASK = is_warp ? __ballot_sync(0xffffffff, is_within_bounds) : 0xffffffff; - auto const c = is_within_bounds ? in_begin[char_index] : '\0'; + auto const c = is_within_bounds ? in_begin[char_index] : '\0'; auto const prev_c = (char_index > 0 and is_within_bounds) ? 
in_begin[char_index - 1] : '\0'; auto const escaped_char = get_escape_char(c); @@ -571,7 +570,7 @@ __global__ void parse_fn_string_parallel(str_tuple_it str_tuples, __shared__ typename SlashScan::TempStorage temp_slash[num_warps]; SlashScan(temp_slash[warp_id]).InclusiveScan(curr, scanned, composite_op); is_escaping_backslash = scanned.get(init_state); - init_state = __shfl_sync(MASK, is_escaping_backslash, BLOCK_SIZE - 1); + init_state = __shfl_sync(~0u, is_escaping_backslash, BLOCK_SIZE - 1); __syncwarp(); is_slash.shift(warp_id); is_slash.set_bits(warp_id, is_escaping_backslash); @@ -604,7 +603,7 @@ __global__ void parse_fn_string_parallel(str_tuple_it str_tuples, } // Make sure all threads have no errors before continuing if constexpr (is_warp) { - error = __any_sync(MASK, error); + error = __any_sync(~0u, error); } else { using ErrorReduce = cub::BlockReduce; __shared__ typename ErrorReduce::TempStorage temp_storage_error; @@ -932,13 +931,8 @@ std::unique_ptr parse_data( auto str_tuples = thrust::make_transform_iterator(offset_length_begin, to_string_view_pair{data}); if (col_type == cudf::data_type{cudf::type_id::STRING}) { - return parse_string(str_tuples, - col_size, - std::forward(null_mask), - d_null_count, - options, - stream, - mr); + return parse_string( + str_tuples, col_size, std::move(null_mask), d_null_count, options, stream, mr); } auto out_col = From cdc03a73db880e294f8c4916d942a4568a64d5db Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 27 Sep 2023 14:15:33 +0100 Subject: [PATCH 25/76] Marginally reduce memory footprint of joins (#14197) If we drop the gather maps as soon as we are done with them, we have a little more headroom for joins that are close to hitting the device memory limit. 
Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/14197 --- python/cudf/cudf/core/join/join.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 6a6e37180ca..b94f8f583f4 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -203,6 +203,7 @@ def perform_merge(self) -> cudf.DataFrame: if left_rows is not None else cudf.DataFrame._from_data({}) ) + del left_rows right_result = ( self.rhs._gather( GatherMap.from_column_unchecked( @@ -213,7 +214,7 @@ def perform_merge(self) -> cudf.DataFrame: if right_rows is not None else cudf.DataFrame._from_data({}) ) - + del right_rows result = cudf.DataFrame._from_data( *self._merge_results(left_result, right_result) ) From ce247961216dd70f389763dc086f137c11ad7346 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Wed, 27 Sep 2023 10:10:31 -0700 Subject: [PATCH 26/76] Implement `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations (#14045) This adds two more aggregations for groupby and reduction: * `HISTOGRAM`: Count the number of occurrences (aka frequency) for each element, and * `MERGE_HISTOGRAM`: Merge different outputs generated by `HISTOGRAM` aggregations This is the prerequisite for implementing the exact distributed percentile aggregation (https://github.com/rapidsai/cudf/issues/13885). However, these two new aggregations may be useful in other use-cases that need to do frequency counting. Closes https://github.com/rapidsai/cudf/issues/13885. Merging checklist: * [X] Working prototypes. * [X] Cleanup and docs. * [X] Unit test. * [ ] Test with spark-rapids integration tests. 
Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Yunsong Wang (https://github.com/PointKernel) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/14045 --- cpp/CMakeLists.txt | 2 + cpp/include/cudf/aggregation.hpp | 22 +- .../cudf/detail/aggregation/aggregation.hpp | 60 +++ .../cudf/detail/hash_reduce_by_row.cuh | 4 + .../cudf/reduction/detail/histogram.hpp | 57 +++ .../reduction/detail/reduction_functions.hpp | 27 ++ cpp/src/aggregation/aggregation.cpp | 42 ++ cpp/src/groupby/groupby.cu | 10 + cpp/src/groupby/sort/aggregate.cpp | 30 ++ cpp/src/groupby/sort/group_histogram.cu | 152 +++++++ cpp/src/groupby/sort/group_reductions.hpp | 57 ++- cpp/src/reductions/histogram.cu | 273 ++++++++++++ cpp/src/reductions/reductions.cpp | 12 + cpp/tests/CMakeLists.txt | 1 + cpp/tests/groupby/histogram_tests.cpp | 396 ++++++++++++++++++ cpp/tests/reductions/reduction_tests.cpp | 207 +++++++++ 16 files changed, 1349 insertions(+), 3 deletions(-) create mode 100644 cpp/include/cudf/reduction/detail/histogram.hpp create mode 100644 cpp/src/groupby/sort/group_histogram.cu create mode 100644 cpp/src/reductions/histogram.cu create mode 100644 cpp/tests/groupby/histogram_tests.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9656bc40fd7..ec58c391001 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -323,6 +323,7 @@ add_library( src/groupby/sort/group_collect.cu src/groupby/sort/group_correlation.cu src/groupby/sort/group_count.cu + src/groupby/sort/group_histogram.cu src/groupby/sort/group_m2.cu src/groupby/sort/group_max.cu src/groupby/sort/group_min.cu @@ -471,6 +472,7 @@ add_library( src/reductions/all.cu src/reductions/any.cu src/reductions/collect_ops.cu + src/reductions/histogram.cu src/reductions/max.cu src/reductions/mean.cu src/reductions/min.cu diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 
d319041f8b1..d458c831f19 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -116,7 +116,9 @@ class aggregation { COVARIANCE, ///< covariance between two sets of elements CORRELATION, ///< correlation between two sets of elements TDIGEST, ///< create a tdigest from a set of input values - MERGE_TDIGEST ///< create a tdigest by merging multiple tdigests together + MERGE_TDIGEST, ///< create a tdigest by merging multiple tdigests together + HISTOGRAM, ///< compute frequency of each element + MERGE_HISTOGRAM ///< merge partial values of HISTOGRAM aggregation, }; aggregation() = delete; @@ -288,6 +290,11 @@ std::unique_ptr make_any_aggregation(); template std::unique_ptr make_all_aggregation(); +/// Factory to create a HISTOGRAM aggregation +/// @return A HISTOGRAM aggregation object +template +std::unique_ptr make_histogram_aggregation(); + /// Factory to create a SUM_OF_SQUARES aggregation /// @return A SUM_OF_SQUARES aggregation object template @@ -610,6 +617,17 @@ std::unique_ptr make_merge_sets_aggregation( template std::unique_ptr make_merge_m2_aggregation(); +/** + * @brief Factory to create a MERGE_HISTOGRAM aggregation + * + * Merges the results of `HISTOGRAM` aggregations on independent sets into a new `HISTOGRAM` value + * equivalent to if a single `HISTOGRAM` aggregation was done across all of the sets at once. 
+ * + * @return A MERGE_HISTOGRAM aggregation object + */ +template +std::unique_ptr make_merge_histogram_aggregation(); + /** * @brief Factory to create a COVARIANCE aggregation * diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 4d3984cab93..784f05a964e 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -45,6 +45,8 @@ class simple_aggregations_collector { // Declares the interface for the simple class max_aggregation const& agg); virtual std::vector> visit(data_type col_type, class count_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class histogram_aggregation const& agg); virtual std::vector> visit(data_type col_type, class any_aggregation const& agg); virtual std::vector> visit(data_type col_type, @@ -89,6 +91,8 @@ class simple_aggregations_collector { // Declares the interface for the simple class merge_sets_aggregation const& agg); virtual std::vector> visit(data_type col_type, class merge_m2_aggregation const& agg); + virtual std::vector> visit( + data_type col_type, class merge_histogram_aggregation const& agg); virtual std::vector> visit(data_type col_type, class covariance_aggregation const& agg); virtual std::vector> visit(data_type col_type, @@ -108,6 +112,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class min_aggregation const& agg); virtual void visit(class max_aggregation const& agg); virtual void visit(class count_aggregation const& agg); + virtual void visit(class histogram_aggregation const& agg); virtual void visit(class any_aggregation const& agg); virtual void visit(class all_aggregation const& agg); virtual void visit(class sum_of_squares_aggregation const& agg); @@ -130,6 +135,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class merge_lists_aggregation const& 
agg); virtual void visit(class merge_sets_aggregation const& agg); virtual void visit(class merge_m2_aggregation const& agg); + virtual void visit(class merge_histogram_aggregation const& agg); virtual void visit(class covariance_aggregation const& agg); virtual void visit(class correlation_aggregation const& agg); virtual void visit(class tdigest_aggregation const& agg); @@ -251,6 +257,25 @@ class count_aggregation final : public rolling_aggregation, void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; +/** + * @brief Derived class for specifying a histogram aggregation + */ +class histogram_aggregation final : public groupby_aggregation, public reduce_aggregation { + public: + histogram_aggregation() : aggregation(HISTOGRAM) {} + + [[nodiscard]] std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + /** * @brief Derived class for specifying an any aggregation */ @@ -972,6 +997,25 @@ class merge_m2_aggregation final : public groupby_aggregation { void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; +/** + * @brief Derived aggregation class for specifying MERGE_HISTOGRAM aggregation + */ +class merge_histogram_aggregation final : public groupby_aggregation, public reduce_aggregation { + public: + explicit merge_histogram_aggregation() : aggregation{MERGE_HISTOGRAM} {} + + [[nodiscard]] std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override 
{ finalizer.visit(*this); } +}; + /** * @brief Derived aggregation class for specifying COVARIANCE aggregation */ @@ -1148,6 +1192,12 @@ struct target_type_impl { using type = size_type; }; +// Use list for HISTOGRAM +template +struct target_type_impl { + using type = list_view; +}; + // Computing ANY of any type, use bool accumulator template struct target_type_impl { @@ -1326,6 +1376,12 @@ struct target_type_impl { using type = struct_view; }; +// Use list for MERGE_HISTOGRAM +template +struct target_type_impl { + using type = list_view; +}; + // Always use double for COVARIANCE template struct target_type_impl { @@ -1417,6 +1473,8 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind return f.template operator()(std::forward(args)...); case aggregation::COUNT_ALL: return f.template operator()(std::forward(args)...); + case aggregation::HISTOGRAM: + return f.template operator()(std::forward(args)...); case aggregation::ANY: return f.template operator()(std::forward(args)...); case aggregation::ALL: @@ -1460,6 +1518,8 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind return f.template operator()(std::forward(args)...); case aggregation::MERGE_M2: return f.template operator()(std::forward(args)...); + case aggregation::MERGE_HISTOGRAM: + return f.template operator()(std::forward(args)...); case aggregation::COVARIANCE: return f.template operator()(std::forward(args)...); case aggregation::CORRELATION: diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index 2d2b43f1d4a..f63d1922950 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -14,12 +14,15 @@ * limitations under the License. 
*/ +#include +#include #include #include #include #include #include +#include #include #include @@ -29,6 +32,7 @@ namespace cudf::detail { +using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; using hash_map_type = cuco::static_map; diff --git a/cpp/include/cudf/reduction/detail/histogram.hpp b/cpp/include/cudf/reduction/detail/histogram.hpp new file mode 100644 index 00000000000..97c711fda4e --- /dev/null +++ b/cpp/include/cudf/reduction/detail/histogram.hpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf::reduction::detail { + +/** + * @brief Compute the frequency for each distinct row in the input table. 
+ * + * @param input The input table to compute histogram + * @param partial_counts An optional column containing count for each row + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate memory of the returned objects + * @return A pair of array contains the (stable-order) indices of the distinct rows in the input + * table, and their corresponding distinct counts + */ +[[nodiscard]] std::pair>, std::unique_ptr> +compute_row_frequencies(table_view const& input, + std::optional const& partial_counts, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Create an empty histogram column. + * + * A histogram column is a structs column `STRUCT` where T is type of the input + * values. + * + * @returns An empty histogram column + */ +[[nodiscard]] std::unique_ptr make_empty_histogram_like(column_view const& values); + +} // namespace cudf::reduction::detail diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index 014a6ba70eb..704332c8e1d 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -131,6 +131,33 @@ std::unique_ptr all(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Compute frequency for each unique element in the input column. + * + * The result histogram is stored in structs column having two children. The first child contains + * unique elements from the input, and the second child contains their corresponding frequencies. 
+ * + * @param input The column to compute histogram + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return A list_scalar storing a structs column as the result histogram + */ +std::unique_ptr histogram(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Merge multiple histograms together. + * + * @param input The input given as multiple histograms concatenated together + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return A list_scalar storing the result histogram + */ +std::unique_ptr merge_histogram(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Computes product of elements in input column * diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 2e6a643484e..b3f2a774a60 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -64,6 +64,12 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } +std::vector> simple_aggregations_collector::visit( + data_type col_type, histogram_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + std::vector> simple_aggregations_collector::visit( data_type col_type, any_aggregation const& agg) { @@ -196,6 +202,12 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } +std::vector> simple_aggregations_collector::visit( + data_type col_type, merge_histogram_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + std::vector> simple_aggregations_collector::visit( data_type col_type, covariance_aggregation const& agg) { @@ -246,6 +258,10 @@ void 
aggregation_finalizer::visit(count_aggregation const& agg) { visit(static_cast(agg)); } +void aggregation_finalizer::visit(histogram_aggregation const& agg) +{ + visit(static_cast(agg)); +} void aggregation_finalizer::visit(any_aggregation const& agg) { @@ -357,6 +373,11 @@ void aggregation_finalizer::visit(merge_m2_aggregation const& agg) visit(static_cast(agg)); } +void aggregation_finalizer::visit(merge_histogram_aggregation const& agg) +{ + visit(static_cast(agg)); +} + void aggregation_finalizer::visit(covariance_aggregation const& agg) { visit(static_cast(agg)); @@ -460,6 +481,16 @@ template std::unique_ptr make_count_aggregation make_count_aggregation( null_policy null_handling); +/// Factory to create a HISTOGRAM aggregation +template +std::unique_ptr make_histogram_aggregation() +{ + return std::make_unique(); +} +template std::unique_ptr make_histogram_aggregation(); +template std::unique_ptr make_histogram_aggregation(); +template std::unique_ptr make_histogram_aggregation(); + /// Factory to create a ANY aggregation template std::unique_ptr make_any_aggregation() @@ -764,6 +795,17 @@ std::unique_ptr make_merge_m2_aggregation() template std::unique_ptr make_merge_m2_aggregation(); template std::unique_ptr make_merge_m2_aggregation(); +/// Factory to create a MERGE_HISTOGRAM aggregation +template +std::unique_ptr make_merge_histogram_aggregation() +{ + return std::make_unique(); +} +template std::unique_ptr make_merge_histogram_aggregation(); +template std::unique_ptr +make_merge_histogram_aggregation(); +template std::unique_ptr make_merge_histogram_aggregation(); + /// Factory to create a COVARIANCE aggregation template std::unique_ptr make_covariance_aggregation(size_type min_periods, size_type ddof) diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index ce1fc71968f..e3c021eb66a 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include 
@@ -110,6 +111,15 @@ struct empty_column_constructor { 0, make_empty_column(type_to_id()), empty_like(values), 0, {}); } + if constexpr (k == aggregation::Kind::HISTOGRAM) { + return make_lists_column(0, + make_empty_column(type_to_id()), + cudf::reduction::detail::make_empty_histogram_like(values), + 0, + {}); + } + if constexpr (k == aggregation::Kind::MERGE_HISTOGRAM) { return empty_like(values); } + if constexpr (k == aggregation::Kind::RANK) { auto const& rank_agg = dynamic_cast(agg); if (rank_agg._method == cudf::rank_method::AVERAGE or diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 3f977dc81d7..10c271f76f9 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -89,6 +89,18 @@ void aggregate_result_functor::operator()(aggregation co detail::group_count_all(helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(values, agg)) return; + + cache.add_result( + values, + agg, + detail::group_histogram( + get_grouped_values(), helper.group_labels(stream), helper.num_groups(stream), stream, mr)); +} + template <> void aggregate_result_functor::operator()(aggregation const& agg) { @@ -534,6 +546,24 @@ void aggregate_result_functor::operator()(aggregation con get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } +/** + * @brief Perform merging for multiple histograms that correspond to the same key value. + * + * The partial results input to this aggregation is a structs column that is concatenated from + * multiple outputs of HISTOGRAM aggregations. 
+ */ +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(values, agg)) { return; } + + cache.add_result( + values, + agg, + detail::group_merge_histogram( + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); +} + /** * @brief Creates column views with only valid elements in both input column views * diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu new file mode 100644 index 00000000000..bb70037aaef --- /dev/null +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace cudf::groupby::detail { + +namespace { + +std::unique_ptr build_histogram(column_view const& values, + cudf::device_span group_labels, + std::optional const& partial_counts, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), + "Size of values column should be the same as that of group labels.", + std::invalid_argument); + + // Attach group labels to the input values. 
+ auto const labels_cv = column_view{data_type{type_to_id()}, + static_cast(group_labels.size()), + group_labels.data(), + nullptr, + 0}; + auto const labeled_values = table_view{{labels_cv, values}}; + + // Build histogram for the labeled values. + auto [distinct_indices, distinct_counts] = + cudf::reduction::detail::compute_row_frequencies(labeled_values, partial_counts, stream, mr); + + // Gather the distinct rows for the output histogram. + auto out_table = cudf::detail::gather(labeled_values, + *distinct_indices, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + + // Build offsets for the output lists column containing output histograms. + // Each list will be a histogram corresponding to one value group. + auto out_offsets = cudf::lists::detail::reconstruct_offsets( + out_table->get_column(0).view(), num_groups, stream, mr); + + std::vector> struct_children; + struct_children.emplace_back(std::move(out_table->release().back())); + struct_children.emplace_back(std::move(distinct_counts)); + auto out_structs = make_structs_column(static_cast(distinct_indices->size()), + std::move(struct_children), + 0, + {}, + stream, + mr); + + return make_lists_column( + num_groups, std::move(out_offsets), std::move(out_structs), 0, {}, stream, mr); +} + +} // namespace + +std::unique_ptr group_histogram(column_view const& values, + cudf::device_span group_labels, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Empty group should be handled before reaching here. 
+ CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument); + + return build_histogram(values, group_labels, std::nullopt, num_groups, stream, mr); +} + +std::unique_ptr group_merge_histogram(column_view const& values, + cudf::device_span group_offsets, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Empty group should be handled before reaching here. + CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument); + + // The input must be a lists column without nulls. + CUDF_EXPECTS(!values.has_nulls(), "The input column must not have nulls.", std::invalid_argument); + CUDF_EXPECTS(values.type().id() == type_id::LIST, + "The input of MERGE_HISTOGRAM aggregation must be a lists column.", + std::invalid_argument); + + // Child of the input lists column must be a structs column without nulls, + // and its second child is a columns of integer type having no nulls. + auto const lists_cv = lists_column_view{values}; + auto const histogram_cv = lists_cv.get_sliced_child(stream); + CUDF_EXPECTS(!histogram_cv.has_nulls(), + "Child of the input lists column must not have nulls.", + std::invalid_argument); + CUDF_EXPECTS(histogram_cv.type().id() == type_id::STRUCT && histogram_cv.num_children() == 2, + "The input column has invalid histograms structure.", + std::invalid_argument); + CUDF_EXPECTS( + cudf::is_integral(histogram_cv.child(1).type()) && !histogram_cv.child(1).has_nulls(), + "The input column has invalid histograms structure.", + std::invalid_argument); + + // Concatenate the histograms corresponding to the same key values. + // That is equivalent to creating a new lists column (view) from the input lists column + // with new offsets gathered as below. 
+ auto new_offsets = rmm::device_uvector(num_groups + 1, stream); + thrust::gather(rmm::exec_policy(stream), + group_offsets.begin(), + group_offsets.end(), + lists_cv.offsets_begin(), + new_offsets.begin()); + + // Generate labels for the new lists. + auto key_labels = rmm::device_uvector(histogram_cv.size(), stream); + cudf::detail::label_segments( + new_offsets.begin(), new_offsets.end(), key_labels.begin(), key_labels.end(), stream); + + auto const structs_cv = structs_column_view{histogram_cv}; + auto const input_values = structs_cv.get_sliced_child(0, stream); + auto const input_counts = structs_cv.get_sliced_child(1, stream); + + return build_histogram(input_values, key_labels, input_counts, num_groups, stream, mr); +} + +} // namespace cudf::groupby::detail diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index fc24b679db5..3aa79f226a3 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -216,6 +216,33 @@ std::unique_ptr group_count_all(cudf::device_span group size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Internal API to compute histogram for each group in @p values. + * + * The returned column is a lists column, each list corresponds to one input group and stores the + * histogram of the distinct elements in that group in the form of `STRUCT`. + * + * Note that the order of distinct elements in each output list is not specified. 
+ * + * @code{.pseudo} + * values = [2, 1, 1, 3, 5, 2, 2, 3, 1, 4] + * group_labels = [0, 0, 0, 1, 1, 1, 1, 1, 2, 2] + * num_groups = 3 + * + * output = [[<1, 2>, <2, 1>], [<2, 2>, <3, 2>, <5, 1>], [<1, 1>, <4, 1>]] + * @endcode + * + * @param values Grouped values to compute histogram + * @param group_labels ID of group that the corresponding value belongs to + * @param num_groups Number of groups + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr group_histogram(column_view const& values, + cudf::device_span group_labels, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Internal API to calculate sum of squares of differences from means. @@ -441,6 +468,34 @@ std::unique_ptr group_merge_m2(column_view const& values, size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + +/** + * @brief Internal API to merge multiple output of HISTOGRAM aggregation. + * + * The input values column should be given as a lists column in the form of + * `LIST>`. + * After merging, the order of distinct elements in each output list is not specified. 
+ * + * @code{.pseudo} + * values = [ [<1, 2>, <2, 1>], [<2, 2>], [<3, 2>, <2, 1>], [<1, 1>, <2, 1>] ] + * group_offsets = [ 0, 2, 4] + * num_groups = 2 + * + * output = [[<1, 2>, <2, 3>], [<1, 1>, <2, 2>, <3, 2>]]] + * @endcode + * + * @param values Grouped values to get valid count of + * @param group_offsets Offsets of groups' starting points within @p values + * @param num_groups Number of groups + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr group_merge_histogram(column_view const& values, + cudf::device_span group_offsets, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Internal API to find covariance of child columns of a non-nullable struct column. * diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu new file mode 100644 index 00000000000..fa84bbeb25d --- /dev/null +++ b/cpp/src/reductions/histogram.cu @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +namespace cudf::reduction::detail { + +namespace { + +// Always use 64-bit signed integer for storing count. 
+using histogram_count_type = int64_t; + +/** + * @brief The functor to accumulate the frequency of each distinct rows in the input table. + */ +template +struct reduce_fn : cudf::detail::reduce_by_row_fn_base { + CountType const* d_partial_output; + + reduce_fn(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + CountType* const d_output, + CountType const* const d_partial_output) + : cudf::detail::reduce_by_row_fn_base{d_map, + d_hasher, + d_equal, + d_output}, + d_partial_output{d_partial_output} + { + } + + // Count the number of rows in each group of rows that are compared equal. + __device__ void operator()(size_type const idx) const + { + auto const increment = d_partial_output ? d_partial_output[idx] : CountType{1}; + auto const count = + cuda::atomic_ref(*this->get_output_ptr(idx)); + count.fetch_add(increment, cuda::std::memory_order_relaxed); + } +}; + +/** + * @brief The builder to construct an instance of `reduce_fn` functor. + */ +template +struct reduce_func_builder { + CountType const* const d_partial_output; + + reduce_func_builder(CountType const* const d_partial_output) : d_partial_output{d_partial_output} + { + } + + template + auto build(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + CountType* const d_output) + { + return reduce_fn{ + d_map, d_hasher, d_equal, d_output, d_partial_output}; + } +}; + +/** + * @brief Specialized functor to check for not-zero of the second component of the input. + */ +struct is_not_zero { + template + __device__ bool operator()(Pair const input) const + { + return thrust::get<1>(input) != 0; + } +}; + +/** + * @brief Building a histogram by gathering distinct rows from the input table and their + * corresponding distinct counts. 
+ * + * @param input The input table + * @param distinct_indices Indices of the distinct rows + * @param distinct_counts Distinct counts corresponding to the distinct rows + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned object's device memory + * @return A list_scalar storing the output histogram + */ +auto gather_histogram(table_view const& input, + device_span distinct_indices, + std::unique_ptr&& distinct_counts, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto distinct_rows = cudf::detail::gather(input, + distinct_indices, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + + std::vector> struct_children; + struct_children.emplace_back(std::move(distinct_rows->release().front())); + struct_children.emplace_back(std::move(distinct_counts)); + auto output_structs = make_structs_column( + static_cast(distinct_indices.size()), std::move(struct_children), 0, {}, stream, mr); + + return std::make_unique( + std::move(*output_structs.release()), true, stream, mr); +} + +} // namespace + +std::unique_ptr make_empty_histogram_like(column_view const& values) +{ + std::vector> struct_children; + struct_children.emplace_back(empty_like(values)); + struct_children.emplace_back(make_numeric_column(data_type{type_id::INT64}, 0)); + return std::make_unique(data_type{type_id::STRUCT}, + 0, + rmm::device_buffer{}, + rmm::device_buffer{}, + 0, + std::move(struct_children)); +} + +std::pair>, std::unique_ptr> +compute_row_frequencies(table_view const& input, + std::optional const& partial_counts, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const has_nested_columns = cudf::detail::has_nested_columns(input); + + // Nested types are not tested, thus we just throw exception if we see such input for now. + // We should remove this check after having enough tests. 
+ CUDF_EXPECTS(!has_nested_columns, + "Nested types are not yet supported in histogram aggregation.", + std::invalid_argument); + + auto map = cudf::detail::hash_map_type{ + compute_hash_table_size(input.num_rows()), + cuco::empty_key{-1}, + cuco::empty_value{std::numeric_limits::min()}, + cudf::detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + + auto const preprocessed_input = + cudf::experimental::row::hash::preprocessed_table::create(input, stream); + auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; + + auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); + auto const key_hasher = row_hasher.device_hasher(has_nulls); + auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); + + auto const pair_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, [] __device__(size_type const i) { return cuco::make_pair(i, i); }); + + // Always compare NaNs as equal. + using nan_equal_comparator = + cudf::experimental::row::equality::nan_equal_physical_equality_comparator; + auto const value_comp = nan_equal_comparator{}; + + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } + + // Gather the indices of distinct rows. + auto distinct_indices = std::make_unique>( + static_cast(map.get_size()), stream, mr); + + // Store the number of occurrences of each distinct row. + auto distinct_counts = make_numeric_column(data_type{type_to_id()}, + static_cast(map.get_size()), + mask_state::UNALLOCATED, + stream, + mr); + + // Compute frequencies (aka distinct counts) for the input rows. 
+ // Note that we consider null and NaNs as always equal. + auto const reduction_results = cudf::detail::hash_reduce_by_row( + map, + preprocessed_input, + input.num_rows(), + has_nulls, + has_nested_columns, + null_equality::EQUAL, + nan_equality::ALL_EQUAL, + reduce_func_builder{ + partial_counts ? partial_counts.value().begin() : nullptr}, + histogram_count_type{0}, + stream, + rmm::mr::get_current_device_resource()); + + auto const input_it = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); + auto const output_it = thrust::make_zip_iterator(thrust::make_tuple( + distinct_indices->begin(), distinct_counts->mutable_view().begin())); + + // Reduction results above are either group sizes of equal rows, or `0`. + // The final output is non-zero group sizes only. + thrust::copy_if( + rmm::exec_policy(stream), input_it, input_it + input.num_rows(), output_it, is_not_zero{}); + + return {std::move(distinct_indices), std::move(distinct_counts)}; +} + +std::unique_ptr histogram(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Empty group should be handled before reaching here. + CUDF_EXPECTS(input.size() > 0, "Input should not be empty.", std::invalid_argument); + + auto const input_tv = table_view{{input}}; + auto [distinct_indices, distinct_counts] = + compute_row_frequencies(input_tv, std::nullopt, stream, mr); + return gather_histogram(input_tv, *distinct_indices, std::move(distinct_counts), stream, mr); +} + +std::unique_ptr merge_histogram(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Empty group should be handled before reaching here. 
+ CUDF_EXPECTS(input.size() > 0, "Input should not be empty.", std::invalid_argument); + CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls.", std::invalid_argument); + CUDF_EXPECTS(input.type().id() == type_id::STRUCT && input.num_children() == 2, + "The input must be a structs column having two children.", + std::invalid_argument); + CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(), + "The second child of the input column must be of integral type and without nulls.", + std::invalid_argument); + + auto const structs_cv = structs_column_view{input}; + auto const input_values = structs_cv.get_sliced_child(0, stream); + auto const input_counts = structs_cv.get_sliced_child(1, stream); + + auto const values_tv = table_view{{input_values}}; + auto [distinct_indices, distinct_counts] = + compute_row_frequencies(values_tv, input_counts, stream, mr); + return gather_histogram(values_tv, *distinct_indices, std::move(distinct_counts), stream, mr); +} + +} // namespace cudf::reduction::detail diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 2fef8aa8785..23171baaa45 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,8 @@ struct reduce_dispatch_functor { case aggregation::MAX: return max(col, output_dtype, init, stream, mr); case aggregation::ANY: return any(col, output_dtype, init, stream, mr); case aggregation::ALL: return all(col, output_dtype, init, stream, mr); + case aggregation::HISTOGRAM: return histogram(col, stream, mr); + case aggregation::MERGE_HISTOGRAM: return merge_histogram(col, stream, mr); case aggregation::SUM_OF_SQUARES: return sum_of_squares(col, output_dtype, stream, mr); case aggregation::MEAN: return mean(col, output_dtype, stream, mr); case aggregation::VARIANCE: { @@ -165,6 +168,15 @@ std::unique_ptr reduce(column_view const& 
col, return tdigest::detail::make_empty_tdigest_scalar(stream, mr); } + if (agg.kind == aggregation::HISTOGRAM) { + return std::make_unique( + std::move(*reduction::detail::make_empty_histogram_like(col)), true, stream, mr); + } + if (agg.kind == aggregation::MERGE_HISTOGRAM) { + return std::make_unique( + std::move(*reduction::detail::make_empty_histogram_like(col.child(0))), true, stream, mr); + } + if (output_dtype.id() == type_id::LIST) { if (col.type() == output_dtype) { return make_empty_scalar_like(col, stream, mr); } // Under some circumstance, the output type will become the List of input type, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 68ff6c54c99..04939f3cd6d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -116,6 +116,7 @@ ConfigureTest( groupby/covariance_tests.cpp groupby/groupby_test_util.cpp groupby/groups_tests.cpp + groupby/histogram_tests.cpp groupby/keys_tests.cpp groupby/lists_tests.cpp groupby/m2_tests.cpp diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp new file mode 100644 index 00000000000..c5833f40cf2 --- /dev/null +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using int32s_col = cudf::test::fixed_width_column_wrapper; +using int64s_col = cudf::test::fixed_width_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; + +auto groupby_histogram(cudf::column_view const& keys, + cudf::column_view const& values, + cudf::aggregation::Kind agg_kind) +{ + CUDF_EXPECTS( + agg_kind == cudf::aggregation::HISTOGRAM || agg_kind == cudf::aggregation::MERGE_HISTOGRAM, + "Aggregation must be either HISTOGRAM or MERGE_HISTOGRAM."); + + std::vector requests; + requests.emplace_back(); + requests[0].values = values; + if (agg_kind == cudf::aggregation::HISTOGRAM) { + requests[0].aggregations.push_back( + cudf::make_histogram_aggregation()); + } else { + requests[0].aggregations.push_back( + cudf::make_merge_histogram_aggregation()); + } + + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); + auto const agg_results = gb_obj.aggregate(requests, cudf::test::get_default_stream()); + auto const agg_histogram = agg_results.second[0].results[0]->view(); + EXPECT_EQ(agg_histogram.type().id(), cudf::type_id::LIST); + EXPECT_EQ(agg_histogram.null_count(), 0); + + auto const histograms = cudf::lists_column_view{agg_histogram}.child(); + EXPECT_EQ(histograms.num_children(), 2); + EXPECT_EQ(histograms.null_count(), 0); + EXPECT_EQ(histograms.child(1).null_count(), 0); + + auto const key_sort_order = cudf::sorted_order(agg_results.first->view(), {}, {}); + auto sorted_keys = + std::move(cudf::gather(agg_results.first->view(), *key_sort_order)->release().front()); + auto const sorted_vals = + std::move(cudf::gather(cudf::table_view{{agg_histogram}}, *key_sort_order)->release().front()); + auto sorted_histograms = cudf::lists::sort_lists(cudf::lists_column_view{*sorted_vals}, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + rmm::mr::get_current_device_resource()); + + return std::pair{std::move(sorted_keys), 
std::move(sorted_histograms)}; +} + +template +struct GroupbyHistogramTest : public cudf::test::BaseFixture {}; + +template +struct GroupbyMergeHistogramTest : public cudf::test::BaseFixture {}; + +// Avoid unsigned types, as the tests below have negative values in their input. +using HistogramTestTypes = cudf::test::Concat, + cudf::test::FloatingPointTypes, + cudf::test::FixedPointTypes, + cudf::test::ChronoTypes>; +TYPED_TEST_SUITE(GroupbyHistogramTest, HistogramTestTypes); +TYPED_TEST_SUITE(GroupbyMergeHistogramTest, HistogramTestTypes); + +TYPED_TEST(GroupbyHistogramTest, EmptyInput) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + auto const keys = int32s_col{}; + auto const values = col_data{}; + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + + // The structure of the output is already verified in the function `groupby_histogram`. + ASSERT_EQ(res_histogram->size(), 0); +} + +TYPED_TEST(GroupbyHistogramTest, SimpleInputNoNull) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + // key = 0: values = [2, 2, -3, -2, 2] + // key = 1: values = [2, 0, 5, 2, 1] + // key = 2: values = [-3, 1, 1, 2, 2] + auto const keys = int32s_col{2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2}; + auto const values = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2}; + + auto const expected_keys = int32s_col{0, 1, 2}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{-3, -2, 2, 0, 1, 2, 5, -3, 1, 2}; + auto counts = int64s_col{1, 1, 3, 1, 1, 2, 1, 1, 2, 2}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 3, int32s_col{0, 3, 7, 10}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, 
*res_histogram); +} + +TYPED_TEST(GroupbyHistogramTest, SlicedInputNoNull) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + auto const keys_original = int32s_col{2, 0, 2, 1, 0, 2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2}; + auto const values_original = + col_data{1, 2, 0, 2, 1, -3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2}; + // key = 0: values = [2, 2, -3, -2, 2] + // key = 1: values = [2, 0, 5, 2, 1] + // key = 2: values = [-3, 1, 1, 2, 2] + auto const keys = cudf::slice(keys_original, {5, 20})[0]; + auto const values = cudf::slice(values_original, {5, 20})[0]; + + auto const expected_keys = int32s_col{0, 1, 2}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{-3, -2, 2, 0, 1, 2, 5, -3, 1, 2}; + auto counts = int64s_col{1, 1, 3, 1, 1, 2, 1, 1, 2, 2}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 3, int32s_col{0, 3, 7, 10}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyHistogramTest, InputWithNulls) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using namespace cudf::test::iterators; + auto constexpr null{0}; + + // key = 0: values = [-3, null, 2, null, 2] + // key = 1: values = [1, 2, null, 5, 2, -3, 1, 1] + // key = 2: values = [null, 2, 0, -2, 2, null, 2] + auto const keys = int32s_col{2, 0, 2, 1, 1, 1, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 2, 1, 0, 2}; + auto const values = + col_data{{null, -3, 2, 1, 2, null, 0, 5, 2, null, -3, -2, 2, null, 1, 2, null, 1, 2, 2}, + nulls_at({0, 5, 9, 13, 16})}; + + auto const expected_keys = int32s_col{0, 1, 2}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{{null, -3, 2, null, -3, 1, 2, 5, null, -2, 0, 2}, 
nulls_at({0, 3, 8})}; + auto counts = int64s_col{2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 1, 3}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 3, int32s_col{0, 3, 8, 12}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyHistogramTest, SlicedInputWithNulls) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using namespace cudf::test::iterators; + auto constexpr null{0}; + + auto const keys_original = + int32s_col{1, 0, 2, 2, 0, 2, 0, 2, 1, 1, 1, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 2, 1, 0, 2, 0, 1, 2}; + auto const values_original = + col_data{{null, 1, 1, 2, 1, null, -3, 2, 1, 2, null, 0, 5, 2, + null, -3, -2, 2, null, 1, 2, null, 1, 2, 2, null, 1, 2}, + nulls_at({0, 5, 10, 14, 18, 21, 25})}; + + // key = 0: values = [-3, null, 2, null, 2] + // key = 1: values = [1, 2, null, 5, 2, -3, 1, 1] + // key = 2: values = [null, 2, 0, -2, 2, null, 2] + auto const keys = cudf::slice(keys_original, {5, 25})[0]; + auto const values = cudf::slice(values_original, {5, 25})[0]; + + auto const expected_keys = int32s_col{0, 1, 2}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{{null, -3, 2, null, -3, 1, 2, 5, null, -2, 0, 2}, nulls_at({0, 3, 8})}; + auto counts = int64s_col{2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 1, 3}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 3, int32s_col{0, 3, 8, 12}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + 
+TYPED_TEST(GroupbyMergeHistogramTest, EmptyInput) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + auto const keys = int32s_col{}; + auto const values = [] { + auto structs = [] { + auto values = col_data{}; + auto counts = int64s_col{}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 0, int32s_col{}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + auto const [res_keys, res_histogram] = + groupby_histogram(keys, *values, cudf::aggregation::MERGE_HISTOGRAM); + + // The structure of the output is already verified in the function `groupby_histogram`. + ASSERT_EQ(res_histogram->size(), 0); +} + +TYPED_TEST(GroupbyMergeHistogramTest, SimpleInputNoNull) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + // key = 0: histograms = [[<-3, 1>, <-2, 1>, <2, 3>], [<0, 1>, <1, 1>], [<-3, 3>, <0, 1>, <1, 2>]] + // key = 1: histograms = [[<-2, 1>, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]] + auto const keys = int32s_col{0, 1, 0, 1, 0}; + auto const values = [] { + auto structs = [] { + auto values = col_data{-3, -2, 2, -2, 1, 2, 0, 1, 0, 1, 2, -3, 0, 1}; + auto counts = int64s_col{1, 1, 3, 1, 3, 2, 1, 1, 2, 1, 2, 3, 1, 2}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 5, int32s_col{0, 3, 6, 8, 11, 14}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const expected_keys = int32s_col{0, 1}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{-3, -2, 0, 1, 2, -2, 0, 1, 2}; + auto counts = int64s_col{4, 1, 2, 3, 3, 1, 2, 4, 4}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 2, int32s_col{0, 5, 9}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const [res_keys, res_histogram] = + groupby_histogram(keys, *values, cudf::aggregation::MERGE_HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyMergeHistogramTest, SlicedInputNoNull) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + // key = 0: histograms = [[<-3, 1>, <-2, 1>, <2, 3>], [<0, 1>, <1, 1>], [<-3, 3>, <0, 1>, <1, 2>]] + // key = 1: histograms = [[<-2, 1>, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]] + auto const keys_original = int32s_col{0, 1, 0, 1, 0, 1, 0}; + auto const values_original = [] { + auto structs = [] { + auto values = col_data{0, 2, -3, 1, -3, -2, 2, -2, 1, 2, 0, 1, 0, 1, 2, -3, 0, 1}; + auto counts = int64s_col{1, 2, 3, 1, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, 2, 3, 1, 2}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column(7, + int32s_col{0, 2, 4, 7, 10, 12, 15, 18}.release(), + structs.release(), + 0, + rmm::device_buffer{}); + }(); + auto const keys = cudf::slice(keys_original, {2, 7})[0]; + auto const values = cudf::slice(*values_original, {2, 7})[0]; + + auto const expected_keys = int32s_col{0, 1}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{-3, -2, 0, 1, 2, -2, 0, 1, 2}; + auto counts = int64s_col{4, 1, 2, 3, 3, 1, 2, 4, 4}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 2, int32s_col{0, 5, 9}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::MERGE_HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyMergeHistogramTest, InputWithNulls) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using namespace cudf::test::iterators; + auto constexpr null{0}; + + // key = 0: histograms = [[, <2, 3>], [, <1, 1>], [<0, 1>, <1, 2>]] + // key = 1: histograms = [[, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]] + auto const keys = int32s_col{0, 1, 1, 0, 0}; + 
auto const values = [] { + auto structs = [] { + auto values = col_data{{null, 2, null, 1, 2, 0, 1, 2, null, 1, 0, 1}, nulls_at({0, 2, 8})}; + auto counts = int64s_col{1, 3, 1, 3, 2, 2, 1, 2, 2, 1, 1, 2}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 5, int32s_col{0, 2, 5, 8, 10, 12}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const expected_keys = int32s_col{0, 1}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{{null, 0, 1, 2, null, 0, 1, 2}, nulls_at({0, 4})}; + auto counts = int64s_col{3, 1, 3, 3, 1, 2, 4, 4}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 2, int32s_col{0, 4, 8}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const [res_keys, res_histogram] = + groupby_histogram(keys, *values, cudf::aggregation::MERGE_HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyMergeHistogramTest, SlicedInputWithNulls) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using namespace cudf::test::iterators; + auto constexpr null{0}; + + // key = 0: histograms = [[, <2, 3>], [, <1, 1>], [<0, 1>, <1, 2>]] + // key = 1: histograms = [[, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]] + auto const keys_original = int32s_col{0, 1, 0, 1, 1, 0, 0}; + auto const values_original = [] { + auto structs = [] { + auto values = col_data{{null, 2, null, 1, null, 2, null, 1, 2, 0, 1, 2, null, 1, 0, 1}, + nulls_at({0, 2, 4, 6, 12})}; + auto counts = int64s_col{1, 3, 2, 1, 1, 3, 1, 3, 2, 2, 1, 2, 2, 1, 1, 2}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column(7, + int32s_col{0, 2, 4, 6, 9, 12, 14, 16}.release(), + structs.release(), + 0, + rmm::device_buffer{}); + }(); + auto const keys = cudf::slice(keys_original, {2, 7})[0]; + auto const values = cudf::slice(*values_original, 
{2, 7})[0]; + + auto const expected_keys = int32s_col{0, 1}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{{null, 0, 1, 2, null, 0, 1, 2}, nulls_at({0, 4})}; + auto counts = int64s_col{3, 1, 3, 3, 1, 2, 4, 4}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 2, int32s_col{0, 4, 8}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::MERGE_HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 2561f3f9886..7644ac48892 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -379,6 +380,212 @@ TYPED_TEST(ReductionTest, SumOfSquare) expected_null_value); } +auto histogram_reduction(cudf::column_view const& input, + std::unique_ptr const& agg) +{ + CUDF_EXPECTS( + agg->kind == cudf::aggregation::HISTOGRAM || agg->kind == cudf::aggregation::MERGE_HISTOGRAM, + "Aggregation must be either HISTOGRAM or MERGE_HISTOGRAM."); + + auto const result_scalar = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + EXPECT_EQ(result_scalar->is_valid(), true); + + auto const result_list_scalar = dynamic_cast(result_scalar.get()); + EXPECT_NE(result_list_scalar, nullptr); + + auto const histogram = result_list_scalar->view(); + EXPECT_EQ(histogram.num_children(), 2); + EXPECT_EQ(histogram.null_count(), 0); + EXPECT_EQ(histogram.child(1).null_count(), 0); + + // Sort the histogram based on the first column (unique input values). 
+ auto const sort_order = cudf::sorted_order(cudf::table_view{{histogram.child(0)}}, {}, {}); + return std::move(cudf::gather(cudf::table_view{{histogram}}, *sort_order)->release().front()); +} + +template +struct ReductionHistogramTest : public cudf::test::BaseFixture {}; + +// Avoid unsigned types, as the tests below have negative values in their input. +using HistogramTestTypes = cudf::test::Concat, + cudf::test::FloatingPointTypes, + cudf::test::FixedPointTypes, + cudf::test::ChronoTypes>; +TYPED_TEST_SUITE(ReductionHistogramTest, HistogramTestTypes); + +TYPED_TEST(ReductionHistogramTest, Histogram) +{ + using data_col = cudf::test::fixed_width_column_wrapper; + using int64_col = cudf::test::fixed_width_column_wrapper; + using structs_col = cudf::test::structs_column_wrapper; + + auto const agg = cudf::make_histogram_aggregation(); + + // Empty input. + { + auto const input = data_col{}; + auto const expected = [] { + auto child1 = data_col{}; + auto child2 = int64_col{}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + { + auto const input = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto const expected = [] { + auto child1 = data_col{-3, -2, 0, 1, 2, 5}; + auto child2 = int64_col{2, 1, 1, 2, 4, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test without nulls, sliced input. + { + auto const input_original = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto const input = cudf::slice(input_original, {0, 7})[0]; + auto const expected = [] { + auto child1 = data_col{-3, 0, 1, 2, 5}; + auto child2 = int64_col{1, 1, 1, 3, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with nulls. 
+ using namespace cudf::test::iterators; + auto constexpr null{0}; + { + auto const input = data_col{{null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, + nulls_at({0, 6, 9, 12})}; + auto const expected = [] { + auto child1 = data_col{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_col{4, 2, 1, 1, 2, 4, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with nulls, sliced input. + { + auto const input_original = data_col{ + {null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, nulls_at({0, 6, 9, 12})}; + auto const input = cudf::slice(input_original, {0, 9})[0]; + auto const expected = [] { + auto child1 = data_col{{null, -3, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_col{2, 1, 1, 1, 3, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } +} + +TYPED_TEST(ReductionHistogramTest, MergeHistogram) +{ + using data_col = cudf::test::fixed_width_column_wrapper; + using int64_col = cudf::test::fixed_width_column_wrapper; + using structs_col = cudf::test::structs_column_wrapper; + + auto const agg = cudf::make_merge_histogram_aggregation(); + + // Empty input. + { + auto const input = [] { + auto child1 = data_col{}; + auto child2 = int64_col{}; + return structs_col{{child1, child2}}; + }(); + auto const expected = [] { + auto child1 = data_col{}; + auto child2 = int64_col{}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test without nulls. 
+ { + auto const input = [] { + auto child1 = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto child2 = int64_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; + return structs_col{{child1, child2}}; + }(); + + auto const expected = [] { + auto child1 = data_col{-3, -2, 0, 1, 2, 5}; + auto child2 = int64_col{5, 5, 4, 5, 8, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test without nulls, sliced input. + { + auto const input_original = [] { + auto child1 = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto child2 = int64_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; + return structs_col{{child1, child2}}; + }(); + auto const input = cudf::slice(input_original, {0, 7})[0]; + + auto const expected = [] { + auto child1 = data_col{-3, 0, 1, 2, 5}; + auto child2 = int64_col{2, 4, 1, 5, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with nulls. + using namespace cudf::test::iterators; + auto constexpr null{0}; + { + auto const input = [] { + auto child1 = data_col{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, + nulls_at({2, 5, 8, 11, 15})}; + auto child2 = int64_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; + return structs_col{{child1, child2}}; + }(); + + auto const expected = [] { + auto child1 = data_col{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_col{67, 5, 5, 4, 5, 8, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with nulls, sliced input. 
+ { + auto const input_original = [] { + auto child1 = data_col{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, + nulls_at({2, 5, 8, 11, 15})}; + auto child2 = int64_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; + return structs_col{{child1, child2}}; + }(); + auto const input = cudf::slice(input_original, {0, 9})[0]; + + auto const expected = [] { + auto child1 = data_col{{null, -3, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_col{33, 2, 4, 1, 3, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } +} + template struct ReductionAnyAllTest : public ReductionTest {}; using AnyAllTypes = cudf::test::Types; From a97020f9c7e4e2be86788b5f7d83608839d3207b Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 27 Sep 2023 13:33:48 -0400 Subject: [PATCH 27/76] Correct numerous 20054-D: dynamic initialization errors found on arm+12.2 (#14108) Compile issues found by compiling libcudf with the `rapidsai/devcontainers:23.10-cpp-gcc9-cuda12.2-ubuntu20.04` docker container. 
Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Mark Harris (https://github.com/harrism) - David Wendt (https://github.com/davidwendt) - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/14108 --- cpp/src/io/avro/avro_common.hpp | 3 +- cpp/src/io/comp/unsnap.cu | 18 +++--- cpp/src/io/orc/orc_gpu.hpp | 39 +++++------- cpp/src/io/orc/stats_enc.cu | 10 +-- cpp/src/io/orc/stripe_init.cu | 29 ++++----- cpp/src/io/parquet/page_decode.cuh | 67 +++++++++++---------- cpp/src/io/parquet/page_hdr.cu | 12 ++-- cpp/src/io/parquet/parquet_gpu.hpp | 56 ++++++++--------- cpp/src/io/statistics/column_statistics.cuh | 12 ++-- cpp/src/io/statistics/statistics.cuh | 30 ++++----- 10 files changed, 138 insertions(+), 138 deletions(-) diff --git a/cpp/src/io/avro/avro_common.hpp b/cpp/src/io/avro/avro_common.hpp index ff8ee206dd4..0058d236d8c 100644 --- a/cpp/src/io/avro/avro_common.hpp +++ b/cpp/src/io/avro/avro_common.hpp @@ -25,7 +25,8 @@ namespace cudf { namespace io { namespace avro { struct block_desc_s { - block_desc_s() {} + block_desc_s() = default; // required to compile on ctk-12.2 + aarch64 + explicit constexpr block_desc_s( size_t offset_, uint32_t size_, uint32_t row_offset_, uint32_t first_row_, uint32_t num_rows_) : offset(offset_), diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index c699502317f..504a2fe377c 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -52,6 +52,8 @@ struct unsnap_batch_s { * @brief Queue structure used to exchange data between warps */ struct unsnap_queue_s { + unsnap_queue_s() = default; // required to compile on ctk-12.2 + aarch64 + uint32_t prefetch_wrpos; ///< Prefetcher write position uint32_t prefetch_rdpos; ///< Prefetch consumer read position int32_t prefetch_end; ///< Prefetch enable flag (nonzero stops prefetcher) @@ -64,13 +66,15 @@ struct 
unsnap_queue_s { * @brief snappy decompression state */ struct unsnap_state_s { - uint8_t const* base; ///< base ptr of compressed stream - uint8_t const* end; ///< end of compressed stream - uint32_t uncompressed_size; ///< uncompressed stream size - uint32_t bytes_left; ///< remaining bytes to decompress - int32_t error; ///< current error status - uint32_t tstart; ///< start time for perf logging - volatile unsnap_queue_s q; ///< queue for cross-warp communication + constexpr unsnap_state_s() noexcept {} // required to compile on ctk-12.2 + aarch64 + + uint8_t const* base{}; ///< base ptr of compressed stream + uint8_t const* end{}; ///< end of compressed stream + uint32_t uncompressed_size{}; ///< uncompressed stream size + uint32_t bytes_left{}; ///< remaining bytes to decompress + int32_t error{}; ///< current error status + uint32_t tstart{}; ///< start time for perf logging + volatile unsnap_queue_s q{}; ///< queue for cross-warp communication device_span src; ///< input for current block device_span dst; ///< output for current block }; diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 9b8df50a22a..dba7a9ffda5 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -59,31 +59,24 @@ struct CompressedStreamInfo { explicit constexpr CompressedStreamInfo(uint8_t const* compressed_data_, size_t compressed_size_) : compressed_data(compressed_data_), uncompressed_data(nullptr), - compressed_data_size(compressed_size_), - dec_in_ctl(nullptr), - dec_out_ctl(nullptr), - copy_in_ctl(nullptr), - copy_out_ctl(nullptr), - num_compressed_blocks(0), - num_uncompressed_blocks(0), - max_uncompressed_size(0), - max_uncompressed_block_size(0) + compressed_data_size(compressed_size_) { } - uint8_t const* compressed_data; // [in] base ptr to compressed stream data - uint8_t* uncompressed_data; // [in] base ptr to uncompressed stream data or NULL if not known yet - size_t compressed_data_size; // [in] compressed data size for this 
stream - device_span* dec_in_ctl; // [in] input buffer to decompress - device_span* dec_out_ctl; // [in] output buffer to decompress into - device_span dec_res; // [in] results of decompression - device_span* copy_in_ctl; // [out] input buffer to copy - device_span* copy_out_ctl; // [out] output buffer to copy to - uint32_t num_compressed_blocks; // [in,out] number of entries in decctl(in), number of compressed - // blocks(out) - uint32_t num_uncompressed_blocks; // [in,out] number of entries in dec_in_ctl(in), number of - // uncompressed blocks(out) - uint64_t max_uncompressed_size; // [out] maximum uncompressed data size of stream - uint32_t max_uncompressed_block_size; // [out] maximum uncompressed size of any block in stream + uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data + uint8_t* + uncompressed_data{}; // [in] base ptr to uncompressed stream data or NULL if not known yet + size_t compressed_data_size{}; // [in] compressed data size for this stream + device_span* dec_in_ctl{}; // [in] input buffer to decompress + device_span* dec_out_ctl{}; // [in] output buffer to decompress into + device_span dec_res{}; // [in] results of decompression + device_span* copy_in_ctl{}; // [out] input buffer to copy + device_span* copy_out_ctl{}; // [out] output buffer to copy to + uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of + // compressed blocks(out) + uint32_t num_uncompressed_blocks{}; // [in,out] number of entries in dec_in_ctl(in), number of + // uncompressed blocks(out) + uint64_t max_uncompressed_size{}; // [out] maximum uncompressed data size of stream + uint32_t max_uncompressed_block_size{}; // [out] maximum uncompressed size of any block in stream }; enum StreamIndexType { diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index 69d7ec95acd..95f1db5bfd1 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -134,11 +134,11 @@ __global__ void 
__launch_bounds__(block_size, 1) } struct stats_state_s { - uint8_t* base; ///< Output buffer start - uint8_t* end; ///< Output buffer end - statistics_chunk chunk; - statistics_merge_group group; - statistics_dtype stats_dtype; //!< Statistics data type for this column + uint8_t* base{}; ///< Output buffer start + uint8_t* end{}; ///< Output buffer end + statistics_chunk chunk{}; + statistics_merge_group group{}; + statistics_dtype stats_dtype{}; //!< Statistics data type for this column }; /* diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index d8a60350356..8eeca504121 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -30,14 +30,14 @@ namespace orc { namespace gpu { struct comp_in_out { - uint8_t const* in_ptr; - size_t in_size; - uint8_t* out_ptr; - size_t out_size; + uint8_t const* in_ptr{}; + size_t in_size{}; + uint8_t* out_ptr{}; + size_t out_size{}; }; struct compressed_stream_s { - CompressedStreamInfo info; - comp_in_out ctl; + CompressedStreamInfo info{}; + comp_in_out ctl{}; }; // blockDim {128,1,1} @@ -208,14 +208,15 @@ __global__ void __launch_bounds__(128, 8) * @brief Shared mem state for gpuParseRowGroupIndex */ struct rowindex_state_s { - ColumnDesc chunk; - uint32_t rowgroup_start; - uint32_t rowgroup_end; - int is_compressed; - uint32_t row_index_entry[3][CI_PRESENT]; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2 - CompressedStreamInfo strm_info[2]; - RowGroup rowgroups[128]; - uint32_t compressed_offset[128][2]; + ColumnDesc chunk{}; + uint32_t rowgroup_start{}; + uint32_t rowgroup_end{}; + int is_compressed{}; + uint32_t row_index_entry[3] + [CI_PRESENT]{}; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2 + CompressedStreamInfo strm_info[2]{}; + RowGroup rowgroups[128]{}; + uint32_t compressed_offset[128][2]{}; }; enum row_entry_state_e { diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 26e3c951b2e..5e66885d746 100644 --- 
a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -26,48 +26,49 @@ namespace cudf::io::parquet::gpu { struct page_state_s { - uint8_t const* data_start; - uint8_t const* data_end; - uint8_t const* lvl_end; - uint8_t const* dict_base; // ptr to dictionary page data - int32_t dict_size; // size of dictionary data - int32_t first_row; // First row in page to output - int32_t num_rows; // Rows in page to decode (including rows to be skipped) - int32_t first_output_value; // First value in page to output - int32_t num_input_values; // total # of input/level values in the page - int32_t dtype_len; // Output data type length - int32_t dtype_len_in; // Can be larger than dtype_len if truncating 32-bit into 8-bit - int32_t dict_bits; // # of bits to store dictionary indices - uint32_t dict_run; - int32_t dict_val; - uint32_t initial_rle_run[NUM_LEVEL_TYPES]; // [def,rep] - int32_t initial_rle_value[NUM_LEVEL_TYPES]; // [def,rep] - int32_t error; - PageInfo page; - ColumnChunkDesc col; + constexpr page_state_s() noexcept {} + uint8_t const* data_start{}; + uint8_t const* data_end{}; + uint8_t const* lvl_end{}; + uint8_t const* dict_base{}; // ptr to dictionary page data + int32_t dict_size{}; // size of dictionary data + int32_t first_row{}; // First row in page to output + int32_t num_rows{}; // Rows in page to decode (including rows to be skipped) + int32_t first_output_value{}; // First value in page to output + int32_t num_input_values{}; // total # of input/level values in the page + int32_t dtype_len{}; // Output data type length + int32_t dtype_len_in{}; // Can be larger than dtype_len if truncating 32-bit into 8-bit + int32_t dict_bits{}; // # of bits to store dictionary indices + uint32_t dict_run{}; + int32_t dict_val{}; + uint32_t initial_rle_run[NUM_LEVEL_TYPES]{}; // [def,rep] + int32_t initial_rle_value[NUM_LEVEL_TYPES]{}; // [def,rep] + int32_t error{}; + PageInfo page{}; + ColumnChunkDesc col{}; // (leaf) value decoding - 
int32_t nz_count; // number of valid entries in nz_idx (write position in circular buffer) - int32_t dict_pos; // write position of dictionary indices - int32_t src_pos; // input read position of final output value - int32_t ts_scale; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale + int32_t nz_count{}; // number of valid entries in nz_idx (write position in circular buffer) + int32_t dict_pos{}; // write position of dictionary indices + int32_t src_pos{}; // input read position of final output value + int32_t ts_scale{}; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale // repetition/definition level decoding - int32_t input_value_count; // how many values of the input we've processed - int32_t input_row_count; // how many rows of the input we've processed - int32_t input_leaf_count; // how many leaf values of the input we've processed - uint8_t const* lvl_start[NUM_LEVEL_TYPES]; // [def,rep] - uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]; // [def,rep] - uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]; // [def,rep] - int32_t lvl_count[NUM_LEVEL_TYPES]; // how many of each of the streams we've decoded - int32_t row_index_lower_bound; // lower bound of row indices we should process + int32_t input_value_count{}; // how many values of the input we've processed + int32_t input_row_count{}; // how many rows of the input we've processed + int32_t input_leaf_count{}; // how many leaf values of the input we've processed + uint8_t const* lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep] + uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep] + uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]{}; // [def,rep] + int32_t lvl_count[NUM_LEVEL_TYPES]{}; // how many of each of the streams we've decoded + int32_t row_index_lower_bound{}; // lower bound of row indices we should process // a shared-memory cache of frequently used data when decoding. The source of this data is // normally stored in global memory which can yield poor performance. 
So, when possible // we copy that info here prior to decoding - PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]; + PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{}; // points to either nesting_decode_cache above when possible, or to the global source otherwise - PageNestingDecodeInfo* nesting_info; + PageNestingDecodeInfo* nesting_info{}; }; // buffers only used in the decode kernel. separated from page_state_s to keep diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 0d611643b46..6f8b2f50443 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -45,13 +45,13 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0, ST_FLD_LIST}; struct byte_stream_s { - uint8_t const* cur; - uint8_t const* end; - uint8_t const* base; + uint8_t const* cur{}; + uint8_t const* end{}; + uint8_t const* base{}; // Parsed symbols - PageType page_type; - PageInfo page; - ColumnChunkDesc ck; + PageType page_type{}; + PageInfo page{}; + ColumnChunkDesc ck{}; }; /** diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index a3cc37dee4f..a760c2448dc 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -228,7 +228,7 @@ struct PageInfo { * @brief Struct describing a particular chunk of column data */ struct ColumnChunkDesc { - ColumnChunkDesc() = default; + constexpr ColumnChunkDesc() noexcept {}; explicit ColumnChunkDesc(size_t compressed_size_, uint8_t* compressed_data_, size_t num_values_, @@ -275,34 +275,34 @@ struct ColumnChunkDesc { { } - uint8_t const* compressed_data; // pointer to compressed column chunk data - size_t compressed_size; // total compressed data size for this chunk - size_t num_values; // total number of values in this column - size_t start_row; // starting row of this chunk - uint32_t num_rows; // number of rows in this chunk - int16_t 
max_level[level_type::NUM_LEVEL_TYPES]; // max definition/repetition level - int16_t max_nesting_depth; // max nesting depth of the output - uint16_t data_type; // basic column data type, ((type_length << 3) | - // parquet::Type) + uint8_t const* compressed_data{}; // pointer to compressed column chunk data + size_t compressed_size{}; // total compressed data size for this chunk + size_t num_values{}; // total number of values in this column + size_t start_row{}; // starting row of this chunk + uint32_t num_rows{}; // number of rows in this chunk + int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level + int16_t max_nesting_depth{}; // max nesting depth of the output + uint16_t data_type{}; // basic column data type, ((type_length << 3) | + // parquet::Type) uint8_t - level_bits[level_type::NUM_LEVEL_TYPES]; // bits to encode max definition/repetition levels - int32_t num_data_pages; // number of data pages - int32_t num_dict_pages; // number of dictionary pages - int32_t max_num_pages; // size of page_info array - PageInfo* page_info; // output page info for up to num_dict_pages + - // num_data_pages (dictionary pages first) - string_index_pair* str_dict_index; // index for string dictionary - bitmask_type** valid_map_base; // base pointers of valid bit map for this column - void** column_data_base; // base pointers of column data - void** column_string_base; // base pointers of column string data - int8_t codec; // compressed codec enum - int8_t converted_type; // converted type enum - LogicalType logical_type; // logical type - int8_t decimal_precision; // Decimal precision - int32_t ts_clock_rate; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) - - int32_t src_col_index; // my input column index - int32_t src_col_schema; // my schema index in the file + level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels + int32_t num_data_pages{}; // number of data pages + int32_t 
num_dict_pages{}; // number of dictionary pages + int32_t max_num_pages{}; // size of page_info array + PageInfo* page_info{}; // output page info for up to num_dict_pages + + // num_data_pages (dictionary pages first) + string_index_pair* str_dict_index{}; // index for string dictionary + bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column + void** column_data_base{}; // base pointers of column data + void** column_string_base{}; // base pointers of column string data + int8_t codec{}; // compressed codec enum + int8_t converted_type{}; // converted type enum + LogicalType logical_type{}; // logical type + int8_t decimal_precision{}; // Decimal precision + int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) + + int32_t src_col_index{}; // my input column index + int32_t src_col_schema{}; // my schema index in the file }; /** diff --git a/cpp/src/io/statistics/column_statistics.cuh b/cpp/src/io/statistics/column_statistics.cuh index 28e77f62a43..f71fb95949f 100644 --- a/cpp/src/io/statistics/column_statistics.cuh +++ b/cpp/src/io/statistics/column_statistics.cuh @@ -34,18 +34,18 @@ namespace io { * @brief shared state for statistics calculation kernel */ struct stats_state_s { - stats_column_desc col; ///< Column information - statistics_group group; ///< Group description - statistics_chunk ck; ///< Output statistics chunk + stats_column_desc col{}; ///< Column information + statistics_group group{}; ///< Group description + statistics_chunk ck{}; ///< Output statistics chunk }; /** * @brief shared state for statistics merge kernel */ struct merge_state_s { - stats_column_desc col; ///< Column information - statistics_merge_group group; ///< Group description - statistics_chunk ck; ///< Resulting statistics chunk + stats_column_desc col{}; ///< Column information + statistics_merge_group group{}; ///< Group description + statistics_chunk ck{}; ///< Resulting statistics chunk }; template 
diff --git a/cpp/src/io/statistics/statistics.cuh b/cpp/src/io/statistics/statistics.cuh index 805ca43553e..b6e698fee11 100644 --- a/cpp/src/io/statistics/statistics.cuh +++ b/cpp/src/io/statistics/statistics.cuh @@ -98,27 +98,27 @@ union statistics_val { }; struct statistics_chunk { - uint32_t non_nulls; //!< number of non-null values in chunk - uint32_t null_count; //!< number of null values in chunk - statistics_val min_value; //!< minimum value in chunk - statistics_val max_value; //!< maximum value in chunk - statistics_val sum; //!< sum of chunk - uint8_t has_minmax; //!< Nonzero if min_value and max_values are valid - uint8_t has_sum; //!< Nonzero if sum is valid + uint32_t non_nulls{}; //!< number of non-null values in chunk + uint32_t null_count{}; //!< number of null values in chunk + statistics_val min_value{}; //!< minimum value in chunk + statistics_val max_value{}; //!< maximum value in chunk + statistics_val sum{}; //!< sum of chunk + uint8_t has_minmax{}; //!< Nonzero if min_value and max_values are valid + uint8_t has_sum{}; //!< Nonzero if sum is valid }; struct statistics_group { - stats_column_desc const* col; //!< Column information - uint32_t start_row; //!< Start row of this group - uint32_t num_rows; //!< Number of rows in group - uint32_t non_leaf_nulls; //!< Number of null non-leaf values in the group + stats_column_desc const* col{}; //!< Column information + uint32_t start_row{}; //!< Start row of this group + uint32_t num_rows{}; //!< Number of rows in group + uint32_t non_leaf_nulls{}; //!< Number of null non-leaf values in the group }; struct statistics_merge_group { - data_type col_dtype; //!< Column data type - statistics_dtype stats_dtype; //!< Statistics data type for this column - uint32_t start_chunk; //!< Start chunk of this group - uint32_t num_chunks; //!< Number of chunks in group + data_type col_dtype; //!< Column data type + statistics_dtype stats_dtype{dtype_none}; //!< Statistics data type for this column + uint32_t 
start_chunk{}; //!< Start chunk of this group + uint32_t num_chunks{}; //!< Number of chunks in group }; template >* = nullptr> From bff0fcd721320210c53d3533e63fb34eac883f4e Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Wed, 27 Sep 2023 11:25:25 -0700 Subject: [PATCH 28/76] [Java] Add JNI bindings for `integers_to_hex` (#14205) This PR adds a method to ColumnView class to allow for conversion from Integers to hex closes #14081 Authors: - Raza Jafri (https://github.com/razajafri) Approvers: - Kuhu Shukla (https://github.com/kuhushukla) - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/14205 --- .../main/java/ai/rapids/cudf/ColumnView.java | 27 +++++++++++++++++++ java/src/main/java/ai/rapids/cudf/DType.java | 19 +++++++++++++ java/src/main/native/src/ColumnViewJni.cpp | 9 +++++++ .../java/ai/rapids/cudf/ColumnVectorTest.java | 10 +++++++ 4 files changed, 65 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 3f3a55f0970..0b66701629b 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -4089,6 +4089,8 @@ static DeviceMemoryBufferView getOffsetsBuffer(long viewHandle) { private static native long isFixedPoint(long viewHandle, int nativeTypeId, int scale); + private static native long toHex(long viewHandle); + /** * Native method to concatenate a list column of strings (each row is a list of strings), * concatenates the strings within each row and returns a single strings column result. @@ -5231,4 +5233,29 @@ static ColumnView[] getColumnViewsFromPointers(long[] nativeHandles) { } } } + + /** + * Convert this integer column to hexadecimal column and return a new strings column + * + * Any null entries will result in corresponding null entries in the output column. + * + * The output character set is '0'-'9' and 'A'-'F'. 
The output string width will + * be a multiple of 2 depending on the size of the integer type. A single leading + * zero is applied to the first non-zero output byte if it is less than 0x10. + * + * Example: + * input = [123, -1, 0, 27, 342718233] + * s = input.toHex() + * s is [ '04D2', 'FFFFFFFF', '00', '1B', '146D7719'] + * + * The example above shows an `INT32` type column where each integer is 4 bytes. + * Leading zeros are suppressed unless filling out a complete byte as in + * `123 -> '04D2'` instead of `000004D2` or `4D2`. + * + * @return new string ColumnVector + */ + public ColumnVector toHex() { + assert getType().isIntegral() : "Only integers are supported"; + return new ColumnVector(toHex(this.getNativeView())); + } } diff --git a/java/src/main/java/ai/rapids/cudf/DType.java b/java/src/main/java/ai/rapids/cudf/DType.java index d0bb7761da4..07bc4fe3bbf 100644 --- a/java/src/main/java/ai/rapids/cudf/DType.java +++ b/java/src/main/java/ai/rapids/cudf/DType.java @@ -413,6 +413,14 @@ public boolean isDurationType() { } /** + * Returns true for strictly Integer types not a type backed by + * ints + */ + public boolean isIntegral() { + return INTEGRALS.contains(this.typeId); + } + + /** * Returns true for nested types */ public boolean isNestedType() { @@ -506,4 +514,15 @@ public boolean hasOffsets() { DTypeEnum.STRING, DTypeEnum.LIST ); + + private static final EnumSet INTEGRALS = EnumSet.of( + DTypeEnum.INT8, + DTypeEnum.INT16, + DTypeEnum.INT32, + DTypeEnum.INT64, + DTypeEnum.UINT8, + DTypeEnum.UINT16, + DTypeEnum.UINT32, + DTypeEnum.UINT64 + ); } diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index d5aad03645f..0ddaa2c15b5 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2563,4 +2563,13 @@ Java_ai_rapids_cudf_ColumnView_purgeNonEmptyNulls(JNIEnv *env, jclass, jlong col CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL 
Java_ai_rapids_cudf_ColumnView_toHex(JNIEnv *env, jclass, jlong input_ptr) { + JNI_NULL_CHECK(env, input_ptr, "input is null", 0); + try { + cudf::jni::auto_set_device(env); + const cudf::column_view *input = reinterpret_cast(input_ptr); + return release_as_jlong(cudf::strings::integers_to_hex(*input)); + } + CATCH_STD(env, 0); +} } // extern "C" diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index f6dffc88b92..9a0f8bda994 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -6876,4 +6876,14 @@ public void testUseAfterFree() { vector.close(); assertThrows(NullPointerException.class, vector::getDeviceMemorySize); } + + @Test + public void testConvertIntegerToHex() { + try ( + ColumnVector input = ColumnVector.fromInts(14, 2621, 50); + ColumnVector expected = ColumnVector.fromStrings("0E", "0A3D", "32"); + ColumnVector actual = input.toHex()) { + assertColumnsAreEqual(expected, actual); + } + } } From 66ac962dbeb69eade22b3bcaf186e3df2bae71b5 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Wed, 27 Sep 2023 12:20:20 -0700 Subject: [PATCH 29/76] JNI for `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations (#14154) This implements JNI for `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations in both groupby and reduction. Depends on: * https://github.com/rapidsai/cudf/pull/14045 Contributes to: * https://github.com/rapidsai/cudf/issues/13885. 
Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/14154 --- .../main/java/ai/rapids/cudf/Aggregation.java | 26 ++++- .../ai/rapids/cudf/GroupByAggregation.java | 24 +++- .../ai/rapids/cudf/ReductionAggregation.java | 20 +++- java/src/main/native/src/AggregationJni.cpp | 7 +- .../test/java/ai/rapids/cudf/TableTest.java | 109 ++++++++++++++++++ 5 files changed, 181 insertions(+), 5 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index d10329ca0f2..379750bb0b7 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -68,7 +68,9 @@ enum Kind { DENSE_RANK(29), PERCENT_RANK(30), TDIGEST(31), // This can take a delta argument for accuracy level - MERGE_TDIGEST(32); // This can take a delta argument for accuracy level + MERGE_TDIGEST(32), // This can take a delta argument for accuracy level + HISTOGRAM(33), + MERGE_HISTOGRAM(34); final int nativeId; @@ -918,6 +920,26 @@ static TDigestAggregation mergeTDigest(int delta) { return new TDigestAggregation(Kind.MERGE_TDIGEST, delta); } + static final class HistogramAggregation extends NoParamAggregation { + private HistogramAggregation() { + super(Kind.HISTOGRAM); + } + } + + static final class MergeHistogramAggregation extends NoParamAggregation { + private MergeHistogramAggregation() { + super(Kind.MERGE_HISTOGRAM); + } + } + + static HistogramAggregation histogram() { + return new HistogramAggregation(); + } + + static MergeHistogramAggregation mergeHistogram() { + return new MergeHistogramAggregation(); + } + /** * Create one of the aggregations that only needs a kind, no other parameters. This does not * work for all types and for code safety reasons each kind is added separately. diff --git a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java index 500d18f7eae..0fae33927b6 100644 --- a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -315,4 +315,26 @@ public static GroupByAggregation createTDigest(int delta) { public static GroupByAggregation mergeTDigest(int delta) { return new GroupByAggregation(Aggregation.mergeTDigest(delta)); } + + /** + * Histogram aggregation, computing the frequencies for each unique row. 
+ * + * A histogram is given as a lists column, in which the first child stores unique rows from + * the input values and the second child stores their corresponding frequencies. + * + * @return A lists of structs column in which each list contains a histogram corresponding to + * an input key. + */ + public static GroupByAggregation histogram() { + return new GroupByAggregation(Aggregation.histogram()); + } + + /** + * MergeHistogram aggregation, to merge multiple histograms. + * + * @return A new histogram in which the frequencies of the unique rows are sum up. + */ + public static GroupByAggregation mergeHistogram() { + return new GroupByAggregation(Aggregation.mergeHistogram()); + } } diff --git a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java index eab1c94fd2c..ba8ae379bae 100644 --- a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -286,4 +286,22 @@ public static ReductionAggregation mergeSets(NullEquality nullEquality, NaNEqual return new ReductionAggregation(Aggregation.mergeSets(nullEquality, nanEquality)); } + /** + * Create HistogramAggregation, computing the frequencies for each unique row. + * + * @return A structs column in which the first child stores unique rows from the input and the + * second child stores their corresponding frequencies. + */ + public static ReductionAggregation histogram() { + return new ReductionAggregation(Aggregation.histogram()); + } + + /** + * Create MergeHistogramAggregation, to merge multiple histograms. + * + * @return A new histogram in which the frequencies of the unique rows are sum up. 
+ */ + public static ReductionAggregation mergeHistogram() { + return new ReductionAggregation(Aggregation.mergeHistogram()); + } } diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index 6ac73282615..bc62e95c36a 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -90,6 +90,11 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createNoParamAgg(JNIEnv case 30: // ANSI SQL PERCENT_RANK return cudf::make_rank_aggregation(cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE, {}, cudf::rank_percentage::ONE_NORMALIZED); + case 33: // HISTOGRAM + return cudf::make_histogram_aggregation(); + case 34: // MERGE_HISTOGRAM + return cudf::make_merge_histogram_aggregation(); + default: throw std::logic_error("Unsupported No Parameter Aggregation Operation"); } }(); diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 59f0d180c6e..faa73ac4322 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -4129,6 +4129,115 @@ void testMergeTDigestReduction() { } } + @Test + void testGroupbyHistogram() { + StructType histogramStruct = new StructType(false, + new BasicType(false, DType.INT32), // values + new BasicType(false, DType.INT64)); // frequencies + ListType histogramList = new ListType(false, histogramStruct); + + // key = 0: values = [2, 2, -3, -2, 2] + // key = 1: values = [2, 0, 5, 2, 1] + // key = 2: values = [-3, 1, 1, 2, 2] + try (Table input = new Table.TestBuilder() + .column(2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2) + .column(-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2) + 
.build(); + Table result = input.groupBy(0) + .aggregate(GroupByAggregation.histogram().onColumn(1)); + Table sortedResult = result.orderBy(OrderByArg.asc(0)); + ColumnVector sortedOutHistograms = sortedResult.getColumn(1).listSortRows(false, false); + + ColumnVector expectedKeys = ColumnVector.fromInts(0, 1, 2); + ColumnVector expectedHistograms = ColumnVector.fromLists(histogramList, + Arrays.asList(new StructData(-3, 1L), new StructData(-2, 1L), new StructData(2, 3L)), + Arrays.asList(new StructData(0, 1L), new StructData(1, 1L), new StructData(2, 2L), + new StructData(5, 1L)), + Arrays.asList(new StructData(-3, 1L), new StructData(1, 2L), new StructData(2, 2L))) + ) { + assertColumnsAreEqual(expectedKeys, sortedResult.getColumn(0)); + assertColumnsAreEqual(expectedHistograms, sortedOutHistograms); + } + } + + @Test + void testGroupbyMergeHistogram() { + StructType histogramStruct = new StructType(false, + new BasicType(false, DType.INT32), // values + new BasicType(false, DType.INT64)); // frequencies + ListType histogramList = new ListType(false, histogramStruct); + + // key = 0: histograms = [[<-3, 1>, <-2, 1>, <2, 3>], [<0, 1>, <1, 1>], [<-3, 3>, <0, 1>, <1, 2>]] + // key = 1: histograms = [[<-2, 1>, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]] + try (Table input = new Table.TestBuilder() + .column(0, 1, 0, 1, 0) + .column(histogramStruct, + new StructData[]{new StructData(-3, 1L), new StructData(-2, 1L), new StructData(2, 3L)}, + new StructData[]{new StructData(-2, 1L), new StructData(1, 3L), new StructData(2, 2L)}, + new StructData[]{new StructData(0, 1L), new StructData(1, 1L)}, + new StructData[]{new StructData(0, 2L), new StructData(1, 1L), new StructData(2, 2L)}, + new StructData[]{new StructData(-3, 3L), new StructData(0, 1L), new StructData(1, 2L)}) + .build(); + Table result = input.groupBy(0) + .aggregate(GroupByAggregation.mergeHistogram().onColumn(1)); + Table sortedResult = result.orderBy(OrderByArg.asc(0)); + ColumnVector sortedOutHistograms = 
sortedResult.getColumn(1).listSortRows(false, false); + + ColumnVector expectedKeys = ColumnVector.fromInts(0, 1); + ColumnVector expectedHistograms = ColumnVector.fromLists(histogramList, + Arrays.asList(new StructData(-3, 4L), new StructData(-2, 1L), new StructData(0, 2L), + new StructData(1, 3L), new StructData(2, 3L)), + Arrays.asList(new StructData(-2, 1L), new StructData(0, 2L), new StructData(1, 4L), + new StructData(2, 4L))) + ) { + assertColumnsAreEqual(expectedKeys, sortedResult.getColumn(0)); + assertColumnsAreEqual(expectedHistograms, sortedOutHistograms); + } + } + + @Test + void testReductionHistogram() { + StructType histogramStruct = new StructType(false, + new BasicType(false, DType.INT32), // values + new BasicType(false, DType.INT64)); // frequencies + + try (ColumnVector input = ColumnVector.fromInts(-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1); + Scalar result = input.reduce(ReductionAggregation.histogram(), DType.LIST); + ColumnVector resultCV = result.getListAsColumnView().copyToColumnVector(); + Table resultTable = new Table(resultCV); + Table sortedResult = resultTable.orderBy(OrderByArg.asc(0)); + + ColumnVector expectedHistograms = ColumnVector.fromStructs(histogramStruct, + new StructData(-3, 2L), new StructData(-2, 1L), new StructData(0, 1L), + new StructData(1, 2L), new StructData(2, 4L), new StructData(5, 1L)) + ) { + assertColumnsAreEqual(expectedHistograms, sortedResult.getColumn(0)); + } + } + + @Test + void testReductionMergeHistogram() { + StructType histogramStruct = new StructType(false, + new BasicType(false, DType.INT32), // values + new BasicType(false, DType.INT64)); // frequencies + + try (ColumnVector input = ColumnVector.fromStructs(histogramStruct, + new StructData(-3, 2L), new StructData(2, 1L), new StructData(1, 1L), + new StructData(2, 2L), new StructData(0, 4L), new StructData(5, 1L), + new StructData(2, 2L), new StructData(-3, 3L), new StructData(-2, 5L), + new StructData(2, 3L), new StructData(1, 4L)); + Scalar result = 
input.reduce(ReductionAggregation.mergeHistogram(), DType.LIST); + ColumnVector resultCV = result.getListAsColumnView().copyToColumnVector(); + Table resultTable = new Table(resultCV); + Table sortedResult = resultTable.orderBy(OrderByArg.asc(0)); + + ColumnVector expectedHistograms = ColumnVector.fromStructs(histogramStruct, + new StructData(-3, 5L), new StructData(-2, 5L), new StructData(0, 4L), + new StructData(1, 5L), new StructData(2, 8L), new StructData(5, 1L)) + ) { + assertColumnsAreEqual(expectedHistograms, sortedResult.getColumn(0)); + } + } @Test void testGroupByMinMaxDecimal() { try (Table t1 = new Table.TestBuilder() From b789d4ce3c090a3f25a8657d9a8582a1edb54f12 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 27 Sep 2023 12:20:46 -0700 Subject: [PATCH 30/76] Preserve name of the column while initializing a `DataFrame` (#14110) Fixes: #14088 This PR preserves `names` of `column` object while constructing a `DataFrame` through various constructor flows. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Bradley Dice (https://github.com/bdice) - Ashwin Srinath (https://github.com/shwina) URL: https://github.com/rapidsai/cudf/pull/14110 --- python/cudf/cudf/core/column_accessor.py | 2 -- python/cudf/cudf/core/dataframe.py | 26 ++++++++++++++++++--- python/cudf/cudf/core/indexed_frame.py | 4 +++- python/cudf/cudf/tests/test_dataframe.py | 29 ++++++++++++++++++++---- 4 files changed, 51 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index bec9c367ba9..cb79a30422e 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -197,8 +197,6 @@ def nlevels(self) -> int: @property def name(self) -> Any: - if len(self._data) == 0: - return None return self.level_names[-1] @property diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8a3dbe77787..ead2f182e2d 100644 --- 
a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -665,7 +665,10 @@ def __init__( len(self), dtype="object", masked=True ) for k in columns - } + }, + level_names=tuple(columns.names) + if isinstance(columns, pd.Index) + else None, ) elif isinstance(data, ColumnAccessor): raise TypeError( @@ -712,6 +715,11 @@ def __init__( self._data = new_df._data self._index = new_df._index + self._data._level_names = ( + tuple(columns.names) + if isinstance(columns, pd.Index) + else self._data._level_names + ) elif len(data) > 0 and isinstance(data[0], Series): self._init_from_series_list( data=data, columns=columns, index=index @@ -834,6 +842,11 @@ def _init_from_series_list(self, data, columns, index): self._data[col_name] = column.column_empty( row_count=len(self), dtype=None, masked=True ) + self._data._level_names = ( + tuple(columns.names) + if isinstance(columns, pd.Index) + else self._data._level_names + ) self._data = self._data.select_by_label(columns) @_cudf_nvtx_annotate @@ -957,6 +970,11 @@ def _init_from_dict_like( data[col_name], nan_as_null=nan_as_null, ) + self._data._level_names = ( + tuple(columns.names) + if isinstance(columns, pd.Index) + else self._data._level_names + ) @classmethod def _from_data( @@ -5131,7 +5149,7 @@ def from_pandas(cls, dataframe, nan_as_null=None): index = cudf.from_pandas(dataframe.index, nan_as_null=nan_as_null) df = cls._from_data(data, index) - df._data._level_names = list(dataframe.columns.names) + df._data._level_names = tuple(dataframe.columns.names) # Set columns only if it is a MultiIndex if isinstance(dataframe.columns, pd.MultiIndex): @@ -5377,6 +5395,8 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False): df = df.set_index(index) else: df._index = as_index(index) + if isinstance(columns, pd.Index): + df._data._level_names = tuple(columns.names) return df @classmethod @@ -5434,7 +5454,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False): data, 
nan_as_null=nan_as_null ) if isinstance(columns, pd.Index): - df._data._level_names = list(columns.names) + df._data._level_names = tuple(columns.names) if index is None: df._index = RangeIndex(start=0, stop=len(data)) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index aacf1fa8dae..1008cbdb67f 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -2661,7 +2661,9 @@ def _reindex( data=cudf.core.column_accessor.ColumnAccessor( cols, multiindex=self._data.multiindex, - level_names=self._data.level_names, + level_names=tuple(column_names.names) + if isinstance(column_names, pd.Index) + else None, ), index=index, ) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 67b63028fab..c297748f7e5 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -6394,6 +6394,7 @@ def test_df_series_dataframe_astype_dtype_dict(copy): ([range(100), range(100)], ["range" + str(i) for i in range(100)]), (((1, 2, 3), (1, 2, 3)), ["tuple0", "tuple1", "tuple2"]), ([[1, 2, 3]], ["list col1", "list col2", "list col3"]), + ([[1, 2, 3]], pd.Index(["col1", "col2", "col3"], name="rapids")), ([range(100)], ["range" + str(i) for i in range(100)]), (((1, 2, 3),), ["k1", "k2", "k3"]), ], @@ -7969,6 +7970,7 @@ def test_series_empty(ps): @pytest.mark.parametrize( "data", [ + None, [], [1], {"a": [10, 11, 12]}, @@ -7979,7 +7981,10 @@ def test_series_empty(ps): }, ], ) -@pytest.mark.parametrize("columns", [["a"], ["another column name"], None]) +@pytest.mark.parametrize( + "columns", + [["a"], ["another column name"], None, pd.Index(["a"], name="index name")], +) def test_dataframe_init_with_columns(data, columns): pdf = pd.DataFrame(data, columns=columns) gdf = cudf.DataFrame(data, columns=columns) @@ -8047,7 +8052,16 @@ def test_dataframe_init_with_columns(data, columns): ], ) @pytest.mark.parametrize( - "columns", 
[None, ["0"], [0], ["abc"], [144, 13], [2, 1, 0]] + "columns", + [ + None, + ["0"], + [0], + ["abc"], + [144, 13], + [2, 1, 0], + pd.Index(["abc"], name="custom_name"), + ], ) def test_dataframe_init_from_series_list(data, ignore_dtype, columns): gd_data = [cudf.from_pandas(obj) for obj in data] @@ -10239,14 +10253,21 @@ def test_dataframe_binop_with_datetime_index(): @pytest.mark.parametrize( - "columns", ([], ["c", "a"], ["a", "d", "b", "e", "c"], ["a", "b", "c"]) + "columns", + ( + [], + ["c", "a"], + ["a", "d", "b", "e", "c"], + ["a", "b", "c"], + pd.Index(["b", "a", "c"], name="custom_name"), + ), ) @pytest.mark.parametrize("index", (None, [4, 5, 6])) def test_dataframe_dict_like_with_columns(columns, index): data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} expect = pd.DataFrame(data, columns=columns, index=index) actual = cudf.DataFrame(data, columns=columns, index=index) - if index is None and columns == []: + if index is None and len(columns) == 0: # We make an empty range index, pandas makes an empty index expect = expect.reset_index(drop=True) assert_eq(expect, actual) From 2c19bf328ffefb97d17e5ae600197a4ea9ca4445 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 27 Sep 2023 20:37:04 -0700 Subject: [PATCH 31/76] Propagate errors from Parquet reader kernels back to host (#14167) Pass the error code to the host when a kernel detects invalid input. If multiple errors types are detected, they are combined using a bitwise OR so that caller gets the aggregate error code that includes all types of errors that occurred. Does not change the kernel side checks. 
Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - https://github.com/nvdbaranec - Divye Gala (https://github.com/divyegala) - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14167 --- cpp/src/io/parquet/page_data.cu | 25 ++++++++--- cpp/src/io/parquet/page_decode.cuh | 57 +++++++++++++++++------- cpp/src/io/parquet/page_delta_decode.cu | 25 ++++++++--- cpp/src/io/parquet/page_string_decode.cu | 25 ++++++++--- cpp/src/io/parquet/parquet_gpu.hpp | 21 +++++++++ cpp/src/io/parquet/reader_impl.cpp | 19 ++++++-- 6 files changed, 130 insertions(+), 42 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index c26802aa3c2..230834632dd 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -430,10 +430,15 @@ static __device__ void gpuOutputGeneric( * @param chunks List of column chunks * @param min_row Row index to start reading at * @param num_rows Maximum number of rows to read + * @param error_code Error code to set if an error is encountered */ template -__global__ void __launch_bounds__(decode_block_size) gpuDecodePageData( - PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) +__global__ void __launch_bounds__(decode_block_size) + gpuDecodePageData(PageInfo* pages, + device_span chunks, + size_t min_row, + size_t num_rows, + int32_t* error_code) { __shared__ __align__(16) page_state_s state_g; __shared__ __align__(16) @@ -472,7 +477,8 @@ __global__ void __launch_bounds__(decode_block_size) gpuDecodePageData( // skipped_leaf_values will always be 0 for flat hierarchies. 
uint32_t skipped_leaf_values = s->page.skipped_leaf_values; - while (!s->error && (s->input_value_count < s->num_input_values || s->src_pos < s->nz_count)) { + while (s->error == 0 && + (s->input_value_count < s->num_input_values || s->src_pos < s->nz_count)) { int target_pos; int src_pos = s->src_pos; @@ -596,6 +602,10 @@ __global__ void __launch_bounds__(decode_block_size) gpuDecodePageData( } __syncthreads(); } + if (t == 0 and s->error != 0) { + cuda::atomic_ref ref{*error_code}; + ref.fetch_or(s->error, cuda::std::memory_order_relaxed); + } } struct mask_tform { @@ -621,6 +631,7 @@ void __host__ DecodePageData(cudf::detail::hostdevice_vector& pages, size_t num_rows, size_t min_row, int level_type_size, + int32_t* error_code, rmm::cuda_stream_view stream) { CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); @@ -629,11 +640,11 @@ void __host__ DecodePageData(cudf::detail::hostdevice_vector& pages, dim3 dim_grid(pages.size(), 1); // 1 threadblock per page if (level_type_size == 1) { - gpuDecodePageData - <<>>(pages.device_ptr(), chunks, min_row, num_rows); + gpuDecodePageData<<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); } else { - gpuDecodePageData - <<>>(pages.device_ptr(), chunks, min_row, num_rows); + gpuDecodePageData<<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); } } diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 5e66885d746..cdc29197eb3 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -21,6 +21,7 @@ #include +#include #include namespace cudf::io::parquet::gpu { @@ -69,6 +70,18 @@ struct page_state_s { PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{}; // points to either nesting_decode_cache above when possible, or to the global source otherwise PageNestingDecodeInfo* nesting_info{}; + + inline __device__ void set_error_code(decode_error err) volatile + { + cuda::atomic_ref 
ref{const_cast(error)}; + ref.fetch_or(static_cast(err), cuda::std::memory_order_relaxed); + } + + inline __device__ void reset_error_code() volatile + { + cuda::atomic_ref ref{const_cast(error)}; + ref.store(0, cuda::std::memory_order_release); + } }; // buffers only used in the decode kernel. separated from page_state_s to keep @@ -471,7 +484,7 @@ __device__ void gpuDecodeStream( int32_t value_count = s->lvl_count[lvl]; int32_t batch_coded_count = 0; - while (value_count < target_count && value_count < num_input_values) { + while (s->error == 0 && value_count < target_count && value_count < num_input_values) { int batch_len; if (level_run <= 1) { // Get a new run symbol from the byte stream @@ -487,7 +500,14 @@ __device__ void gpuDecodeStream( cur++; } } - if (cur > end || level_run <= 1) { s->error = 0x10; } + if (cur > end) { + s->set_error_code(decode_error::LEVEL_STREAM_OVERRUN); + break; + } + if (level_run <= 1) { + s->set_error_code(decode_error::INVALID_LEVEL_RUN); + break; + } sym_len = (int32_t)(cur - cur_def); __threadfence_block(); } @@ -496,7 +516,7 @@ __device__ void gpuDecodeStream( level_run = shuffle(level_run); cur_def += sym_len; } - if (s->error) { break; } + if (s->error != 0) { break; } batch_len = min(num_input_values - value_count, 32); if (level_run & 1) { @@ -852,7 +872,7 @@ __device__ void gpuDecodeLevels(page_state_s* s, constexpr int batch_size = 32; int cur_leaf_count = target_leaf_count; - while (!s->error && s->nz_count < target_leaf_count && + while (s->error == 0 && s->nz_count < target_leaf_count && s->input_value_count < s->num_input_values) { if (has_repetition) { gpuDecodeStream(rep, s, cur_leaf_count, t, level_type::REPETITION); @@ -916,7 +936,7 @@ inline __device__ uint32_t InitLevelSection(page_state_s* s, } s->lvl_start[lvl] = cur; - if (cur > end) { s->error = 2; } + if (cur > end) { s->set_error_code(decode_error::LEVEL_STREAM_OVERRUN); } }; // this is a little redundant. 
if level_bits == 0, then nothing should be encoded @@ -941,8 +961,8 @@ inline __device__ uint32_t InitLevelSection(page_state_s* s, // add back the 4 bytes for the length len += 4; } else { - len = 0; - s->error = 2; + len = 0; + s->set_error_code(decode_error::LEVEL_STREAM_OVERRUN); } } else if (encoding == Encoding::BIT_PACKED) { len = (s->page.num_input_values * level_bits + 7) >> 3; @@ -951,8 +971,8 @@ inline __device__ uint32_t InitLevelSection(page_state_s* s, s->lvl_start[lvl] = cur; s->abs_lvl_start[lvl] = cur; } else { - s->error = 3; - len = 0; + len = 0; + s->set_error_code(decode_error::UNSUPPORTED_ENCODING); } s->abs_lvl_end[lvl] = start + len; @@ -1094,7 +1114,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, } if (!t) { - s->error = 0; + s->reset_error_code(); // IMPORTANT : nested schemas can have 0 rows in a page but still have // values. The case is: @@ -1152,7 +1172,7 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, break; default: // FIXED_LEN_BYTE_ARRAY: s->dtype_len = dtype_len_out; - s->error |= (s->dtype_len <= 0); + if (s->dtype_len <= 0) { s->set_error_code(decode_error::INVALID_DATA_TYPE); } break; } // Special check for downconversions @@ -1268,7 +1288,9 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, s->dict_run = 0; s->dict_val = 0; s->dict_bits = (cur < end) ? 
*cur++ : 0; - if (s->dict_bits > 32 || !s->dict_base) { s->error = (10 << 8) | s->dict_bits; } + if (s->dict_bits > 32 || !s->dict_base) { + s->set_error_code(decode_error::INVALID_DICT_WIDTH); + } break; case Encoding::PLAIN: s->dict_size = static_cast(end - cur); @@ -1279,22 +1301,23 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, // first 4 bytes are length of RLE data int const len = (cur[0]) + (cur[1] << 8) + (cur[2] << 16) + (cur[3] << 24); cur += 4; - if (cur + len > end) { s->error = 2; } + if (cur + len > end) { s->set_error_code(decode_error::DATA_STREAM_OVERRUN); } s->dict_run = 0; } break; case Encoding::DELTA_BINARY_PACKED: // nothing to do, just don't error break; - default: - s->error = 1; // Unsupported encoding + default: { + s->set_error_code(decode_error::UNSUPPORTED_ENCODING); break; + } } - if (cur > end) { s->error = 1; } + if (cur > end) { s->set_error_code(decode_error::DATA_STREAM_OVERRUN); } s->lvl_end = cur; s->data_start = cur; s->data_end = end; } else { - s->error = 1; + s->set_error_code(decode_error::EMPTY_PAGE); } s->lvl_count[level_type::REPETITION] = 0; diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index 35f33a761be..2b78dead205 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -32,8 +32,12 @@ namespace { // with V2 page headers; see https://www.mail-archive.com/dev@parquet.apache.org/msg11826.html). // this kernel only needs 96 threads (3 warps)(for now). 
template -__global__ void __launch_bounds__(96) gpuDecodeDeltaBinary( - PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) +__global__ void __launch_bounds__(96) + gpuDecodeDeltaBinary(PageInfo* pages, + device_span chunks, + size_t min_row, + size_t num_rows, + int32_t* error_code) { using cudf::detail::warp_size; __shared__ __align__(16) delta_binary_decoder db_state; @@ -79,7 +83,8 @@ __global__ void __launch_bounds__(96) gpuDecodeDeltaBinary( // that has a value we need. if (skipped_leaf_values > 0) { db->skip_values(skipped_leaf_values); } - while (!s->error && (s->input_value_count < s->num_input_values || s->src_pos < s->nz_count)) { + while (s->error == 0 && + (s->input_value_count < s->num_input_values || s->src_pos < s->nz_count)) { uint32_t target_pos; uint32_t const src_pos = s->src_pos; @@ -145,6 +150,11 @@ __global__ void __launch_bounds__(96) gpuDecodeDeltaBinary( } __syncthreads(); } + + if (t == 0 and s->error != 0) { + cuda::atomic_ref ref{*error_code}; + ref.fetch_or(s->error, cuda::std::memory_order_relaxed); + } } } // anonymous namespace @@ -157,6 +167,7 @@ void __host__ DecodeDeltaBinary(cudf::detail::hostdevice_vector& pages size_t num_rows, size_t min_row, int level_type_size, + int32_t* error_code, rmm::cuda_stream_view stream) { CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); @@ -165,11 +176,11 @@ void __host__ DecodeDeltaBinary(cudf::detail::hostdevice_vector& pages dim3 dim_grid(pages.size(), 1); // 1 threadblock per page if (level_type_size == 1) { - gpuDecodeDeltaBinary - <<>>(pages.device_ptr(), chunks, min_row, num_rows); + gpuDecodeDeltaBinary<<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); } else { - gpuDecodeDeltaBinary - <<>>(pages.device_ptr(), chunks, min_row, num_rows); + gpuDecodeDeltaBinary<<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); } } diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu index 
1ac4c95f713..d79abe4a6d2 100644 --- a/cpp/src/io/parquet/page_string_decode.cu +++ b/cpp/src/io/parquet/page_string_decode.cu @@ -582,8 +582,12 @@ __global__ void __launch_bounds__(preprocess_block_size) gpuComputePageStringSiz * @tparam level_t Type used to store decoded repetition and definition levels */ template -__global__ void __launch_bounds__(decode_block_size) gpuDecodeStringPageData( - PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows) +__global__ void __launch_bounds__(decode_block_size) + gpuDecodeStringPageData(PageInfo* pages, + device_span chunks, + size_t min_row, + size_t num_rows, + int32_t* error_code) { __shared__ __align__(16) page_state_s state_g; __shared__ __align__(4) size_type last_offset; @@ -617,7 +621,8 @@ __global__ void __launch_bounds__(decode_block_size) gpuDecodeStringPageData( // skipped_leaf_values will always be 0 for flat hierarchies. uint32_t skipped_leaf_values = s->page.skipped_leaf_values; - while (!s->error && (s->input_value_count < s->num_input_values || s->src_pos < s->nz_count)) { + while (s->error == 0 && + (s->input_value_count < s->num_input_values || s->src_pos < s->nz_count)) { int target_pos; int src_pos = s->src_pos; @@ -742,6 +747,11 @@ __global__ void __launch_bounds__(decode_block_size) gpuDecodeStringPageData( auto const offptr = reinterpret_cast(nesting_info_base[leaf_level_index].data_out); block_excl_sum(offptr, value_count, s->page.str_offset); + + if (t == 0 and s->error != 0) { + cuda::atomic_ref ref{*error_code}; + ref.fetch_or(s->error, cuda::std::memory_order_relaxed); + } } } // anonymous namespace @@ -775,6 +785,7 @@ void __host__ DecodeStringPageData(cudf::detail::hostdevice_vector& pa size_t num_rows, size_t min_row, int level_type_size, + int32_t* error_code, rmm::cuda_stream_view stream) { CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); @@ -783,11 +794,11 @@ void __host__ DecodeStringPageData(cudf::detail::hostdevice_vector& pa dim3 dim_grid(pages.size(), 1); 
// 1 threadblock per page if (level_type_size == 1) { - gpuDecodeStringPageData - <<>>(pages.device_ptr(), chunks, min_row, num_rows); + gpuDecodeStringPageData<<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); } else { - gpuDecodeStringPageData - <<>>(pages.device_ptr(), chunks, min_row, num_rows); + gpuDecodeStringPageData<<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); } } diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index a760c2448dc..3c37c0df021 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -54,6 +54,21 @@ constexpr int rolling_index(int index) return index % rolling_size; } +/** + * @brief Enum for the different types of errors that can occur during decoding. + * + * These values are used as bitmasks, so they must be powers of 2. + */ +enum class decode_error : int32_t { + DATA_STREAM_OVERRUN = 0x1, + LEVEL_STREAM_OVERRUN = 0x2, + UNSUPPORTED_ENCODING = 0x4, + INVALID_LEVEL_RUN = 0x8, + INVALID_DATA_TYPE = 0x10, + EMPTY_PAGE = 0x20, + INVALID_DICT_WIDTH = 0x40, +}; + /** * @brief Struct representing an input column in the file. 
*/ @@ -566,6 +581,7 @@ void ComputePageStringSizes(cudf::detail::hostdevice_vector& pages, * @param[in] num_rows Total number of rows to read * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding + * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ void DecodePageData(cudf::detail::hostdevice_vector& pages, @@ -573,6 +589,7 @@ void DecodePageData(cudf::detail::hostdevice_vector& pages, size_t num_rows, size_t min_row, int level_type_size, + int32_t* error_code, rmm::cuda_stream_view stream); /** @@ -586,6 +603,7 @@ void DecodePageData(cudf::detail::hostdevice_vector& pages, * @param[in] num_rows Total number of rows to read * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding + * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ void DecodeStringPageData(cudf::detail::hostdevice_vector& pages, @@ -593,6 +611,7 @@ void DecodeStringPageData(cudf::detail::hostdevice_vector& pages, size_t num_rows, size_t min_row, int level_type_size, + int32_t* error_code, rmm::cuda_stream_view stream); /** @@ -606,6 +625,7 @@ void DecodeStringPageData(cudf::detail::hostdevice_vector& pages, * @param[in] num_rows Total number of rows to read * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding + * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use, default 0 */ void DecodeDeltaBinary(cudf::detail::hostdevice_vector& pages, @@ -613,6 +633,7 @@ void DecodeDeltaBinary(cudf::detail::hostdevice_vector& pages, size_t num_rows, size_t min_row, int level_type_size, + int32_t* error_code, rmm::cuda_stream_view stream); /** diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 8b0a0bd4eb0..6cbe64e227b 100644 --- 
a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -163,6 +163,8 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) chunk_nested_valids.host_to_device_async(_stream); chunk_nested_data.host_to_device_async(_stream); + rmm::device_scalar error_code(0, _stream); + // get the number of streams we need from the pool and tell them to wait on the H2D copies int const nkernels = std::bitset<32>(kernel_mask).count(); auto streams = cudf::detail::fork_streams(_stream, nkernels); @@ -174,17 +176,20 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) if (has_strings) { auto& stream = streams[s_idx++]; chunk_nested_str_data.host_to_device_async(stream); - gpu::DecodeStringPageData(pages, chunks, num_rows, skip_rows, level_type_size, stream); + gpu::DecodeStringPageData( + pages, chunks, num_rows, skip_rows, level_type_size, error_code.data(), stream); } // launch delta binary decoder if ((kernel_mask & gpu::KERNEL_MASK_DELTA_BINARY) != 0) { - gpu::DecodeDeltaBinary(pages, chunks, num_rows, skip_rows, level_type_size, streams[s_idx++]); + gpu::DecodeDeltaBinary( + pages, chunks, num_rows, skip_rows, level_type_size, error_code.data(), streams[s_idx++]); } // launch the catch-all page decoder if ((kernel_mask & gpu::KERNEL_MASK_GENERAL) != 0) { - gpu::DecodePageData(pages, chunks, num_rows, skip_rows, level_type_size, streams[s_idx++]); + gpu::DecodePageData( + pages, chunks, num_rows, skip_rows, level_type_size, error_code.data(), streams[s_idx++]); } // synchronize the streams @@ -193,7 +198,13 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) pages.device_to_host_async(_stream); page_nesting.device_to_host_async(_stream); page_nesting_decode.device_to_host_async(_stream); - _stream.synchronize(); + + auto const decode_error = error_code.value(_stream); + if (decode_error != 0) { + std::stringstream stream; + stream << std::hex << decode_error; + CUDF_FAIL("Parquet data decode 
failed with code(s) 0x" + stream.str()); + } // for list columns, add the final offset to every offset buffer. // TODO : make this happen in more efficiently. Maybe use thrust::for_each From 53f0f74f6c6d66441225278f19a69885fb8b43c6 Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Wed, 27 Sep 2023 23:32:46 -0500 Subject: [PATCH 32/76] Support for progressive parquet chunked reading. (#14079) Previously, the parquet chunked reader operated by controlling the size of output chunks only. It would still ingest the entire input file and decompress it, which can take up a considerable amount of memory. With this new 'progressive' support, we also 'chunk' at the input level. Specifically, the user can pass a `pass_read_limit` value which controls how much memory is used for storing compressed/decompressed data. The reader will make multiple passes over the file, reading as many row groups as it can to attempt to fit within this limit. Within each pass, chunks are emitted as before. From the external user's perspective, the chunked read mechanism is the same. You call `has_next()` and `read_chunk()`. If the user has specified a value for `pass_read_limit` the set of chunks produced might end up being different (although the concatenation of all of them will still be the same). The core idea of the code change is to add the idea of the internal `pass`. Previously we had a `file_intermediate_data` which held data across `read_chunk()` calls. There is now a `pass_intermediate_data` struct which holds information specific to a given pass. Many of the invariant things from the file level before (row groups and chunks to process) are now stored in the pass intermediate data. As we begin each pass, we take the subset of global row groups and chunks that we are going to process for this pass, copy them to out intermediate data, and the remainder of the reader reference this instead of the file-level data. 
In order to avoid breaking pre-existing interfaces, there's a new contructor for the `chunked_parquet_reader` class: ``` chunked_parquet_reader( std::size_t chunk_read_limit, std::size_t pass_read_limit, parquet_reader_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); ``` Authors: - https://github.com/nvdbaranec Approvers: - Yunsong Wang (https://github.com/PointKernel) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/14079 --- cpp/include/cudf/io/detail/parquet.hpp | 39 ++- cpp/include/cudf/io/parquet.hpp | 24 ++ cpp/src/io/functions.cpp | 17 + cpp/src/io/parquet/parquet_gpu.hpp | 69 +++- cpp/src/io/parquet/reader.cpp | 4 +- cpp/src/io/parquet/reader_impl.cpp | 128 ++++--- cpp/src/io/parquet/reader_impl.hpp | 52 ++- cpp/src/io/parquet/reader_impl_helpers.cpp | 4 +- cpp/src/io/parquet/reader_impl_helpers.hpp | 15 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 344 +++++++++++++------ cpp/tests/io/parquet_chunked_reader_test.cpp | 68 +++- 11 files changed, 561 insertions(+), 203 deletions(-) diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 3f2e1fa5e6c..074f690d2c7 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -91,7 +91,8 @@ class reader { class chunked_reader : private reader { public: /** - * @brief Constructor from a read size limit and an array of data sources with reader options. + * @brief Constructor from an output size memory limit and an input size memory limit and an array + * of data sources with reader options. * * The typical usage should be similar to this: * ``` @@ -102,17 +103,45 @@ class chunked_reader : private reader { * * ``` * - * If `chunk_read_limit == 0` (i.e., no reading limit), a call to `read_chunk()` will read the - * whole file and return a table containing all rows. 
+ * If `chunk_read_limit == 0` (i.e., no output limit), and `pass_read_limit == 0` (no input + * temporary memory size limit) a call to `read_chunk()` will read the whole file and return a + * table containing all rows. + * + * The chunk_read_limit parameter controls the size of the output chunks produces. If the user + * specifies 100 MB of data, the reader will attempt to return chunks containing tables that have + * a total bytes size (over all columns) of 100 MB or less. This is a soft limit and the code + * will not fail if it cannot satisfy the limit. It will make a best-effort atttempt only. + * + * The pass_read_limit parameter controls how much temporary memory is used in the process of + * decoding the file. The primary contributor to this memory usage is the uncompressed size of + * the data read out of the file and the decompressed (but not yet decoded) size of the data. The + * granularity of a given pass is at the row group level. It will not attempt to read at the sub + * row-group level. + * + * Combined, the way to visualize passes and chunks is as follows: + * + * @code{.pseudo} + * for(each pass){ + * for(each output chunk within a pass){ + * return a table that fits within the output chunk limit + * } + * } + * @endcode + * + * With a pass_read_limit of `0` you are simply saying you have one pass that reads the entire + * file as normal. * * @param chunk_read_limit Limit on total number of bytes to be returned per read, - * or `0` if there is no limit + * or `0` if there is no limit + * @param pass_read_limit Limit on total amount of memory used for temporary computations during + * loading, or `0` if there is no limit * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading behavior - * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit chunked_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 788ff15f3c1..deaf23d405a 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -445,6 +445,30 @@ class chunked_parquet_reader { parquet_reader_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** + * @brief Constructor for chunked reader. + * + * This constructor requires the same `parquet_reader_option` parameter as in + * `cudf::read_parquet()`, with additional parameters to specify the size byte limit of the + * output table for each reading, and a byte limit on the amount of temporary memory to use + * when reading. pass_read_limit affects how many row groups we can read at a time by limiting + * the amount of memory dedicated to decompression space. pass_read_limit is a hint, not an + * absolute limit - if a single row group cannot fit within the limit given, it will still be + * loaded. + * + * @param chunk_read_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit + * @param pass_read_limit Limit on the amount of memory used for reading and decompressing data or + * `0` if there is no limit + * @param options The options used to read Parquet file + * @param mr Device memory resource to use for device memory allocation + */ + chunked_parquet_reader( + std::size_t chunk_read_limit, + std::size_t pass_read_limit, + parquet_reader_options const& options, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Destructor, destroying the internal reader instance. 
* diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 45f8b0f8822..392a7850886 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -562,6 +562,23 @@ chunked_parquet_reader::chunked_parquet_reader(std::size_t chunk_read_limit, parquet_reader_options const& options, rmm::mr::device_memory_resource* mr) : reader{std::make_unique(chunk_read_limit, + 0, + make_datasources(options.get_source()), + options, + cudf::get_default_stream(), + mr)} +{ +} + +/** + * @copydoc cudf::io::chunked_parquet_reader::chunked_parquet_reader + */ +chunked_parquet_reader::chunked_parquet_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, + parquet_reader_options const& options, + rmm::mr::device_memory_resource* mr) + : reader{std::make_unique(chunk_read_limit, + pass_read_limit, make_datasources(options.get_source()), options, cudf::get_default_stream(), diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 3c37c0df021..51c862b376b 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -321,33 +321,74 @@ struct ColumnChunkDesc { }; /** - * @brief Struct to store raw/intermediate file data before parsing. + * @brief The row_group_info class + */ +struct row_group_info { + size_type index; // row group index within a file. aggregate_reader_metadata::get_row_group() is + // called with index and source_index + size_t start_row; + size_type source_index; // file index. + + row_group_info() = default; + + row_group_info(size_type index, size_t start_row, size_type source_index) + : index{index}, start_row{start_row}, source_index{source_index} + { + } +}; + +/** + * @brief Struct to store file-level data that remains constant for + * all passes/chunks for the file. */ struct file_intermediate_data { + // all row groups to read + std::vector row_groups{}; + + // all chunks from the selected row groups. 
We may end up reading these chunks progressively + // instead of all at once + std::vector chunks{}; + + // skip_rows/num_rows values for the entire file. these need to be adjusted per-pass because we + // may not be visiting every row group that contains these bounds + size_t global_skip_rows; + size_t global_num_rows; +}; + +/** + * @brief Structs to identify the reading row range for each chunk of rows in chunked reading. + */ +struct chunk_read_info { + size_t skip_rows; + size_t num_rows; +}; + +/** + * @brief Struct to store pass-level data that remains constant for a single pass. + */ +struct pass_intermediate_data { std::vector> raw_page_data; rmm::device_buffer decomp_page_data; + + // rowgroup, chunk and page information for the current pass. + std::vector row_groups{}; cudf::detail::hostdevice_vector chunks{}; cudf::detail::hostdevice_vector pages_info{}; cudf::detail::hostdevice_vector page_nesting_info{}; cudf::detail::hostdevice_vector page_nesting_decode_info{}; - rmm::device_buffer level_decode_data; - int level_type_size; -}; - -/** - * @brief Struct to store intermediate page data for parsing each chunk of rows in chunked reading. - */ -struct chunk_intermediate_data { rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; rmm::device_uvector page_index{0, rmm::cuda_stream_default}; rmm::device_uvector str_dict_index{0, rmm::cuda_stream_default}; -}; -/** - * @brief Structs to identify the reading row range for each chunk of rows in chunked reading. - */ -struct chunk_read_info { + std::vector output_chunk_read_info; + std::size_t current_output_chunk{0}; + + rmm::device_buffer level_decode_data{}; + int level_type_size{0}; + + // skip_rows and num_rows values for this particular pass. these may be adjusted values from the + // global values stored in file_intermediate_data. 
size_t skip_rows; size_t num_rows; }; diff --git a/cpp/src/io/parquet/reader.cpp b/cpp/src/io/parquet/reader.cpp index 7365c102d8f..1e87447006d 100644 --- a/cpp/src/io/parquet/reader.cpp +++ b/cpp/src/io/parquet/reader.cpp @@ -43,12 +43,14 @@ table_with_metadata reader::read(parquet_reader_options const& options) } chunked_reader::chunked_reader(std::size_t chunk_read_limit, + std::size_t pass_read_limit, std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - _impl = std::make_unique(chunk_read_limit, std::move(sources), options, stream, mr); + _impl = std::make_unique( + chunk_read_limit, pass_read_limit, std::move(sources), options, stream, mr); } chunked_reader::~chunked_reader() = default; diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 6cbe64e227b..ea40f29a070 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -29,10 +29,10 @@ namespace cudf::io::detail::parquet { void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) { - auto& chunks = _file_itm_data.chunks; - auto& pages = _file_itm_data.pages_info; - auto& page_nesting = _file_itm_data.page_nesting_info; - auto& page_nesting_decode = _file_itm_data.page_nesting_decode_info; + auto& chunks = _pass_itm_data->chunks; + auto& pages = _pass_itm_data->pages_info; + auto& page_nesting = _pass_itm_data->page_nesting_info; + auto& page_nesting_decode = _pass_itm_data->page_nesting_decode_info; // Should not reach here if there is no page data. 
CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); @@ -55,7 +55,7 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) std::vector col_sizes(_input_columns.size(), 0L); if (has_strings) { gpu::ComputePageStringSizes( - pages, chunks, skip_rows, num_rows, _file_itm_data.level_type_size, _stream); + pages, chunks, skip_rows, num_rows, _pass_itm_data->level_type_size, _stream); col_sizes = calculate_page_string_offsets(); @@ -169,7 +169,7 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) int const nkernels = std::bitset<32>(kernel_mask).count(); auto streams = cudf::detail::fork_streams(_stream, nkernels); - auto const level_type_size = _file_itm_data.level_type_size; + auto const level_type_size = _pass_itm_data->level_type_size; // launch string decoder int s_idx = 0; @@ -277,6 +277,7 @@ reader::impl::impl(std::vector>&& sources, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : impl(0 /*chunk_read_limit*/, + 0 /*input_pass_read_limit*/, std::forward>>(sources), options, stream, @@ -285,11 +286,16 @@ reader::impl::impl(std::vector>&& sources, } reader::impl::impl(std::size_t chunk_read_limit, + std::size_t pass_read_limit, std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : _stream{stream}, _mr{mr}, _sources{std::move(sources)}, _chunk_read_limit{chunk_read_limit} + : _stream{stream}, + _mr{mr}, + _sources{std::move(sources)}, + _output_chunk_read_limit{chunk_read_limit}, + _input_pass_read_limit{pass_read_limit} { // Open and parse the source dataset metadata _metadata = std::make_unique(_sources); @@ -313,11 +319,8 @@ reader::impl::impl(std::size_t chunk_read_limit, _timestamp_type.id()); // Save the states of the output buffers for reuse in `chunk_read()`. - // Don't need to do it if we read the file all at once. 
- if (_chunk_read_limit > 0) { - for (auto const& buff : _output_buffers) { - _output_buffers_template.emplace_back(inline_column_buffer::empty_like(buff)); - } + for (auto const& buff : _output_buffers) { + _output_buffers_template.emplace_back(inline_column_buffer::empty_like(buff)); } } @@ -327,32 +330,62 @@ void reader::impl::prepare_data(int64_t skip_rows, host_span const> row_group_indices, std::optional> filter) { - if (_file_preprocessed) { return; } + // if we have not preprocessed at the whole-file level, do that now + if (!_file_preprocessed) { + // if filter is not empty, then create output types as vector and pass for filtering. + std::vector output_types; + if (filter.has_value()) { + std::transform(_output_buffers.cbegin(), + _output_buffers.cend(), + std::back_inserter(output_types), + [](auto const& col) { return col.type; }); + } + std::tie( + _file_itm_data.global_skip_rows, _file_itm_data.global_num_rows, _file_itm_data.row_groups) = + _metadata->select_row_groups( + row_group_indices, skip_rows, num_rows, output_types, filter, _stream); + + if (_file_itm_data.global_num_rows > 0 && not _file_itm_data.row_groups.empty() && + not _input_columns.empty()) { + // fills in chunk information without physically loading or decompressing + // the associated data + load_global_chunk_info(); + + // compute schedule of input reads. Each rowgroup contains 1 chunk per column. For now + // we will read an entire row group at a time. However, it is possible to do + // sub-rowgroup reads if we made some estimates on individual chunk sizes (tricky) and + // changed the high level structure such that we weren't always reading an entire table's + // worth of columns at once. + compute_input_pass_row_group_info(); + } - // if filter is not empty, then create output types as vector and pass for filtering. 
- std::vector output_types; - if (filter.has_value()) { - std::transform(_output_buffers.cbegin(), - _output_buffers.cend(), - std::back_inserter(output_types), - [](auto const& col) { return col.type; }); + _file_preprocessed = true; } - auto const [skip_rows_corrected, num_rows_corrected, row_groups_info] = - _metadata->select_row_groups( - row_group_indices, skip_rows, num_rows, output_types, filter, _stream); - - if (num_rows_corrected > 0 && not row_groups_info.empty() && not _input_columns.empty()) { - load_and_decompress_data(row_groups_info, num_rows_corrected); - preprocess_pages( - skip_rows_corrected, num_rows_corrected, uses_custom_row_bounds, _chunk_read_limit); - - if (_chunk_read_limit == 0) { // read the whole file at once - CUDF_EXPECTS(_chunk_read_info.size() == 1, - "Reading the whole file should yield only one chunk."); + + // if we have to start a new pass, do that now + if (!_pass_preprocessed) { + auto const num_passes = _input_pass_row_group_offsets.size() - 1; + + // always create the pass struct, even if we end up with no passes. + // this will also cause the previous pass information to be deleted + _pass_itm_data = std::make_unique(); + + if (_file_itm_data.global_num_rows > 0 && not _file_itm_data.row_groups.empty() && + not _input_columns.empty() && _current_input_pass < num_passes) { + // setup the pass_intermediate_info for this pass. 
+ setup_pass(); + + load_and_decompress_data(); + preprocess_pages(uses_custom_row_bounds, _output_chunk_read_limit); + + if (_output_chunk_read_limit == 0) { // read the whole file at once + CUDF_EXPECTS(_pass_itm_data->output_chunk_read_info.size() == 1, + "Reading the whole file should yield only one chunk."); + } } - } - _file_preprocessed = true; + _pass_preprocessed = true; + } } void reader::impl::populate_metadata(table_metadata& out_metadata) @@ -382,11 +415,12 @@ table_with_metadata reader::impl::read_chunk_internal( auto out_columns = std::vector>{}; out_columns.reserve(_output_buffers.size()); - if (!has_next() || _chunk_read_info.empty()) { + if (!has_next() || _pass_itm_data->output_chunk_read_info.empty()) { return finalize_output(out_metadata, out_columns, filter); } - auto const& read_info = _chunk_read_info[_current_read_chunk++]; + auto const& read_info = + _pass_itm_data->output_chunk_read_info[_pass_itm_data->current_output_chunk]; // Allocate memory buffers for the output columns. allocate_columns(read_info.skip_rows, read_info.num_rows, uses_custom_row_bounds); @@ -439,6 +473,17 @@ table_with_metadata reader::impl::finalize_output( _output_metadata = std::make_unique(out_metadata); } + // advance chunks/passes as necessary + _pass_itm_data->current_output_chunk++; + _chunk_count++; + if (_pass_itm_data->current_output_chunk >= _pass_itm_data->output_chunk_read_info.size()) { + _pass_itm_data->current_output_chunk = 0; + _pass_itm_data->output_chunk_read_info.clear(); + + _current_input_pass++; + _pass_preprocessed = false; + } + if (filter.has_value()) { auto read_table = std::make_unique
(std::move(out_columns)); auto predicate = cudf::detail::compute_column( @@ -458,7 +503,8 @@ table_with_metadata reader::impl::read( host_span const> row_group_indices, std::optional> filter) { - CUDF_EXPECTS(_chunk_read_limit == 0, "Reading the whole file must not have non-zero byte_limit."); + CUDF_EXPECTS(_output_chunk_read_limit == 0, + "Reading the whole file must not have non-zero byte_limit."); table_metadata metadata; populate_metadata(metadata); auto expr_conv = named_to_reference_converter(filter, metadata); @@ -472,7 +518,7 @@ table_with_metadata reader::impl::read_chunk() { // Reset the output buffers to their original states (right after reader construction). // Don't need to do it if we read the file all at once. - if (_chunk_read_limit > 0) { + if (_chunk_count > 0) { _output_buffers.resize(0); for (auto const& buff : _output_buffers_template) { _output_buffers.emplace_back(inline_column_buffer::empty_like(buff)); @@ -494,7 +540,11 @@ bool reader::impl::has_next() true /*uses_custom_row_bounds*/, {} /*row_group_indices, empty means read all row groups*/, std::nullopt /*filter*/); - return _current_read_chunk < _chunk_read_info.size(); + + auto const num_input_passes = + _input_pass_row_group_offsets.size() == 0 ? 0 : _input_pass_row_group_offsets.size() - 1; + return (_pass_itm_data->current_output_chunk < _pass_itm_data->output_chunk_read_info.size()) || + (_current_input_pass < num_input_passes); } namespace { diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index a980670e465..9445e4d1648 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -90,17 +90,20 @@ class reader::impl { * ``` * * Reading the whole given file at once through `read()` function is still supported if - * `chunk_read_limit == 0` (i.e., no reading limit). - * In such case, `read_chunk()` will also return rows of the entire file. 
+ * `chunk_read_limit == 0` (i.e., no reading limit) and `pass_read_limit == 0` (no temporary + * memory limit). In such case, `read_chunk()` will also return rows of the entire file. * * @param chunk_read_limit Limit on total number of bytes to be returned per read, * or `0` if there is no limit + * @param pass_read_limit Limit on memory usage for the purposes of decompression and processing + * of input, or `0` if there is no limit. * @param sources Dataset sources * @param options Settings for controlling reading behavior * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ explicit impl(std::size_t chunk_read_limit, + std::size_t pass_read_limit, std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, @@ -133,22 +136,22 @@ class reader::impl { host_span const> row_group_indices, std::optional> filter); + void load_global_chunk_info(); + void compute_input_pass_row_group_info(); + void setup_pass(); + /** * @brief Create chunk information and start file reads * - * @param row_groups_info vector of information about row groups to read - * @param num_rows Maximum number of rows to read * @return pair of boolean indicating if compressed chunks were found and a vector of futures for * read completion */ - std::pair>> create_and_read_column_chunks( - cudf::host_span const row_groups_info, size_type num_rows); + std::pair>> read_and_decompress_column_chunks(); /** * @brief Load and decompress the input file(s) into memory. */ - void load_and_decompress_data(cudf::host_span const row_groups_info, - size_type num_rows); + void load_and_decompress_data(); /** * @brief Perform some preprocessing for page data and also compute the split locations * * For flat schemas, these values are computed during header decoding (see gpuDecodePageHeaders). 
* - * @param skip_rows Crop all rows below skip_rows - * @param num_rows Maximum number of rows to read * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific * bounds * @param chunk_read_limit Limit on total number of bytes to be returned per read, * or `0` if there is no limit */ - void preprocess_pages(size_t skip_rows, - size_t num_rows, - bool uses_custom_row_bounds, - size_t chunk_read_limit); + void preprocess_pages(bool uses_custom_row_bounds, size_t chunk_read_limit); /** * @brief Allocate nesting information storage for all pages and set pointers to it. @@ -278,12 +276,28 @@ class reader::impl { std::optional> _reader_column_schema; data_type _timestamp_type{type_id::EMPTY}; - // Variables used for chunked reading: + // chunked reading happens in 2 parts: + // + // At the top level there is the "pass" in which we try and limit the + // total amount of temporary memory (compressed data, decompressed data) in use + // via _input_pass_read_limit. + // + // Within a pass, we produce one or more chunks of output, whose maximum total + // byte size is controlled by _output_chunk_read_limit. + cudf::io::parquet::gpu::file_intermediate_data _file_itm_data; - cudf::io::parquet::gpu::chunk_intermediate_data _chunk_itm_data; - std::vector _chunk_read_info; - std::size_t _chunk_read_limit{0}; - std::size_t _current_read_chunk{0}; + std::unique_ptr _pass_itm_data; + + // an array of offsets into _file_itm_data::global_chunks. Each pair of offsets represents + // the start/end of the chunks to be loaded for a given pass. 
+ std::vector _input_pass_row_group_offsets{}; + std::vector _input_pass_row_count{}; + std::size_t _current_input_pass{0}; + std::size_t _chunk_count{0}; + + std::size_t _output_chunk_read_limit{0}; + std::size_t _input_pass_read_limit{0}; + bool _pass_preprocessed{false}; bool _file_preprocessed{false}; }; diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index f6dbeb275fc..fcaa610fbb7 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -344,7 +344,7 @@ std::vector aggregate_reader_metadata::get_pandas_index_names() con return names; } -std::tuple> +std::tuple> aggregate_reader_metadata::select_row_groups( host_span const> row_group_indices, int64_t skip_rows_opt, @@ -362,7 +362,7 @@ aggregate_reader_metadata::select_row_groups( host_span const>(filtered_row_group_indices.value()); } } - std::vector selection; + std::vector selection; auto [rows_to_skip, rows_to_read] = [&]() { if (not row_group_indices.empty()) { return std::pair{}; } auto const from_opts = cudf::io::detail::skip_rows_num_rows_from_options( diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp index 751ffc33123..61e4f94df0f 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.hpp +++ b/cpp/src/io/parquet/reader_impl_helpers.hpp @@ -53,19 +53,6 @@ using namespace cudf::io::parquet; : data_type{t_id}; } -/** - * @brief The row_group_info class - */ -struct row_group_info { - size_type const index; - size_t const start_row; // TODO source index - size_type const source_index; - row_group_info(size_type index, size_t start_row, size_type source_index) - : index(index), start_row(start_row), source_index(source_index) - { - } -}; - /** * @brief Class for parsing dataset metadata */ @@ -194,7 +181,7 @@ class aggregate_reader_metadata { * @return A tuple of corrected row_start, row_count and list of row group indexes and its * starting row */ - 
[[nodiscard]] std::tuple> select_row_groups( + [[nodiscard]] std::tuple> select_row_groups( host_span const> row_group_indices, int64_t row_start, std::optional const& row_count, diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index a2db0de26bb..c731c467f2c 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -577,10 +577,10 @@ int decode_page_headers(cudf::detail::hostdevice_vector& c void reader::impl::allocate_nesting_info() { - auto const& chunks = _file_itm_data.chunks; - auto& pages = _file_itm_data.pages_info; - auto& page_nesting_info = _file_itm_data.page_nesting_info; - auto& page_nesting_decode_info = _file_itm_data.page_nesting_decode_info; + auto const& chunks = _pass_itm_data->chunks; + auto& pages = _pass_itm_data->pages_info; + auto& page_nesting_info = _pass_itm_data->page_nesting_info; + auto& page_nesting_decode_info = _pass_itm_data->page_nesting_decode_info; // compute total # of page_nesting infos needed and allocate space. doing this in one // buffer to keep it to a single gpu allocation @@ -702,38 +702,39 @@ void reader::impl::allocate_nesting_info() void reader::impl::allocate_level_decode_space() { - auto& pages = _file_itm_data.pages_info; + auto& pages = _pass_itm_data->pages_info; // TODO: this could be made smaller if we ignored dictionary pages and pages with no // repetition data. 
size_t const per_page_decode_buf_size = - LEVEL_DECODE_BUF_SIZE * 2 * _file_itm_data.level_type_size; + LEVEL_DECODE_BUF_SIZE * 2 * _pass_itm_data->level_type_size; auto const decode_buf_size = per_page_decode_buf_size * pages.size(); - _file_itm_data.level_decode_data = + _pass_itm_data->level_decode_data = rmm::device_buffer(decode_buf_size, _stream, rmm::mr::get_current_device_resource()); // distribute the buffers - uint8_t* buf = static_cast(_file_itm_data.level_decode_data.data()); + uint8_t* buf = static_cast(_pass_itm_data->level_decode_data.data()); for (size_t idx = 0; idx < pages.size(); idx++) { auto& p = pages[idx]; p.lvl_decode_buf[gpu::level_type::DEFINITION] = buf; - buf += (LEVEL_DECODE_BUF_SIZE * _file_itm_data.level_type_size); + buf += (LEVEL_DECODE_BUF_SIZE * _pass_itm_data->level_type_size); p.lvl_decode_buf[gpu::level_type::REPETITION] = buf; - buf += (LEVEL_DECODE_BUF_SIZE * _file_itm_data.level_type_size); + buf += (LEVEL_DECODE_BUF_SIZE * _pass_itm_data->level_type_size); } } -std::pair>> reader::impl::create_and_read_column_chunks( - cudf::host_span const row_groups_info, size_type num_rows) +std::pair>> reader::impl::read_and_decompress_column_chunks() { - auto& raw_page_data = _file_itm_data.raw_page_data; - auto& chunks = _file_itm_data.chunks; + auto const& row_groups_info = _pass_itm_data->row_groups; + auto const num_rows = _pass_itm_data->num_rows; + + auto& raw_page_data = _pass_itm_data->raw_page_data; + auto& chunks = _pass_itm_data->chunks; // Descriptors for all the chunks that make up the selected columns auto const num_input_columns = _input_columns.size(); auto const num_chunks = row_groups_info.size() * num_input_columns; - chunks = cudf::detail::hostdevice_vector(0, num_chunks, _stream); // Association between each column chunk and its source std::vector chunk_source_map(num_chunks); @@ -747,13 +748,68 @@ std::pair>> reader::impl::create_and_read_co // Initialize column chunk information size_t total_decompressed_size = 
0; auto remaining_rows = num_rows; - std::vector> read_rowgroup_tasks; + std::vector> read_chunk_tasks; + size_type chunk_count = 0; for (auto const& rg : row_groups_info) { auto const& row_group = _metadata->get_row_group(rg.index, rg.source_index); - auto const row_group_start = rg.start_row; auto const row_group_source = rg.source_index; auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); + // generate ColumnChunkDesc objects for everything to be decoded (all input columns) + for (size_t i = 0; i < num_input_columns; ++i) { + auto const& col = _input_columns[i]; + // look up metadata + auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); + + column_chunk_offsets[chunk_count] = + (col_meta.dictionary_page_offset != 0) + ? std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) + : col_meta.data_page_offset; + + // Map each column chunk to its column index and its source index + chunk_source_map[chunk_count] = row_group_source; + + if (col_meta.codec != Compression::UNCOMPRESSED) { + total_decompressed_size += col_meta.total_uncompressed_size; + } + + chunk_count++; + } + remaining_rows -= row_group_rows; + } + + // Read compressed chunk data to device memory + read_chunk_tasks.push_back(read_column_chunks_async(_sources, + raw_page_data, + chunks, + 0, + chunks.size(), + column_chunk_offsets, + chunk_source_map, + _stream)); + + CUDF_EXPECTS(remaining_rows == 0, "All rows data must be read."); + + return {total_decompressed_size > 0, std::move(read_chunk_tasks)}; +} + +void reader::impl::load_global_chunk_info() +{ + auto const num_rows = _file_itm_data.global_num_rows; + auto const& row_groups_info = _file_itm_data.row_groups; + auto& chunks = _file_itm_data.chunks; + + // Descriptors for all the chunks that make up the selected columns + auto const num_input_columns = _input_columns.size(); + auto const num_chunks = row_groups_info.size() * num_input_columns; + + // Initialize column 
chunk information + auto remaining_rows = num_rows; + for (auto const& rg : row_groups_info) { + auto const& row_group = _metadata->get_row_group(rg.index, rg.source_index); + auto const row_group_start = rg.start_row; + auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); + // generate ColumnChunkDesc objects for everything to be decoded (all input columns) for (size_t i = 0; i < num_input_columns; ++i) { auto col = _input_columns[i]; @@ -768,11 +824,6 @@ std::pair>> reader::impl::create_and_read_co schema.converted_type, schema.type_length); - column_chunk_offsets[chunks.size()] = - (col_meta.dictionary_page_offset != 0) - ? std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) - : col_meta.data_page_offset; - chunks.push_back(gpu::ColumnChunkDesc(col_meta.total_compressed_size, nullptr, col_meta.num_values, @@ -792,92 +843,171 @@ std::pair>> reader::impl::create_and_read_co clock_rate, i, col.schema_idx)); - - // Map each column chunk to its column index and its source index - chunk_source_map[chunks.size() - 1] = row_group_source; - - if (col_meta.codec != Compression::UNCOMPRESSED) { - total_decompressed_size += col_meta.total_uncompressed_size; - } } + remaining_rows -= row_group_rows; } +} - // Read compressed chunk data to device memory - read_rowgroup_tasks.push_back(read_column_chunks_async(_sources, - raw_page_data, - chunks, - 0, - chunks.size(), - column_chunk_offsets, - chunk_source_map, - _stream)); +void reader::impl::compute_input_pass_row_group_info() +{ + // at this point, row_groups has already been filtered down to just the row groups we need to + // handle optional skip_rows/num_rows parameters. + auto const& row_groups_info = _file_itm_data.row_groups; + + // if the user hasn't specified an input size limit, read everything in a single pass. 
+ if (_input_pass_read_limit == 0) { + _input_pass_row_group_offsets.push_back(0); + _input_pass_row_group_offsets.push_back(row_groups_info.size()); + return; + } - CUDF_EXPECTS(remaining_rows == 0, "All rows data must be read."); + // generate passes. make sure to account for the case where a single row group doesn't fit within + // + std::size_t const read_limit = + _input_pass_read_limit > 0 ? _input_pass_read_limit : std::numeric_limits::max(); + std::size_t cur_pass_byte_size = 0; + std::size_t cur_rg_start = 0; + std::size_t cur_row_count = 0; + _input_pass_row_group_offsets.push_back(0); + _input_pass_row_count.push_back(0); + + for (size_t cur_rg_index = 0; cur_rg_index < row_groups_info.size(); cur_rg_index++) { + auto const& rgi = row_groups_info[cur_rg_index]; + auto const& row_group = _metadata->get_row_group(rgi.index, rgi.source_index); + + // can we add this row group + if (cur_pass_byte_size + row_group.total_byte_size >= read_limit) { + // A single row group (the current one) is larger than the read limit: + // We always need to include at least one row group, so end the pass at the end of the current + // row group + if (cur_rg_start == cur_rg_index) { + _input_pass_row_group_offsets.push_back(cur_rg_index + 1); + _input_pass_row_count.push_back(cur_row_count + row_group.num_rows); + cur_rg_start = cur_rg_index + 1; + cur_pass_byte_size = 0; + } + // End the pass at the end of the previous row group + else { + _input_pass_row_group_offsets.push_back(cur_rg_index); + _input_pass_row_count.push_back(cur_row_count); + cur_rg_start = cur_rg_index; + cur_pass_byte_size = row_group.total_byte_size; + } + } else { + cur_pass_byte_size += row_group.total_byte_size; + } + cur_row_count += row_group.num_rows; + } + // add the last pass if necessary + if (_input_pass_row_group_offsets.back() != row_groups_info.size()) { + _input_pass_row_group_offsets.push_back(row_groups_info.size()); + _input_pass_row_count.push_back(cur_row_count); + } +} - return 
{total_decompressed_size > 0, std::move(read_rowgroup_tasks)}; +void reader::impl::setup_pass() +{ + // this will also cause the previous pass information to be deleted + _pass_itm_data = std::make_unique(); + + // setup row groups to be loaded for this pass + auto const row_group_start = _input_pass_row_group_offsets[_current_input_pass]; + auto const row_group_end = _input_pass_row_group_offsets[_current_input_pass + 1]; + auto const num_row_groups = row_group_end - row_group_start; + _pass_itm_data->row_groups.resize(num_row_groups); + std::copy(_file_itm_data.row_groups.begin() + row_group_start, + _file_itm_data.row_groups.begin() + row_group_end, + _pass_itm_data->row_groups.begin()); + + auto const num_passes = _input_pass_row_group_offsets.size() - 1; + CUDF_EXPECTS(_current_input_pass < num_passes, "Encountered an invalid read pass index"); + + auto const chunks_per_rowgroup = _input_columns.size(); + auto const num_chunks = chunks_per_rowgroup * num_row_groups; + + auto chunk_start = _file_itm_data.chunks.begin() + (row_group_start * chunks_per_rowgroup); + auto chunk_end = _file_itm_data.chunks.begin() + (row_group_end * chunks_per_rowgroup); + + _pass_itm_data->chunks = + cudf::detail::hostdevice_vector(num_chunks, _stream); + std::copy(chunk_start, chunk_end, _pass_itm_data->chunks.begin()); + + // adjust skip_rows and num_rows by what's available in the row groups we are processing + if (num_passes == 1) { + _pass_itm_data->skip_rows = _file_itm_data.global_skip_rows; + _pass_itm_data->num_rows = _file_itm_data.global_num_rows; + } else { + auto const global_start_row = _file_itm_data.global_skip_rows; + auto const global_end_row = global_start_row + _file_itm_data.global_num_rows; + auto const start_row = std::max(_input_pass_row_count[_current_input_pass], global_start_row); + auto const end_row = std::min(_input_pass_row_count[_current_input_pass + 1], global_end_row); + + // skip_rows is always global in the sense that it is relative to the first 
row of + // everything we will be reading, regardless of what pass we are on. + // num_rows is how many rows we are reading this pass. + _pass_itm_data->skip_rows = global_start_row + _input_pass_row_count[_current_input_pass]; + _pass_itm_data->num_rows = end_row - start_row; + } } -void reader::impl::load_and_decompress_data( - cudf::host_span const row_groups_info, size_type num_rows) +void reader::impl::load_and_decompress_data() { // This function should never be called if `num_rows == 0`. - CUDF_EXPECTS(num_rows > 0, "Number of reading rows must not be zero."); + CUDF_EXPECTS(_pass_itm_data->num_rows > 0, "Number of reading rows must not be zero."); - auto& raw_page_data = _file_itm_data.raw_page_data; - auto& decomp_page_data = _file_itm_data.decomp_page_data; - auto& chunks = _file_itm_data.chunks; - auto& pages = _file_itm_data.pages_info; + auto& raw_page_data = _pass_itm_data->raw_page_data; + auto& decomp_page_data = _pass_itm_data->decomp_page_data; + auto& chunks = _pass_itm_data->chunks; + auto& pages = _pass_itm_data->pages_info; - auto const [has_compressed_data, read_rowgroup_tasks] = - create_and_read_column_chunks(row_groups_info, num_rows); + auto const [has_compressed_data, read_chunks_tasks] = read_and_decompress_column_chunks(); - for (auto& task : read_rowgroup_tasks) { + for (auto& task : read_chunks_tasks) { task.wait(); } // Process dataset chunk pages into output columns auto const total_pages = count_page_headers(chunks, _stream); + if (total_pages <= 0) { return; } pages = cudf::detail::hostdevice_vector(total_pages, total_pages, _stream); - if (total_pages > 0) { - // decoding of column/page information - _file_itm_data.level_type_size = decode_page_headers(chunks, pages, _stream); - if (has_compressed_data) { - decomp_page_data = decompress_page_data(chunks, pages, _stream); - // Free compressed data - for (size_t c = 0; c < chunks.size(); c++) { - if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { 
raw_page_data[c].reset(); } - } + // decoding of column/page information + _pass_itm_data->level_type_size = decode_page_headers(chunks, pages, _stream); + if (has_compressed_data) { + decomp_page_data = decompress_page_data(chunks, pages, _stream); + // Free compressed data + for (size_t c = 0; c < chunks.size(); c++) { + if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { raw_page_data[c].reset(); } } + } - // build output column info - // walk the schema, building out_buffers that mirror what our final cudf columns will look - // like. important : there is not necessarily a 1:1 mapping between input columns and output - // columns. For example, parquet does not explicitly store a ColumnChunkDesc for struct - // columns. The "structiness" is simply implied by the schema. For example, this schema: - // required group field_id=1 name { - // required binary field_id=2 firstname (String); - // required binary field_id=3 middlename (String); - // required binary field_id=4 lastname (String); - // } - // will only contain 3 columns of data (firstname, middlename, lastname). But of course - // "name" is a struct column that we want to return, so we have to make sure that we - // create it ourselves. - // std::vector output_info = build_output_column_info(); - - // the following two allocate functions modify the page data - pages.device_to_host_sync(_stream); - { - // nesting information (sizes, etc) stored -per page- - // note : even for flat schemas, we allocate 1 level of "nesting" info - allocate_nesting_info(); + // build output column info + // walk the schema, building out_buffers that mirror what our final cudf columns will look + // like. important : there is not necessarily a 1:1 mapping between input columns and output + // columns. For example, parquet does not explicitly store a ColumnChunkDesc for struct + // columns. The "structiness" is simply implied by the schema. 
For example, this schema: + // required group field_id=1 name { + // required binary field_id=2 firstname (String); + // required binary field_id=3 middlename (String); + // required binary field_id=4 lastname (String); + // } + // will only contain 3 columns of data (firstname, middlename, lastname). But of course + // "name" is a struct column that we want to return, so we have to make sure that we + // create it ourselves. + // std::vector output_info = build_output_column_info(); + + // the following two allocate functions modify the page data + pages.device_to_host_sync(_stream); + { + // nesting information (sizes, etc) stored -per page- + // note : even for flat schemas, we allocate 1 level of "nesting" info + allocate_nesting_info(); - // level decode space - allocate_level_decode_space(); - } - pages.host_to_device_async(_stream); + // level decode space + allocate_level_decode_space(); } + pages.host_to_device_async(_stream); } namespace { @@ -1183,7 +1313,7 @@ std::vector find_splits(std::vector c */ std::vector compute_splits( cudf::detail::hostdevice_vector& pages, - gpu::chunk_intermediate_data const& id, + gpu::pass_intermediate_data const& id, size_t num_rows, size_t chunk_read_limit, rmm::cuda_stream_view stream) @@ -1539,13 +1669,12 @@ struct page_offset_output_iter { } // anonymous namespace -void reader::impl::preprocess_pages(size_t skip_rows, - size_t num_rows, - bool uses_custom_row_bounds, - size_t chunk_read_limit) +void reader::impl::preprocess_pages(bool uses_custom_row_bounds, size_t chunk_read_limit) { - auto& chunks = _file_itm_data.chunks; - auto& pages = _file_itm_data.pages_info; + auto const skip_rows = _pass_itm_data->skip_rows; + auto const num_rows = _pass_itm_data->num_rows; + auto& chunks = _pass_itm_data->chunks; + auto& pages = _pass_itm_data->pages_info; // compute page ordering. 
// @@ -1636,7 +1765,7 @@ void reader::impl::preprocess_pages(size_t skip_rows, // Build index for string dictionaries since they can't be indexed // directly due to variable-sized elements - _chunk_itm_data.str_dict_index = + _pass_itm_data->str_dict_index = cudf::detail::make_zeroed_device_uvector_async( total_str_dict_indexes, _stream, rmm::mr::get_current_device_resource()); @@ -1646,7 +1775,7 @@ void reader::impl::preprocess_pages(size_t skip_rows, CUDF_EXPECTS(input_col.schema_idx == chunks[c].src_col_schema, "Column/page schema index mismatch"); if (is_dict_chunk(chunks[c])) { - chunks[c].str_dict_index = _chunk_itm_data.str_dict_index.data() + str_ofs; + chunks[c].str_dict_index = _pass_itm_data->str_dict_index.data() + str_ofs; str_ofs += pages[page_count].num_input_values; } @@ -1677,7 +1806,7 @@ void reader::impl::preprocess_pages(size_t skip_rows, std::numeric_limits::max(), true, // compute num_rows chunk_read_limit > 0, // compute string sizes - _file_itm_data.level_type_size, + _pass_itm_data->level_type_size, _stream); // computes: @@ -1699,20 +1828,21 @@ void reader::impl::preprocess_pages(size_t skip_rows, } // preserve page ordering data for string decoder - _chunk_itm_data.page_keys = std::move(page_keys); - _chunk_itm_data.page_index = std::move(page_index); + _pass_itm_data->page_keys = std::move(page_keys); + _pass_itm_data->page_index = std::move(page_index); // compute splits if necessary. otherwise return a single split representing // the whole file. - _chunk_read_info = chunk_read_limit > 0 - ? compute_splits(pages, _chunk_itm_data, num_rows, chunk_read_limit, _stream) - : std::vector{{skip_rows, num_rows}}; + _pass_itm_data->output_chunk_read_info = + _output_chunk_read_limit > 0 + ? 
compute_splits(pages, *_pass_itm_data, num_rows, chunk_read_limit, _stream) + : std::vector{{skip_rows, num_rows}}; } void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds) { - auto const& chunks = _file_itm_data.chunks; - auto& pages = _file_itm_data.pages_info; + auto const& chunks = _pass_itm_data->chunks; + auto& pages = _pass_itm_data->pages_info; // Should not reach here if there is no page data. CUDF_EXPECTS(pages.size() > 0, "There is no page to parse"); @@ -1729,7 +1859,7 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses num_rows, false, // num_rows is already computed false, // no need to compute string sizes - _file_itm_data.level_type_size, + _pass_itm_data->level_type_size, _stream); // print_pages(pages, _stream); @@ -1766,7 +1896,7 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses // compute output column sizes by examining the pages of the -input- columns if (has_lists) { - auto& page_index = _chunk_itm_data.page_index; + auto& page_index = _pass_itm_data->page_index; std::vector h_cols_info; h_cols_info.reserve(_input_columns.size()); @@ -1846,10 +1976,10 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses std::vector reader::impl::calculate_page_string_offsets() { - auto& chunks = _file_itm_data.chunks; - auto& pages = _file_itm_data.pages_info; - auto const& page_keys = _chunk_itm_data.page_keys; - auto const& page_index = _chunk_itm_data.page_index; + auto& chunks = _pass_itm_data->chunks; + auto& pages = _pass_itm_data->pages_info; + auto const& page_keys = _pass_itm_data->page_keys; + auto const& page_index = _pass_itm_data->page_index; std::vector col_sizes(_input_columns.size(), 0L); rmm::device_uvector d_col_sizes(col_sizes.size(), _stream); diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 9815304b965..05fb9a3ec48 100644 --- 
a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -100,11 +100,13 @@ auto write_file(std::vector>& input_columns, return std::pair{std::move(input_table), std::move(filepath)}; } -auto chunked_read(std::string const& filepath, std::size_t byte_limit) +auto chunked_read(std::string const& filepath, + std::size_t output_limit, + std::size_t input_limit = 0) { auto const read_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).build(); - auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts); + auto reader = cudf::io::chunked_parquet_reader(output_limit, input_limit, read_opts); auto num_chunks = 0; auto out_tables = std::vector>{}; @@ -950,3 +952,65 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadNullCount) EXPECT_EQ(reader.read_chunk().tbl->get_column(0).null_count(), page_limit_rows / 4); } while (reader.has_next()); } + +TEST_F(ParquetChunkedReaderTest, InputLimitSimple) +{ + auto const filepath = temp_env->get_temp_filepath("input_limit_10_rowgroups.parquet"); + + // This results in 10 row groups, at 4001150 bytes per row group + constexpr int num_rows = 25'000'000; + auto value_iter = cudf::detail::make_counting_transform_iterator(0, [](int i) { return i; }); + cudf::test::fixed_width_column_wrapper expected(value_iter, value_iter + num_rows); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, + cudf::table_view{{expected}}) + // note: it is unnecessary to force compression to NONE here because the size we are using in + // the row group is the uncompressed data size. But forcing the dictionary policy to + // dictionary_policy::NEVER is necessary to prevent changes in the + // decompressed-but-not-yet-decoded data.
+ .dictionary_policy(cudf::io::dictionary_policy::NEVER); + + cudf::io::write_parquet(opts); + + { + // no chunking + auto const [result, num_chunks] = chunked_read(filepath, 0, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 25 chunks of 100k rows each + auto const [result, num_chunks] = chunked_read(filepath, 0, 1); + EXPECT_EQ(num_chunks, 25); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 25 chunks of 100k rows each + auto const [result, num_chunks] = chunked_read(filepath, 0, 4000000); + EXPECT_EQ(num_chunks, 25); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 25 chunks of 100k rows each + auto const [result, num_chunks] = chunked_read(filepath, 0, 4100000); + EXPECT_EQ(num_chunks, 25); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 12 chunks of 200k rows each, plus 1 final chunk of 100k rows. + auto const [result, num_chunks] = chunked_read(filepath, 0, 8002301); + EXPECT_EQ(num_chunks, 13); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } + + { + // 1 big chunk + auto const [result, num_chunks] = chunked_read(filepath, 0, size_t{1} * 1024 * 1024 * 1024); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->get_column(0)); + } +} From 23d24d43fac8615166c38231f13fe8751a8aec42 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Thu, 28 Sep 2023 19:08:55 +0200 Subject: [PATCH 33/76] Add `bytes_per_second` to distinct_count of stream_compaction nvbench. (#14172) This patch relates to #13735. 
Benchmark: [benchmark_distinct_count.txt](https://github.com/rapidsai/cudf/files/12700496/benchmark_distinct_count.txt) Authors: - Martin Marenz (https://github.com/Blonck) - Mark Harris (https://github.com/harrism) Approvers: - David Wendt (https://github.com/davidwendt) - Karthikeyan (https://github.com/karthikeyann) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/14172 --- cpp/benchmarks/stream_compaction/distinct_count.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/benchmarks/stream_compaction/distinct_count.cpp b/cpp/benchmarks/stream_compaction/distinct_count.cpp index 2b2c901b90f..3e324013d4e 100644 --- a/cpp/benchmarks/stream_compaction/distinct_count.cpp +++ b/cpp/benchmarks/stream_compaction/distinct_count.cpp @@ -40,6 +40,14 @@ static void bench_distinct_count(nvbench::state& state, nvbench::type_list auto const& data_column = data_table->get_column(0); auto const input_table = cudf::table_view{{data_column, data_column, data_column}}; + // Collect memory statistics for input and output. + state.add_global_memory_reads(input_table.num_rows() * input_table.num_columns()); + state.add_global_memory_writes(1); + if (null_probability > 0) { + state.add_global_memory_reads( + input_table.num_columns() * cudf::bitmask_allocation_size_bytes(input_table.num_rows())); + } + auto mem_stats_logger = cudf::memory_stats_logger(); // init stats logger state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { From b2f00809f40e2e81b01214177b412456d40404cc Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Thu, 28 Sep 2023 12:16:29 -0500 Subject: [PATCH 34/76] Pin dask and distributed for 23.10 release (#14225) This PR pins `dask` and `distributed` to `2023.9.2` for `23.10` release. 
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Ray Douglass (https://github.com/raydouglass) - Peter Andreas Entschev (https://github.com/pentschev) --- ci/test_wheel_dask_cudf.sh | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 6 +++--- conda/environments/all_cuda-120_arch-x86_64.yaml | 6 +++--- conda/recipes/custreamz/meta.yaml | 6 +++--- conda/recipes/dask-cudf/meta.yaml | 12 ++++++------ conda/recipes/dask-cudf/run_test.sh | 4 ++-- dependencies.yaml | 6 +++--- python/dask_cudf/pyproject.toml | 4 ++-- 8 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index d6e7f4bf65e..0abee09ca8a 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -11,7 +11,7 @@ RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from python -m pip install --no-deps ./local-cudf-dep/cudf*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 +python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/dask_cudf*.whl)[test] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 9fb991f9075..46b0b3799f2 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -25,10 +25,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.9.2 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.9.2 +- distributed==2023.9.2 - dlpack>=0.5,<0.6.0a0 - doxygen=1.9.1 - 
fastavro>=0.22.9 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 9ba0dd8dc38..0e137c91120 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -26,10 +26,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.9.2 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.9.2 +- distributed==2023.9.2 - dlpack>=0.5,<0.6.0a0 - doxygen=1.9.1 - fastavro>=0.22.9 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 7aaa40bffd0..233d51baf31 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -45,9 +45,9 @@ requirements: - streamz - cudf ={{ version }} - cudf_kafka ={{ version }} - - dask >=2023.7.1 - - dask-core >=2023.7.1 - - distributed >=2023.7.1 + - dask ==2023.9.2 + - dask-core ==2023.9.2 + - distributed ==2023.9.2 - python-confluent-kafka >=1.9.0,<1.10.0a0 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 12809ba648f..4c8af071074 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -38,16 +38,16 @@ requirements: host: - python - cudf ={{ version }} - - dask >=2023.7.1 - - dask-core >=2023.7.1 - - distributed >=2023.7.1 + - dask ==2023.9.2 + - dask-core ==2023.9.2 + - distributed ==2023.9.2 - cuda-version ={{ cuda_version }} run: - python - cudf ={{ version }} - - dask >=2023.7.1 - - dask-core >=2023.7.1 - - distributed >=2023.7.1 + - dask ==2023.9.2 + - dask-core ==2023.9.2 + - distributed ==2023.9.2 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh index 7dc54747a0c..c79c014a89a 100644 --- a/conda/recipes/dask-cudf/run_test.sh +++ 
b/conda/recipes/dask-cudf/run_test.sh @@ -18,10 +18,10 @@ if [ "${ARCH}" = "aarch64" ]; then fi # Dask & Distributed option to install main(nightly) or `conda-forge` packages. -export INSTALL_DASK_MAIN=1 +export INSTALL_DASK_MAIN=0 # Dask version to install when `INSTALL_DASK_MAIN=0` -export DASK_STABLE_VERSION="2023.7.1" +export DASK_STABLE_VERSION="2023.9.2" # Install the conda-forge or nightly version of dask and distributed if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then diff --git a/dependencies.yaml b/dependencies.yaml index 5586f54348c..b21472df4fd 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -491,12 +491,12 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask>=2023.7.1 - - distributed>=2023.7.1 + - dask==2023.9.2 + - distributed==2023.9.2 - output_types: conda packages: - cupy>=12.0.0 - - dask-core>=2023.7.1 # dask-core in conda is the actual package & dask is the meta package + - dask-core==2023.9.2 # dask-core in conda is the actual package & dask is the meta package - output_types: pyproject packages: - &cudf cudf==23.10.* diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 922da366422..41b57b71749 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -20,8 +20,8 @@ requires-python = ">=3.9" dependencies = [ "cudf==23.10.*", "cupy-cuda11x>=12.0.0", - "dask>=2023.7.1", - "distributed>=2023.7.1", + "dask==2023.9.2", + "distributed==2023.9.2", "fsspec>=0.6.0", "numpy>=1.21,<1.25", "pandas>=1.3,<1.6.0dev0", From 59b09fd097e39bd15646eac1156889692974dc5f Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 29 Sep 2023 11:10:25 -0500 Subject: [PATCH 35/76] cuDF: Build CUDA 12.0 ARM conda packages. (#14112) This PR builds conda packages using CUDA 12 on ARM. This work is targeting 23.12 and depends on https://github.com/rapidsai/rmm/pull/1330. Closes #14128. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/14112 --- .github/workflows/build.yaml | 16 ++++++++-------- .github/workflows/pr.yaml | 28 ++++++++++++++-------------- .github/workflows/test.yaml | 16 ++++++++-------- dependencies.yaml | 20 ++------------------ 4 files changed, 32 insertions(+), 48 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index ab028eb89cc..dc2c81d1c77 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120-arm with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120-arm with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@cuda-120-arm with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: 
sha: ${{ inputs.sha }} wheel-build-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@cuda-120-arm with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@cuda-120-arm with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@cuda-120-arm with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: ${{ inputs.build_type || 'branch' }} @@ -100,7 +100,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@cuda-120-arm with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 214f9c90b41..047b80f2e5c 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -26,34 +26,34 @@ jobs: - wheel-build-dask-cudf - wheel-tests-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@cuda-120-arm checks: secrets: inherit - uses: 
rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@cuda-120-arm with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120-arm with: build_type: pull-request conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120-arm with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120-arm with: build_type: pull-request conda-python-cudf-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120-arm with: build_type: pull-request test_script: "ci/test_python_cudf.sh" @@ -61,14 +61,14 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120-arm with: build_type: pull-request test_script: "ci/test_python_other.sh" conda-java-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm 
with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -78,7 +78,7 @@ jobs: conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -88,7 +88,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -98,21 +98,21 @@ jobs: wheel-build-cudf: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@cuda-120-arm with: build_type: pull-request script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@cuda-120-arm with: build_type: pull-request script: ci/test_wheel_cudf.sh wheel-build-dask-cudf: needs: wheel-tests-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@cuda-120-arm with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request @@ -120,7 +120,7 @@ jobs: wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@cuda-120-arm with: 
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 9ca32bcfe03..e58227c30dc 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120-arm with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm with: build_type: nightly branch: ${{ inputs.branch }} @@ -36,7 +36,7 @@ jobs: run_script: "ci/test_cpp_memcheck.sh" conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120-arm with: build_type: nightly branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120-arm with: build_type: nightly branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: test_script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm with: build_type: nightly branch: 
${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm with: build_type: nightly branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@cuda-120-arm with: build_type: nightly branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@cuda-120-arm with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: nightly diff --git a/dependencies.yaml b/dependencies.yaml index c8ee66bd99f..c19e8765be3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -227,25 +227,9 @@ dependencies: # in sync with the version pinned in get_arrow.cmake. - libarrow==12.0.1.* - librdkafka>=1.9.0,<1.10.0a0 + # Align nvcomp version with rapids-cmake + - nvcomp==2.6.1 - spdlog>=1.11.0,<1.12 - specific: - - output_types: conda - matrices: - - matrix: - arch: x86_64 - packages: - # Align nvcomp version with rapids-cmake - # TODO: not yet available for aarch64 CUDA 12 - - &nvcomp nvcomp==2.6.1 - - matrix: - arch: aarch64 - cuda: "11.8" - packages: - - *nvcomp - # TODO: Fallback matrix for aarch64 CUDA 12. After migrating to nvcomp 3, - # all CUDA/arch combinations should be supported by existing packages. 
- - matrix: - packages: build_wheels: common: - output_types: pyproject From 66a655ce80e8b0accb80ea4e23799d23a82a35a2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 3 Oct 2023 08:00:44 -0500 Subject: [PATCH 36/76] Fix inaccuracy in decimal128 rounding. (#14233) Fixes a bug where floating-point values were used in decimal128 rounding, giving wrong results. Closes https://github.com/rapidsai/cudf/issues/14210. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Divye Gala (https://github.com/divyegala) - Mark Harris (https://github.com/harrism) --- cpp/src/round/round.cu | 5 ++- cpp/tests/round/round_tests.cpp | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 4b3f80fc6e2..41cce57d55b 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -271,7 +271,10 @@ std::unique_ptr round_with(column_view const& input, out_view.template end(), static_cast(0)); } else { - Type const n = std::pow(10, scale_movement); + Type n = 10; + for (int i = 1; i < scale_movement; ++i) { + n *= 10; + } thrust::transform(rmm::exec_policy(stream), input.begin(), input.end(), diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp index d802c0c2706..f97bb7a5323 100644 --- a/cpp/tests/round/round_tests.cpp +++ b/cpp/tests/round/round_tests.cpp @@ -703,4 +703,83 @@ TEST_F(RoundTests, BoolTestHalfUp) EXPECT_THROW(cudf::round(input, -2, cudf::rounding_method::HALF_UP), cudf::logic_error); } +// Use __uint128_t for demonstration. 
+constexpr __uint128_t operator""_uint128_t(const char* s) +{ + __uint128_t ret = 0; + for (int i = 0; s[i] != '\0'; ++i) { + ret *= 10; + if ('0' <= s[i] && s[i] <= '9') { ret += s[i] - '0'; } + } + return ret; +} + +TEST_F(RoundTests, HalfEvenErrorsA) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + { + // 0.5 at scale -37 should round HALF_EVEN to 0, because 0 is an even number + auto const input = + fp_wrapper{{5000000000000000000000000000000000000_uint128_t}, scale_type{-37}}; + auto const expected = fp_wrapper{{0}, scale_type{0}}; + auto const result = cudf::round(input, 0, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + +TEST_F(RoundTests, HalfEvenErrorsB) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + { + // 0.125 at scale -37 should round HALF_EVEN to 0.12, because 2 is an even number + auto const input = + fp_wrapper{{1250000000000000000000000000000000000_uint128_t}, scale_type{-37}}; + auto const expected = fp_wrapper{{12}, scale_type{-2}}; + auto const result = cudf::round(input, 2, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + +TEST_F(RoundTests, HalfEvenErrorsC) +{ + using namespace numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + { + // 0.0625 at scale -37 should round HALF_EVEN to 0.062, because 2 is an even number + auto const input = + fp_wrapper{{0625000000000000000000000000000000000_uint128_t}, scale_type{-37}}; + auto const expected = fp_wrapper{{62}, scale_type{-3}}; + auto const result = cudf::round(input, 3, cudf::rounding_method::HALF_EVEN); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + +TEST_F(RoundTests, HalfUpErrorsA) +{ + using namespace 
numeric; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + { + // 0.25 at scale -37 should round HALF_UP to 0.3 + auto const input = + fp_wrapper{{2500000000000000000000000000000000000_uint128_t}, scale_type{-37}}; + auto const expected = fp_wrapper{{3}, scale_type{-1}}; + auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_UP); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + CUDF_TEST_PROGRAM_MAIN() From 3964950ba2fecf7f962917276058a6381d396246 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 3 Oct 2023 15:11:15 -0500 Subject: [PATCH 37/76] Fix inaccurate ceil/floor and inaccurate rescaling casts of fixed-point values. (#14242) This is a follow-up PR to #14233. This PR fixes a bug where floating-point values were used as intermediates in ceil/floor unary operations and cast operations that require rescaling for fixed-point types, giving inaccurate results. See also: - https://github.com/rapidsai/cudf/pull/14233#discussion_r1340786769 - https://github.com/rapidsai/cudf/issues/14243 Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Mike Wilson (https://github.com/hyperbolic2346) - Vukasin Milovanovic (https://github.com/vuule) --- cpp/src/unary/cast_ops.cu | 8 +++++- cpp/src/unary/math_ops.cu | 8 ++++-- cpp/tests/unary/cast_tests.cpp | 40 ++++++++++++++++++++++++++++++ cpp/tests/unary/unary_ops_test.cpp | 33 ++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 3 deletions(-) diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index f40ace8d10b..1c81f266200 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -199,7 +199,13 @@ std::unique_ptr rescale(column_view input, } return output_column; } - auto const scalar = make_fixed_point_scalar(std::pow(10, -diff), scale_type{diff}, stream); + + RepType scalar_value = 10; + for (int i = 1; i < -diff; ++i) { + scalar_value *= 10; + } + + auto const scalar = 
make_fixed_point_scalar(scalar_value, scale_type{diff}, stream); return detail::binary_operation(input, *scalar, binary_operator::DIV, type, stream, mr); } }; diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index 961f3a9e720..d0cae81a9c8 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -295,7 +295,11 @@ std::unique_ptr unary_op_with(column_view const& input, input.type(), input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr); auto out_view = result->mutable_view(); - Type const n = std::pow(10, -input.type().scale()); + + Type n = 10; + for (int i = 1; i < -input.type().scale(); ++i) { + n *= 10; + } thrust::transform(rmm::exec_policy(stream), input.begin(), diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index 9506e1918c0..d565359a4ea 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -30,6 +30,8 @@ #include #include +#include + #include #include @@ -967,6 +969,44 @@ TYPED_TEST(FixedPointTests, Decimal128ToDecimalXXWithLargerScale) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TYPED_TEST(FixedPointTests, ValidateCastRescalePrecision) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + // This test is designed to protect against floating point conversion + // introducing errors in fixed-point arithmetic. The rescaling that occurs + // during casting to different scales should only use fixed-precision math. 
+ // Realistically, we are only able to show precision failures due to floating + // conversion in a few very specific circumstances where dividing by specific + // powers of 10 works against us. Some examples: 10^23, 10^25, 10^26, 10^27, + // 10^30, 10^32, 10^36. See https://godbolt.org/z/cP1MddP8P for a derivation. + // For completeness and to ensure that we are not missing any other cases, we + // test casting to/from all scales in the range of each decimal type. Values + // that are powers of ten show this error more readily than non-powers of 10 + // because the rescaling factor is a power of 10, meaning that errors in + // division are more visible. + constexpr auto min_scale = -cuda::std::numeric_limits::digits10; + for (int input_scale = 0; input_scale >= min_scale; --input_scale) { + for (int result_scale = 0; result_scale >= min_scale; --result_scale) { + RepType input_value = 1; + for (int k = 0; k > input_scale; --k) { + input_value *= 10; + } + RepType result_value = 1; + for (int k = 0; k > result_scale; --k) { + result_value *= 10; + } + auto const input = fp_wrapper{{input_value}, scale_type{input_scale}}; + auto const expected = fp_wrapper{{result_value}, scale_type{result_scale}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(result_scale)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + } +} + TYPED_TEST(FixedPointTests, Decimal32ToDecimalXXWithLargerScaleAndNullMask) { using namespace numeric; diff --git a/cpp/tests/unary/unary_ops_test.cpp b/cpp/tests/unary/unary_ops_test.cpp index 49764f22373..76d1f769856 100644 --- a/cpp/tests/unary/unary_ops_test.cpp +++ b/cpp/tests/unary/unary_ops_test.cpp @@ -24,6 +24,8 @@ #include +#include + template cudf::test::fixed_width_column_wrapper create_fixed_columns(cudf::size_type start, cudf::size_type size, @@ -372,4 +374,35 @@ TYPED_TEST(FixedPointUnaryTests, FixedPointUnaryFloorLarge) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } 
+TYPED_TEST(FixedPointUnaryTests, ValidateCeilFloorPrecision) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + // This test is designed to protect against floating point conversion + // introducing errors in fixed-point arithmetic. The rounding that occurs + // during ceil/floor should only use fixed-precision math. Realistically, + // we are only able to show precision failures due to floating conversion in + // a few very specific circumstances where dividing by specific powers of 10 + // works against us. Some examples: 10^23, 10^25, 10^26, 10^27, 10^30, + // 10^32, 10^36. See https://godbolt.org/z/cP1MddP8P for a derivation. For + // completeness and to ensure that we are not missing any other cases, we + // test all scales representable in the range of each decimal type. + constexpr auto min_scale = -cuda::std::numeric_limits::digits10; + for (int input_scale = 0; input_scale >= min_scale; --input_scale) { + RepType input_value = 1; + for (int k = 0; k > input_scale; --k) { + input_value *= 10; + } + auto const input = fp_wrapper{{input_value}, scale_type{input_scale}}; + auto const expected = fp_wrapper{{input_value}, scale_type{input_scale}}; + auto const ceil_result = cudf::unary_operation(input, cudf::unary_operator::CEIL); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, ceil_result->view()); + auto const floor_result = cudf::unary_operation(input, cudf::unary_operator::FLOOR); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, floor_result->view()); + } +} + CUDF_TEST_PROGRAM_MAIN() From 29556a2514f4d274164a27a80539410da7e132d6 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 3 Oct 2023 14:44:28 -0700 Subject: [PATCH 38/76] Remove the use of volatile in ORC (#14175) `volatile` should not be required in our code, unless there are compiler or synchronization issues. This PR removes the use in ORC reader and writer.
Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14175 --- cpp/src/io/orc/stats_enc.cu | 4 +- cpp/src/io/orc/stripe_data.cu | 82 +++++++++++++++-------------------- cpp/src/io/orc/stripe_enc.cu | 14 +++--- cpp/src/io/orc/stripe_init.cu | 2 +- 4 files changed, 46 insertions(+), 56 deletions(-) diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index 95f1db5bfd1..479a2dfada3 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -76,8 +76,8 @@ __global__ void __launch_bounds__(block_size, 1) { using block_scan = cub::BlockScan; __shared__ typename block_scan::TempStorage temp_storage; - volatile uint32_t stats_size = 0; - auto t = threadIdx.x; + uint32_t stats_size = 0; + auto t = threadIdx.x; __syncthreads(); for (thread_index_type start = 0; start < statistics_count; start += block_size) { uint32_t stats_len = 0, stats_pos; diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 3edcd3d83b2..0b249bbdafe 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -142,9 +142,7 @@ struct orcdec_state_s { * @param[in] base Pointer to raw byte stream data * @param[in] len Stream length in bytes */ -static __device__ void bytestream_init(volatile orc_bytestream_s* bs, - uint8_t const* base, - uint32_t len) +static __device__ void bytestream_init(orc_bytestream_s* bs, uint8_t const* base, uint32_t len) { uint32_t pos = (len > 0) ? 
static_cast(7 & reinterpret_cast(base)) : 0; bs->base = base - pos; @@ -160,8 +158,7 @@ static __device__ void bytestream_init(volatile orc_bytestream_s* bs, * @param[in] bs Byte stream input * @param[in] bytes_consumed Number of bytes that were consumed */ -static __device__ void bytestream_flush_bytes(volatile orc_bytestream_s* bs, - uint32_t bytes_consumed) +static __device__ void bytestream_flush_bytes(orc_bytestream_s* bs, uint32_t bytes_consumed) { uint32_t pos = bs->pos; uint32_t len = bs->len; @@ -197,7 +194,7 @@ static __device__ void bytestream_fill(orc_bytestream_s* bs, int t) * @param[in] pos Position in byte stream * @return byte */ -inline __device__ uint8_t bytestream_readbyte(volatile orc_bytestream_s* bs, int pos) +inline __device__ uint8_t bytestream_readbyte(orc_bytestream_s* bs, int pos) { return bs->buf.u8[pos & (bytestream_buffer_size - 1)]; } @@ -209,7 +206,7 @@ inline __device__ uint8_t bytestream_readbyte(volatile orc_bytestream_s* bs, int * @param[in] pos Position in byte stream * @result bits */ -inline __device__ uint32_t bytestream_readu32(volatile orc_bytestream_s* bs, int pos) +inline __device__ uint32_t bytestream_readu32(orc_bytestream_s* bs, int pos) { uint32_t a = bs->buf.u32[(pos & (bytestream_buffer_size - 1)) >> 2]; uint32_t b = bs->buf.u32[((pos + 4) & (bytestream_buffer_size - 1)) >> 2]; @@ -224,7 +221,7 @@ inline __device__ uint32_t bytestream_readu32(volatile orc_bytestream_s* bs, int * @param[in] numbits number of bits * @return bits */ -inline __device__ uint64_t bytestream_readu64(volatile orc_bytestream_s* bs, int pos) +inline __device__ uint64_t bytestream_readu64(orc_bytestream_s* bs, int pos) { uint32_t a = bs->buf.u32[(pos & (bytestream_buffer_size - 1)) >> 2]; uint32_t b = bs->buf.u32[((pos + 4) & (bytestream_buffer_size - 1)) >> 2]; @@ -245,9 +242,7 @@ inline __device__ uint64_t bytestream_readu64(volatile orc_bytestream_s* bs, int * @param[in] numbits number of bits * @return decoded value */ -inline __device__ 
uint32_t bytestream_readbits(volatile orc_bytestream_s* bs, - int bitpos, - uint32_t numbits) +inline __device__ uint32_t bytestream_readbits(orc_bytestream_s* bs, int bitpos, uint32_t numbits) { int idx = bitpos >> 5; uint32_t a = __byte_perm(bs->buf.u32[(idx + 0) & bytestream_buffer_mask], 0, 0x0123); @@ -263,9 +258,7 @@ inline __device__ uint32_t bytestream_readbits(volatile orc_bytestream_s* bs, * @param[in] numbits number of bits * @return decoded value */ -inline __device__ uint64_t bytestream_readbits64(volatile orc_bytestream_s* bs, - int bitpos, - uint32_t numbits) +inline __device__ uint64_t bytestream_readbits64(orc_bytestream_s* bs, int bitpos, uint32_t numbits) { int idx = bitpos >> 5; uint32_t a = __byte_perm(bs->buf.u32[(idx + 0) & bytestream_buffer_mask], 0, 0x0123); @@ -288,7 +281,7 @@ inline __device__ uint64_t bytestream_readbits64(volatile orc_bytestream_s* bs, * @param[in] numbits number of bits * @param[out] result decoded value */ -inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, +inline __device__ void bytestream_readbe(orc_bytestream_s* bs, int bitpos, uint32_t numbits, uint32_t& result) @@ -304,7 +297,7 @@ inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, * @param[in] numbits number of bits * @param[out] result decoded value */ -inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, +inline __device__ void bytestream_readbe(orc_bytestream_s* bs, int bitpos, uint32_t numbits, int32_t& result) @@ -321,7 +314,7 @@ inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, * @param[in] numbits number of bits * @param[out] result decoded value */ -inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, +inline __device__ void bytestream_readbe(orc_bytestream_s* bs, int bitpos, uint32_t numbits, uint64_t& result) @@ -337,7 +330,7 @@ inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, * @param[in] numbits number of bits * @param[out] 
result decoded value */ -inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, +inline __device__ void bytestream_readbe(orc_bytestream_s* bs, int bitpos, uint32_t numbits, int64_t& result) @@ -354,7 +347,7 @@ inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, * @return length of varint in bytes */ template -inline __device__ uint32_t varint_length(volatile orc_bytestream_s* bs, int pos) +inline __device__ uint32_t varint_length(orc_bytestream_s* bs, int pos) { if (bytestream_readbyte(bs, pos) > 0x7f) { uint32_t next32 = bytestream_readu32(bs, pos + 1); @@ -392,7 +385,7 @@ inline __device__ uint32_t varint_length(volatile orc_bytestream_s* bs, int pos) * @return new position in byte stream buffer */ template -inline __device__ int decode_base128_varint(volatile orc_bytestream_s* bs, int pos, T& result) +inline __device__ int decode_base128_varint(orc_bytestream_s* bs, int pos, T& result) { uint32_t v = bytestream_readbyte(bs, pos++); if (v > 0x7f) { @@ -446,7 +439,7 @@ inline __device__ int decode_base128_varint(volatile orc_bytestream_s* bs, int p /** * @brief Decodes a signed int128 encoded as base-128 varint (used for decimals) */ -inline __device__ __int128_t decode_varint128(volatile orc_bytestream_s* bs, int pos) +inline __device__ __int128_t decode_varint128(orc_bytestream_s* bs, int pos) { auto byte = bytestream_readbyte(bs, pos++); __int128_t const sign_mask = -(int32_t)(byte & 1); @@ -463,7 +456,7 @@ inline __device__ __int128_t decode_varint128(volatile orc_bytestream_s* bs, int /** * @brief Decodes an unsigned 32-bit varint */ -inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, uint32_t& result) +inline __device__ int decode_varint(orc_bytestream_s* bs, int pos, uint32_t& result) { uint32_t u; pos = decode_base128_varint(bs, pos, u); @@ -474,7 +467,7 @@ inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, uint /** * @brief Decodes an unsigned 64-bit varint */ -inline 
__device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, uint64_t& result) +inline __device__ int decode_varint(orc_bytestream_s* bs, int pos, uint64_t& result) { uint64_t u; pos = decode_base128_varint(bs, pos, u); @@ -485,7 +478,7 @@ inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, uint /** * @brief Signed version of 32-bit decode_varint */ -inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, int32_t& result) +inline __device__ int decode_varint(orc_bytestream_s* bs, int pos, int32_t& result) { uint32_t u; pos = decode_base128_varint(bs, pos, u); @@ -496,7 +489,7 @@ inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, int3 /** * @brief Signed version of 64-bit decode_varint */ -inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, int64_t& result) +inline __device__ int decode_varint(orc_bytestream_s* bs, int pos, int64_t& result) { uint64_t u; pos = decode_base128_varint(bs, pos, u); @@ -514,7 +507,7 @@ inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, int6 * @return number of values decoded */ template -inline __device__ void lengths_to_positions(volatile T* vals, uint32_t numvals, unsigned int t) +inline __device__ void lengths_to_positions(T* vals, uint32_t numvals, unsigned int t) { for (uint32_t n = 1; n < numvals; n <<= 1) { __syncthreads(); @@ -534,8 +527,8 @@ inline __device__ void lengths_to_positions(volatile T* vals, uint32_t numvals, * @return number of values decoded */ template -static __device__ uint32_t Integer_RLEv1( - orc_bytestream_s* bs, volatile orc_rlev1_state_s* rle, volatile T* vals, uint32_t maxvals, int t) +static __device__ uint32_t +Integer_RLEv1(orc_bytestream_s* bs, orc_rlev1_state_s* rle, T* vals, uint32_t maxvals, int t) { uint32_t numvals, numruns; if (t == 0) { @@ -642,8 +635,8 @@ static const __device__ __constant__ uint8_t ClosestFixedBitsMap[65] = { */ template static __device__ uint32_t 
Integer_RLEv2(orc_bytestream_s* bs, - volatile orc_rlev2_state_s* rle, - volatile T* vals, + orc_rlev2_state_s* rle, + T* vals, uint32_t maxvals, int t, bool has_buffered_values = false) @@ -883,7 +876,7 @@ static __device__ uint32_t Integer_RLEv2(orc_bytestream_s* bs, * * @return 32-bit value */ -inline __device__ uint32_t rle8_read_bool32(volatile uint32_t* vals, uint32_t bitpos) +inline __device__ uint32_t rle8_read_bool32(uint32_t* vals, uint32_t bitpos) { uint32_t a = vals[(bitpos >> 5) + 0]; uint32_t b = vals[(bitpos >> 5) + 1]; @@ -903,11 +896,8 @@ inline __device__ uint32_t rle8_read_bool32(volatile uint32_t* vals, uint32_t bi * * @return number of values decoded */ -static __device__ uint32_t Byte_RLE(orc_bytestream_s* bs, - volatile orc_byterle_state_s* rle, - volatile uint8_t* vals, - uint32_t maxvals, - int t) +static __device__ uint32_t +Byte_RLE(orc_bytestream_s* bs, orc_byterle_state_s* rle, uint8_t* vals, uint32_t maxvals, int t) { uint32_t numvals, numruns; int r, tr; @@ -1006,8 +996,8 @@ static const __device__ __constant__ int64_t kPow5i[28] = {1, * @return number of values decoded */ static __device__ int Decode_Decimals(orc_bytestream_s* bs, - volatile orc_byterle_state_s* scratch, - volatile orcdec_state_s::values& vals, + orc_byterle_state_s* scratch, + orcdec_state_s::values& vals, int val_scale, int numvals, type_id dtype_id, @@ -1241,8 +1231,8 @@ __global__ void __launch_bounds__(block_size) } __syncthreads(); while (s->top.dict.dict_len > 0) { - uint32_t numvals = min(s->top.dict.dict_len, blockDim.x), len; - volatile uint32_t* vals = s->vals.u32; + uint32_t numvals = min(s->top.dict.dict_len, blockDim.x), len; + uint32_t* vals = s->vals.u32; bytestream_fill(&s->bs, t); __syncthreads(); if (is_rlev1(s->chunk.encoding_kind)) { @@ -1310,12 +1300,12 @@ static __device__ void DecodeRowPositions(orcdec_state_s* s, min((row_decoder_buffer_size - s->u.rowdec.nz_count) * 2, blockDim.x)); if (s->chunk.valid_map_base != nullptr) { // We have a 
present stream - uint32_t rmax = s->top.data.end_row - min((uint32_t)first_row, s->top.data.end_row); - auto r = (uint32_t)(s->top.data.cur_row + s->top.data.nrows + t - first_row); - uint32_t valid = (t < nrows && r < rmax) - ? (((uint8_t const*)s->chunk.valid_map_base)[r >> 3] >> (r & 7)) & 1 - : 0; - volatile auto* row_ofs_plus1 = (volatile uint16_t*)&s->u.rowdec.row[s->u.rowdec.nz_count]; + uint32_t rmax = s->top.data.end_row - min((uint32_t)first_row, s->top.data.end_row); + auto r = (uint32_t)(s->top.data.cur_row + s->top.data.nrows + t - first_row); + uint32_t valid = (t < nrows && r < rmax) + ? (((uint8_t const*)s->chunk.valid_map_base)[r >> 3] >> (r & 7)) & 1 + : 0; + auto* row_ofs_plus1 = (uint16_t*)&s->u.rowdec.row[s->u.rowdec.nz_count]; uint32_t nz_pos, row_plus1, nz_count = s->u.rowdec.nz_count, last_row; if (t < nrows) { row_ofs_plus1[t] = valid; } lengths_to_positions(row_ofs_plus1, nrows, t); diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 73c41e2bbcd..4841fb1141a 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -53,7 +53,7 @@ constexpr bool zero_pll_war = true; struct byterle_enc_state_s { uint32_t literal_run; uint32_t repeat_run; - volatile uint32_t rpt_map[(512 / 32) + 1]; + uint32_t rpt_map[(512 / 32) + 1]; }; struct intrle_enc_state_s { @@ -63,7 +63,7 @@ struct intrle_enc_state_s { uint32_t literal_w; uint32_t hdr_bytes; uint32_t pl_bytes; - volatile uint32_t delta_map[(512 / 32) + 1]; + uint32_t delta_map[(512 / 32) + 1]; }; struct strdata_enc_state_s { @@ -366,7 +366,7 @@ static __device__ uint32_t IntegerRLE( using block_reduce = cub::BlockReduce; uint8_t* dst = s->stream.data_ptrs[cid] + s->strm_pos[cid]; uint32_t out_cnt = 0; - __shared__ volatile uint64_t block_vmin; + __shared__ uint64_t block_vmin; while (numvals > 0) { T v0 = (t < numvals) ? 
inbuf[(inpos + t) & inmask] : 0; @@ -615,7 +615,7 @@ static __device__ void StoreStringData(uint8_t* dst, * @param[in] t thread id */ template -inline __device__ void lengths_to_positions(volatile T* vals, uint32_t numvals, unsigned int t) +inline __device__ void lengths_to_positions(T* vals, uint32_t numvals, unsigned int t) { for (uint32_t n = 1; n < numvals; n <<= 1) { __syncthreads(); @@ -1143,7 +1143,7 @@ __global__ void __launch_bounds__(256) uint32_t comp_block_align) { __shared__ __align__(16) StripeStream ss; - __shared__ uint8_t* volatile uncomp_base_g; + __shared__ uint8_t* uncomp_base_g; auto const padded_block_header_size = util::round_up_unsafe(block_header_size, comp_block_align); auto const padded_comp_block_size = util::round_up_unsafe(max_comp_blk_size, comp_block_align); @@ -1196,8 +1196,8 @@ __global__ void __launch_bounds__(1024) uint32_t max_comp_blk_size) { __shared__ __align__(16) StripeStream ss; - __shared__ uint8_t const* volatile comp_src_g; - __shared__ uint32_t volatile comp_len_g; + __shared__ uint8_t const* comp_src_g; + __shared__ uint32_t comp_len_g; auto const stripe_id = blockIdx.x; auto const stream_id = blockIdx.y; diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index 8eeca504121..b31a4a081d1 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -499,7 +499,7 @@ __global__ void __launch_bounds__(128, 8) gpuParseRowGroupIndex(RowGroup* row_gr : row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x].start_row; for (int j = t4; j < rowgroup_size4; j += 4) { ((uint32_t*)&row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x])[j] = - ((volatile uint32_t*)&s->rowgroups[i])[j]; + ((uint32_t*)&s->rowgroups[i])[j]; } row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x].num_rows = num_rows; // Updating in case of struct From d87e181daa67d8fb1a029fc2c09e2f561d1e7234 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 4 Oct 2023 13:25:56 -0700 
Subject: [PATCH 39/76] Expose streams in binaryop APIs (#14187) Contributes to #925 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/14187 --- cpp/include/cudf/binaryop.hpp | 8 ++ cpp/src/binaryop/binaryop.cpp | 12 ++- cpp/src/binaryop/compiled/binary_ops.cu | 6 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/binaryop_test.cpp | 126 ++++++++++++++++++++++++ 5 files changed, 147 insertions(+), 6 deletions(-) create mode 100644 cpp/tests/streams/binaryop_test.cpp diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 77d6a4d1e89..9df4b4eb00f 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -102,6 +102,7 @@ enum class binary_operator : int32_t { * @param rhs The right operand column * @param op The binary operator * @param output_type The desired data type of the output column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of * the binary operation @@ -115,6 +116,7 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -131,6 +133,7 @@ std::unique_ptr binary_operation( * @param rhs The right operand scalar * @param op The binary operator * @param output_type The desired data type of the output column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of * the binary operation @@ -144,6 
+147,7 @@ std::unique_ptr binary_operation( scalar const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -158,6 +162,7 @@ std::unique_ptr binary_operation( * @param rhs The right operand column * @param op The binary operator * @param output_type The desired data type of the output column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of * the binary operation @@ -172,6 +177,7 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -189,6 +195,7 @@ std::unique_ptr binary_operation( * @param output_type The desired data type of the output column. 
It is assumed * that output_type is compatible with the output data type * of the function in the PTX code + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of * the binary operation @@ -201,6 +208,7 @@ std::unique_ptr binary_operation( column_view const& rhs, std::string const& ptx, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index ef07de8c461..6b413ab2be4 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -405,38 +405,42 @@ std::unique_ptr binary_operation(scalar const& lhs, column_view const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr); + return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); } std::unique_ptr binary_operation(column_view const& lhs, scalar const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr); + return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); } std::unique_ptr binary_operation(column_view const& lhs, column_view const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr); + return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); } 
std::unique_ptr binary_operation(column_view const& lhs, column_view const& rhs, std::string const& ptx, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, ptx, output_type, cudf::get_default_stream(), mr); + return detail::binary_operation(lhs, rhs, ptx, output_type, stream, mr); } } // namespace cudf diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index 1f7f342632a..85ab5c6d6cb 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -47,14 +47,16 @@ namespace { struct scalar_as_column_view { using return_type = typename std::pair>; template ())> - return_type operator()(scalar const& s, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + return_type operator()(scalar const& s, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource*) { auto& h_scalar_type_view = static_cast&>(const_cast(s)); auto col_v = column_view(s.type(), 1, h_scalar_type_view.data(), reinterpret_cast(s.validity_data()), - !s.is_valid()); + !s.is_valid(stream)); return std::pair{col_v, std::unique_ptr(nullptr)}; } template ())> diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 04939f3cd6d..ac13c121530 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -622,6 +622,7 @@ ConfigureTest( STREAM_IDENTIFICATION_TEST identify_stream_usage/test_default_stream_identification.cu ) +ConfigureTest(STREAM_BINARYOP_TEST streams/binaryop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_COPYING_TEST streams/copying_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/binaryop_test.cpp b/cpp/tests/streams/binaryop_test.cpp new file mode 100644 index 00000000000..2520aed0458 --- 
/dev/null +++ b/cpp/tests/streams/binaryop_test.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include +#include +#include + +class BinaryopTest : public cudf::test::BaseFixture {}; + +TEST_F(BinaryopTest, ColumnColumn) +{ + cudf::test::fixed_width_column_wrapper lhs{10, 20, 30, 40, 50}; + cudf::test::fixed_width_column_wrapper rhs{15, 25, 35, 45, 55}; + + cudf::binary_operation(lhs, + rhs, + cudf::binary_operator::ADD, + cudf::data_type(cudf::type_to_id()), + cudf::test::get_default_stream()); +} + +TEST_F(BinaryopTest, ColumnScalar) +{ + cudf::test::fixed_width_column_wrapper lhs{10, 20, 30, 40, 50}; + cudf::numeric_scalar rhs{23, true, cudf::test::get_default_stream()}; + + cudf::binary_operation(lhs, + rhs, + cudf::binary_operator::ADD, + cudf::data_type(cudf::type_to_id()), + cudf::test::get_default_stream()); +} + +TEST_F(BinaryopTest, ScalarColumn) +{ + cudf::numeric_scalar lhs{42, true, cudf::test::get_default_stream()}; + cudf::test::fixed_width_column_wrapper rhs{15, 25, 35, 45, 55}; + + cudf::binary_operation(lhs, + rhs, + cudf::binary_operator::ADD, + cudf::data_type(cudf::type_to_id()), + cudf::test::get_default_stream()); +} + +class BinaryopPTXTest : public BinaryopTest { + protected: + void SetUp() override + { + if (!can_do_runtime_jit()) { GTEST_SKIP() << "Skipping tests that require 11.5 runtime"; } + } +}; + 
+TEST_F(BinaryopPTXTest, ColumnColumnPTX) +{ + cudf::test::fixed_width_column_wrapper lhs{10, 20, 30, 40, 50}; + cudf::test::fixed_width_column_wrapper rhs{15, 25, 35, 45, 55}; + + // c = a*a*a + b*b + char const* ptx = + R"***( +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-24817639 +// Cuda compilation tools, release 10.0, V10.0.130 +// Based on LLVM 3.4svn +// + +.version 6.3 +.target sm_70 +.address_size 64 + + // .globl _ZN8__main__7add$241Eix +.common .global .align 8 .u64 _ZN08NumbaEnv8__main__7add$241Eix; +.common .global .align 8 .u64 _ZN08NumbaEnv5numba7targets7numbers14int_power_impl12$3clocals$3e13int_power$242Exx; + +.visible .func (.param .b32 func_retval0) _ZN8__main__7add$241Eix( + .param .b64 _ZN8__main__7add$241Eix_param_0, + .param .b32 _ZN8__main__7add$241Eix_param_1, + .param .b64 _ZN8__main__7add$241Eix_param_2 +) +{ + .reg .b32 %r<3>; + .reg .b64 %rd<8>; + + + ld.param.u64 %rd1, [_ZN8__main__7add$241Eix_param_0]; + ld.param.u32 %r1, [_ZN8__main__7add$241Eix_param_1]; + ld.param.u64 %rd2, [_ZN8__main__7add$241Eix_param_2]; + cvt.s64.s32 %rd3, %r1; + mul.wide.s32 %rd4, %r1, %r1; + mul.lo.s64 %rd5, %rd4, %rd3; + mul.lo.s64 %rd6, %rd2, %rd2; + add.s64 %rd7, %rd6, %rd5; + st.u64 [%rd1], %rd7; + mov.u32 %r2, 0; + st.param.b32 [func_retval0+0], %r2; + ret; +} + +)***"; + + cudf::binary_operation( + lhs, rhs, ptx, cudf::data_type(cudf::type_to_id()), cudf::test::get_default_stream()); + cudf::binary_operation(lhs, rhs, ptx, cudf::data_type(cudf::type_to_id())); +} From b120f7e73e882b4eaa6b5a2cb91aeed20bf1198d Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 4 Oct 2023 14:23:24 -0700 Subject: [PATCH 40/76] Improve `contains_column` by invoking `contains_table` (#14238) Part of #https://github.com/rapidsai/cudf/issues/12261 This PR simplifies the `contains_column` implementation by invoking `contains_table` and gets rid of the use of the cudf `unordered_multiset`. 
It also removes the `unordered_multiset` header file from libcudf. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14238 --- cpp/src/hash/unordered_multiset.cuh | 159 ---------------------------- cpp/src/search/contains_column.cu | 67 +----------- 2 files changed, 1 insertion(+), 225 deletions(-) delete mode 100644 cpp/src/hash/unordered_multiset.cuh diff --git a/cpp/src/hash/unordered_multiset.cuh b/cpp/src/hash/unordered_multiset.cuh deleted file mode 100644 index 183042fc0f4..00000000000 --- a/cpp/src/hash/unordered_multiset.cuh +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include - -namespace cudf { -namespace detail { -/* - * Device view of the unordered multiset - */ -template , - typename Equality = equal_to> -class unordered_multiset_device_view { - public: - unordered_multiset_device_view(size_type hash_size, - size_type const* hash_begin, - Element const* hash_data) - : hash_size{hash_size}, hash_begin{hash_begin}, hash_data{hash_data}, hasher(), equals() - { - } - - bool __device__ contains(Element e) const - { - size_type loc = hasher(e) % (2 * hash_size); - - for (size_type i = hash_begin[loc]; i < hash_begin[loc + 1]; ++i) { - if (equals(hash_data[i], e)) return true; - } - - return false; - } - - private: - Hasher hasher; - Equality equals; - size_type hash_size; - size_type const* hash_begin; - Element const* hash_data; -}; - -/* - * Fixed size set on a device. - */ -template , - typename Equality = equal_to> -class unordered_multiset { - public: - /** - * @brief Factory to construct a new unordered_multiset - */ - static unordered_multiset create(column_view const& col, rmm::cuda_stream_view stream) - { - auto d_column = column_device_view::create(col, stream); - auto d_col = *d_column; - - auto hash_bins_start = cudf::detail::make_zeroed_device_uvector_async( - 2 * d_col.size() + 1, stream, rmm::mr::get_current_device_resource()); - auto hash_bins_end = cudf::detail::make_zeroed_device_uvector_async( - 2 * d_col.size() + 1, stream, rmm::mr::get_current_device_resource()); - auto hash_data = rmm::device_uvector(d_col.size(), stream); - - Hasher hasher; - size_type* d_hash_bins_start = hash_bins_start.data(); - size_type* d_hash_bins_end = hash_bins_end.data(); - Element* d_hash_data = hash_data.data(); - - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(col.size()), - [d_hash_bins_start, d_col, hasher] 
__device__(size_t idx) { - if (!d_col.is_null(idx)) { - Element e = d_col.element(idx); - size_type tmp = hasher(e) % (2 * d_col.size()); - cuda::atomic_ref ref{*(d_hash_bins_start + tmp)}; - ref.fetch_add(1, cuda::std::memory_order_relaxed); - } - }); - - thrust::exclusive_scan(rmm::exec_policy(stream), - hash_bins_start.begin(), - hash_bins_start.end(), - hash_bins_end.begin()); - - thrust::copy(rmm::exec_policy(stream), - hash_bins_end.begin(), - hash_bins_end.end(), - hash_bins_start.begin()); - - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(col.size()), - [d_hash_bins_end, d_hash_data, d_col, hasher] __device__(size_t idx) { - if (!d_col.is_null(idx)) { - Element e = d_col.element(idx); - size_type tmp = hasher(e) % (2 * d_col.size()); - cuda::atomic_ref ref{*(d_hash_bins_end + tmp)}; - size_type offset = ref.fetch_add(1, cuda::std::memory_order_relaxed); - d_hash_data[offset] = e; - } - }); - - return unordered_multiset(d_col.size(), std::move(hash_bins_start), std::move(hash_data)); - } - - unordered_multiset_device_view to_device() const - { - return unordered_multiset_device_view( - size, hash_bins.data(), hash_data.data()); - } - - private: - unordered_multiset(size_type size, - rmm::device_uvector&& hash_bins, - rmm::device_uvector&& hash_data) - : size{size}, hash_bins{std::move(hash_bins)}, hash_data{std::move(hash_data)} - { - } - - size_type size; - rmm::device_uvector hash_bins; - rmm::device_uvector hash_data; -}; - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/search/contains_column.cu b/cpp/src/search/contains_column.cu index 4363bd212fe..85971647434 100644 --- a/cpp/src/search/contains_column.cu +++ b/cpp/src/search/contains_column.cu @@ -14,23 +14,14 @@ * limitations under the License. 
*/ -#include - -#include #include #include #include #include #include #include -#include #include -#include - -#include -#include -#include namespace cudf { namespace detail { @@ -38,61 +29,7 @@ namespace detail { namespace { struct contains_column_dispatch { - template - struct contains_fn { - bool __device__ operator()(size_type const idx) const - { - if (needles_have_nulls && needles.is_null_nocheck(idx)) { - // Exit early. The value doesn't matter, and will be masked as a null element. - return true; - } - - return haystack.contains(needles.template element(idx)); - } - - Haystack const haystack; - column_device_view const needles; - bool const needles_have_nulls; - }; - - template ())> - std::unique_ptr operator()(column_view const& haystack, - column_view const& needles, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - auto result = make_numeric_column(data_type{type_to_id()}, - needles.size(), - copy_bitmask(needles, stream, mr), - needles.null_count(), - stream, - mr); - if (needles.is_empty()) { return result; } - - auto const out_begin = result->mutable_view().template begin(); - if (haystack.is_empty()) { - thrust::uninitialized_fill( - rmm::exec_policy(stream), out_begin, out_begin + needles.size(), false); - return result; - } - - auto const haystack_set = cudf::detail::unordered_multiset::create(haystack, stream); - auto const haystack_set_dv = haystack_set.to_device(); - auto const needles_cdv_ptr = column_device_view::create(needles, stream); - - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(needles.size()), - out_begin, - contains_fn{ - haystack_set_dv, *needles_cdv_ptr, needles.has_nulls()}); - - result->set_null_count(needles.null_count()); - - return result; - } - - template ())> + template std::unique_ptr operator()(column_view const& haystack, column_view const& needles, rmm::cuda_stream_view stream, @@ -144,8 +81,6 @@ std::unique_ptr 
contains(column_view const& haystack, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(haystack.type() == needles.type(), "DTYPE mismatch"); - return cudf::type_dispatcher( haystack.type(), contains_column_dispatch{}, haystack, needles, stream, mr); } From 5d311ea76ddc8bdbb357b6afdf64dfce6ece39a7 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 5 Oct 2023 13:21:31 -0400 Subject: [PATCH 41/76] Fix strings replace for adjacent, identical multi-byte UTF-8 character targets (#14235) Fixes bug that can occur when replacing all occurrences in a string using a multi-byte UTF-8 target when the target matches sequentially in the same string -- some characters were missed. Specialized gtest is also added. Found while working on #13891 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14235 --- cpp/src/strings/replace/replace.cu | 2 +- cpp/tests/strings/replace_tests.cpp | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index a622d1a742d..acc1502f4d6 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -97,7 +97,7 @@ struct replace_row_parallel_fn { } else { bytes += d_repl.size_bytes() - d_target.size_bytes(); } - position = d_str.find(d_target, position + d_target.size_bytes()); + position = d_str.find(d_target, position + d_target.length()); --max_n; } if (out_ptr) // copy whats left (or right depending on your point of view) diff --git a/cpp/tests/strings/replace_tests.cpp b/cpp/tests/strings/replace_tests.cpp index f143983aded..f04bb832f09 100644 --- a/cpp/tests/strings/replace_tests.cpp +++ b/cpp/tests/strings/replace_tests.cpp @@ -246,6 +246,28 @@ TEST_F(StringsReplaceTest, ReplaceEndOfString) 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } +TEST_F(StringsReplaceTest, ReplaceAdjacentMultiByteTarget) +{ + auto input = cudf::test::strings_column_wrapper({"ééééééé", "eéeéeée", "eeeeeee"}); + auto strings_view = cudf::strings_column_view(input); + // replace all occurrences of 'é' with 'e' + cudf::test::strings_column_wrapper expected({"eeeeeee", "eeeeeee", "eeeeeee"}); + + auto stream = cudf::get_default_stream(); + auto mr = rmm::mr::get_current_device_resource(); + + auto target = cudf::string_scalar("é", true, stream); + auto repl = cudf::string_scalar("e", true, stream); + auto results = cudf::strings::replace(strings_view, target, repl); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + results = cudf::strings::detail::replace( + strings_view, target, repl, -1, stream, mr); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + results = cudf::strings::detail::replace( + strings_view, target, repl, -1, stream, mr); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); +} + TEST_F(StringsReplaceTest, ReplaceSlice) { std::vector h_strings{"Héllo", "thesé", nullptr, "ARE THE", "tést strings", ""}; From 04e2cd6ff4d525390d4a416651cefa16e11c2a50 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 6 Oct 2023 09:33:16 -0400 Subject: [PATCH 42/76] cudf::detail::pinned_allocator doesn't throw from `deallocate` (#14251) Fixes #14165 The deallocate function is called by the `pinned_host_vector`. Throwing from destructors is bad since they can't be caught, and generally get converted into runtime sig aborts. 
Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - David Wendt (https://github.com/davidwendt) - Divye Gala (https://github.com/divyegala) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/14251 --- cpp/include/cudf/detail/utilities/pinned_host_vector.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp index 9e2b85ea129..eee974c8399 100644 --- a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp @@ -169,7 +169,12 @@ class pinned_allocator { * It is the responsibility of the caller to destroy * the objects stored at \p p. */ - __host__ inline void deallocate(pointer p, size_type /*cnt*/) { CUDF_CUDA_TRY(cudaFreeHost(p)); } + __host__ inline void deallocate(pointer p, size_type /*cnt*/) + { + auto dealloc_worked = cudaFreeHost(p); + (void)dealloc_worked; + assert(dealloc_worked == cudaSuccess); + } /** * @brief This method returns the maximum size of the \c cnt parameter From fc3694730334971c6c7bd916bf36b71302cfcd42 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Fri, 6 Oct 2023 14:03:32 -0400 Subject: [PATCH 43/76] Fixing parquet list of struct interpretation (#13715) This change alters how we interpret non-annotated data in a parquet file. Most modern parquet writers would produce something like: ``` message spark_schema { required int32 id; optional group phoneNumbers (LIST) { repeated group phone { required int64 number; optional binary kind (STRING); } } } ``` But the list annotation isn't required. If it didn't exist, we would incorrectly interpret this schema as a struct of struct and not a list of struct. This change alters the code to look at the child and see if it is repeated. If it is, this indicates a list. 
closes #13664 Authors: - Mike Wilson (https://github.com/hyperbolic2346) - Vukasin Milovanovic (https://github.com/vuule) - Mark Harris (https://github.com/harrism) Approvers: - Mark Harris (https://github.com/harrism) - Nghia Truong (https://github.com/ttnghia) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/13715 --- cpp/src/io/parquet/page_decode.cuh | 2 +- cpp/src/io/parquet/parquet.hpp | 2 +- cpp/src/io/parquet/reader_impl_helpers.cpp | 86 ++++++++++++++++++++-- cpp/src/io/parquet/reader_impl_helpers.hpp | 1 + cpp/tests/io/parquet_test.cpp | 78 ++++++++++++++++++++ 5 files changed, 162 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index cdc29197eb3..d70cabdd35f 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -753,7 +753,7 @@ __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_input_value // for nested schemas, it's more complicated. This warp will visit 32 incoming values, // however not all of them will necessarily represent a value at this nesting level. so // the validity bit for thread t might actually represent output value t-6. the correct - // position for thread t's bit is cur_value_count. for cuda 11 we could use + // position for thread t's bit is thread_value_count. for cuda 11 we could use // __reduce_or_sync(), but until then we have to do a warp reduce. WarpReduceOr32(is_valid << thread_value_count); diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index c2affc774c2..1df49262e87 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -206,7 +206,7 @@ struct SchemaElement { { return type == UNDEFINED_TYPE && // this assumption might be a little weak. 
- ((repetition_type != REPEATED) || (repetition_type == REPEATED && num_children == 2)); + ((repetition_type != REPEATED) || (repetition_type == REPEATED && num_children > 1)); } }; diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index fcaa610fbb7..9778cfc47d2 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -175,6 +175,81 @@ type_id to_type_id(SchemaElement const& schema, return type_id::EMPTY; } +void metadata::sanitize_schema() +{ + // Parquet isn't very strict about incoming metadata. Lots of things can and should be inferred. + // There are also a lot of rules that simply aren't followed and are expected to be worked around. + // This step sanitizes the metadata to something that isn't ambiguous. + // + // Take, for example, the following schema: + // + // required group field_id=-1 user { + // required int32 field_id=-1 id; + // optional group field_id=-1 phoneNumbers { + // repeated group field_id=-1 phone { + // required int64 field_id=-1 number; + // optional binary field_id=-1 kind (String); + // } + // } + // } + // + // This real-world example has no annotations telling us what is a list or a struct. On the + // surface this looks like a column of id's and a column of list>, but this + // actually should be interpreted as a struct>>. The phoneNumbers field + // has to be a struct because it is a group with no repeated tag and we have no annotation. The + // repeated group is actually BOTH a struct due to the multiple children and a list due to + // repeated. + // + // This code attempts to make this less messy for the code that follows. 
+ + std::function process = [&](size_t schema_idx) -> void { + if (schema_idx < 0) { return; } + auto& schema_elem = schema[schema_idx]; + if (schema_idx != 0 && schema_elem.type == UNDEFINED_TYPE) { + auto const parent_type = schema[schema_elem.parent_idx].converted_type; + if (schema_elem.repetition_type == REPEATED && schema_elem.num_children > 1 && + parent_type != LIST && parent_type != MAP) { + // This is a list of structs, so we need to mark this as a list, but also + // add a struct child and move this element's children to the struct + schema_elem.converted_type = LIST; + schema_elem.repetition_type = OPTIONAL; + auto const struct_node_idx = schema.size(); + + SchemaElement struct_elem; + struct_elem.name = "struct_node"; + struct_elem.repetition_type = REQUIRED; + struct_elem.num_children = schema_elem.num_children; + struct_elem.type = UNDEFINED_TYPE; + struct_elem.converted_type = UNKNOWN; + + // swap children + struct_elem.children_idx = std::move(schema_elem.children_idx); + schema_elem.children_idx = {struct_node_idx}; + schema_elem.num_children = 1; + + struct_elem.max_definition_level = schema_elem.max_definition_level; + struct_elem.max_repetition_level = schema_elem.max_repetition_level; + schema_elem.max_definition_level--; + schema_elem.max_repetition_level = schema[schema_elem.parent_idx].max_repetition_level; + + // change parent index on new node and on children + struct_elem.parent_idx = schema_idx; + for (auto& child_idx : struct_elem.children_idx) { + schema[child_idx].parent_idx = struct_node_idx; + } + // add our struct + schema.push_back(struct_elem); + } + } + + for (auto& child_idx : schema_elem.children_idx) { + process(child_idx); + } + }; + + process(0); +} + metadata::metadata(datasource* source) { constexpr auto header_len = sizeof(file_header_s); @@ -195,6 +270,7 @@ metadata::metadata(datasource* source) CompactProtocolReader cp(buffer->data(), ender->footer_len); CUDF_EXPECTS(cp.read(this), "Cannot parse metadata"); 
CUDF_EXPECTS(cp.InitSchema(this), "Cannot initialize schema"); + sanitize_schema(); } std::vector aggregate_reader_metadata::metadatas_from_sources( @@ -445,8 +521,10 @@ aggregate_reader_metadata::select_columns(std::optional child_col_name_info, schema_elem.children_idx[0], out_col_array, has_list_parent); } + auto const one_level_list = schema_elem.is_one_level_list(get_schema(schema_elem.parent_idx)); + // if we're at the root, this is a new output column - auto const col_type = schema_elem.is_one_level_list(get_schema(schema_elem.parent_idx)) + auto const col_type = one_level_list ? type_id::LIST : to_type_id(schema_elem, strings_to_categorical, timestamp_type_id); auto const dtype = to_data_type(col_type, schema_elem); @@ -485,7 +563,7 @@ aggregate_reader_metadata::select_columns(std::optional input_column_info{schema_idx, schema_elem.name, schema_elem.max_repetition_level > 0}); // set up child output column for one-level encoding list - if (schema_elem.is_one_level_list(get_schema(schema_elem.parent_idx))) { + if (one_level_list) { // determine the element data type auto const element_type = to_type_id(schema_elem, strings_to_categorical, timestamp_type_id); @@ -506,9 +584,7 @@ aggregate_reader_metadata::select_columns(std::optional std::copy(nesting.cbegin(), nesting.cend(), std::back_inserter(input_col.nesting)); // pop off the extra nesting element. 
- if (schema_elem.is_one_level_list(get_schema(schema_elem.parent_idx))) { - nesting.pop_back(); - } + if (one_level_list) { nesting.pop_back(); } path_is_valid = true; // If we're able to reach leaf then path is valid } diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp index 61e4f94df0f..9ee17f26a10 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.hpp +++ b/cpp/src/io/parquet/reader_impl_helpers.hpp @@ -58,6 +58,7 @@ using namespace cudf::io::parquet; */ struct metadata : public FileMetaData { explicit metadata(datasource* source); + void sanitize_schema(); }; class aggregate_reader_metadata { diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 81e0e12eeb9..73c946a5feb 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -6732,4 +6732,82 @@ TEST_P(ParquetV2Test, CheckEncodings) } } +TEST_F(ParquetReaderTest, RepeatedNoAnnotations) +{ + constexpr unsigned char repeated_bytes[] = { + 0x50, 0x41, 0x52, 0x31, 0x15, 0x04, 0x15, 0x30, 0x15, 0x30, 0x4c, 0x15, 0x0c, 0x15, 0x00, 0x12, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x15, 0x0a, 0x15, 0x0a, + 0x2c, 0x15, 0x0c, 0x15, 0x10, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x03, 0x03, 0x88, 0xc6, 0x02, + 0x26, 0x80, 0x01, 0x1c, 0x15, 0x02, 0x19, 0x25, 0x00, 0x10, 0x19, 0x18, 0x02, 0x69, 0x64, 0x15, + 0x00, 0x16, 0x0c, 0x16, 0x78, 0x16, 0x78, 0x26, 0x54, 0x26, 0x08, 0x00, 0x00, 0x15, 0x04, 0x15, + 0x40, 0x15, 0x40, 0x4c, 0x15, 0x08, 0x15, 0x00, 0x12, 0x00, 0x00, 0xe3, 0x0c, 0x23, 0x4b, 0x01, + 0x00, 0x00, 0x00, 0xc7, 0x35, 0x3a, 0x42, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x6b, 0x74, 0x84, 0x00, + 0x00, 0x00, 0x00, 0x55, 0xa1, 0xae, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x15, 0x22, 0x15, + 0x22, 0x2c, 0x15, 0x10, 0x15, 0x10, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x03, 0xc0, 
0x03, 0x00, 0x00, 0x00, 0x03, 0x90, 0xaa, 0x02, 0x03, 0x94, 0x03, 0x26, 0xda, 0x02, + 0x1c, 0x15, 0x04, 0x19, 0x25, 0x00, 0x10, 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, + 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x06, 0x6e, 0x75, 0x6d, + 0x62, 0x65, 0x72, 0x15, 0x00, 0x16, 0x10, 0x16, 0xa0, 0x01, 0x16, 0xa0, 0x01, 0x26, 0x96, 0x02, + 0x26, 0xba, 0x01, 0x00, 0x00, 0x15, 0x04, 0x15, 0x24, 0x15, 0x24, 0x4c, 0x15, 0x04, 0x15, 0x00, + 0x12, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x68, 0x6f, 0x6d, 0x65, 0x06, 0x00, 0x00, 0x00, 0x6d, + 0x6f, 0x62, 0x69, 0x6c, 0x65, 0x15, 0x00, 0x15, 0x20, 0x15, 0x20, 0x2c, 0x15, 0x10, 0x15, 0x10, + 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0xc0, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x90, 0xef, 0x01, 0x03, 0x04, 0x26, 0xcc, 0x04, 0x1c, 0x15, 0x0c, 0x19, 0x25, 0x00, 0x10, + 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, + 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x15, 0x00, 0x16, 0x10, 0x16, 0x82, + 0x01, 0x16, 0x82, 0x01, 0x26, 0x8a, 0x04, 0x26, 0xca, 0x03, 0x00, 0x00, 0x15, 0x02, 0x19, 0x6c, + 0x48, 0x04, 0x75, 0x73, 0x65, 0x72, 0x15, 0x04, 0x00, 0x15, 0x02, 0x25, 0x00, 0x18, 0x02, 0x69, + 0x64, 0x00, 0x35, 0x02, 0x18, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, + 0x72, 0x73, 0x15, 0x02, 0x00, 0x35, 0x04, 0x18, 0x05, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x15, 0x04, + 0x00, 0x15, 0x04, 0x25, 0x00, 0x18, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x00, 0x15, 0x0c, + 0x25, 0x02, 0x18, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x25, 0x00, 0x00, 0x16, 0x00, 0x19, 0x1c, 0x19, + 0x3c, 0x26, 0x80, 0x01, 0x1c, 0x15, 0x02, 0x19, 0x25, 0x00, 0x10, 0x19, 0x18, 0x02, 0x69, 0x64, + 0x15, 0x00, 0x16, 0x0c, 0x16, 0x78, 0x16, 0x78, 0x26, 0x54, 0x26, 0x08, 0x00, 0x00, 0x26, 0xda, + 0x02, 0x1c, 0x15, 0x04, 0x19, 0x25, 0x00, 0x10, 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, + 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, 
0x70, 0x68, 0x6f, 0x6e, 0x65, 0x06, 0x6e, 0x75, + 0x6d, 0x62, 0x65, 0x72, 0x15, 0x00, 0x16, 0x10, 0x16, 0xa0, 0x01, 0x16, 0xa0, 0x01, 0x26, 0x96, + 0x02, 0x26, 0xba, 0x01, 0x00, 0x00, 0x26, 0xcc, 0x04, 0x1c, 0x15, 0x0c, 0x19, 0x25, 0x00, 0x10, + 0x19, 0x38, 0x0c, 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x05, + 0x70, 0x68, 0x6f, 0x6e, 0x65, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x15, 0x00, 0x16, 0x10, 0x16, 0x82, + 0x01, 0x16, 0x82, 0x01, 0x26, 0x8a, 0x04, 0x26, 0xca, 0x03, 0x00, 0x00, 0x16, 0x9a, 0x03, 0x16, + 0x0c, 0x00, 0x28, 0x49, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2d, 0x72, 0x73, 0x20, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x30, 0x2e, 0x33, 0x2e, 0x30, 0x20, 0x28, 0x62, 0x75, + 0x69, 0x6c, 0x64, 0x20, 0x62, 0x34, 0x35, 0x63, 0x65, 0x37, 0x63, 0x62, 0x61, 0x32, 0x31, 0x39, + 0x39, 0x66, 0x32, 0x32, 0x64, 0x39, 0x33, 0x32, 0x36, 0x39, 0x63, 0x31, 0x35, 0x30, 0x64, 0x38, + 0x61, 0x38, 0x33, 0x39, 0x31, 0x36, 0x63, 0x36, 0x39, 0x62, 0x35, 0x65, 0x29, 0x00, 0x32, 0x01, + 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; + + auto read_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{reinterpret_cast(repeated_bytes), sizeof(repeated_bytes)}); + auto result = cudf::io::read_parquet(read_opts); + + EXPECT_EQ(result.tbl->view().column(0).size(), 6); + EXPECT_EQ(result.tbl->view().num_columns(), 2); + + column_wrapper col0{1, 2, 3, 4, 5, 6}; + column_wrapper child0{{5555555555l, 1111111111l, 1111111111l, 2222222222l, 3333333333l}}; + cudf::test::strings_column_wrapper child1{{"-", "home", "home", "-", "mobile"}, {0, 1, 1, 0, 1}}; + auto struct_col = cudf::test::structs_column_wrapper{{child0, child1}}; + + auto list_offsets_column = + cudf::test::fixed_width_column_wrapper{0, 0, 0, 0, 1, 2, 5}.release(); + auto num_list_rows = list_offsets_column->size() - 1; + + auto mask = cudf::create_null_mask(6, cudf::mask_state::ALL_VALID); + cudf::set_null_mask(static_cast(mask.data()), 0, 2, false); + + auto list_col = 
cudf::make_lists_column( + num_list_rows, std::move(list_offsets_column), struct_col.release(), 2, std::move(mask)); + + std::vector> struct_children; + struct_children.push_back(std::move(list_col)); + + auto outer_struct = + cudf::test::structs_column_wrapper{{std::move(struct_children)}, {0, 0, 1, 1, 1, 1}}; + table_view expected{{col0, outer_struct}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), expected); +} + CUDF_TEST_PROGRAM_MAIN() From 96664ec7436033f59aa5b9740e6f54aec707e3cf Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 6 Oct 2023 15:09:11 -0700 Subject: [PATCH 44/76] Add pylibcudf.Scalar that interoperates with Arrow scalars (#14133) This PR adds a new Scalar object to pylibcudf that will function as the pylibcudf equivalent of cudf::scalar. Unlike columns, which are typically operated on in the form of views rather than owning types by libcudf, owning scalars are accepted by (const) ref by libcudf APIs and no corresponding view type exists. Therefore, pylibcudf.Scalar differs from pylibcudf.Column by actually owning an instance of the underlying libcudf type (cudf::scalar). Construction of pylibcudf Scalars is expected to be done from an Arrow scalar. This PR relies on #14124 and should not be merged until after that one. 
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/14133 --- python/cudf/cudf/_lib/CMakeLists.txt | 8 +- python/cudf/cudf/_lib/datetime.pyx | 6 +- python/cudf/cudf/_lib/interop.pyx | 95 +------------ python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 8 ++ .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt | 25 +++- python/cudf/cudf/_lib/pylibcudf/__init__.pxd | 5 +- python/cudf/cudf/_lib/pylibcudf/__init__.py | 5 +- python/cudf/cudf/_lib/pylibcudf/interop.pxd | 9 ++ python/cudf/cudf/_lib/pylibcudf/interop.pyx | 23 +++ python/cudf/cudf/_lib/pylibcudf/scalar.pxd | 32 +++++ python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 133 ++++++++++++++++++ python/cudf/cudf/_lib/pylibcudf/table.pxd | 3 + python/cudf/cudf/_lib/pylibcudf/table.pyx | 33 ++++- python/cudf/cudf/_lib/scalar.pxd | 13 +- python/cudf/cudf/_lib/scalar.pyx | 88 ++++++++---- python/cudf/cudf/_lib/strings/CMakeLists.txt | 10 +- .../cudf/_lib/strings/convert/CMakeLists.txt | 10 +- .../cudf/_lib/strings/split/CMakeLists.txt | 10 +- 18 files changed, 378 insertions(+), 138 deletions(-) create mode 100644 python/cudf/cudf/_lib/pylibcudf/interop.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/interop.pyx create mode 100644 python/cudf/cudf/_lib/pylibcudf/scalar.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/scalar.pyx diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 947659c290a..1b543b94589 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -107,8 +107,12 @@ if(${PYARROW_RESULT}) message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}") endif() -set(targets_using_arrow_headers interop avro csv orc json parquet) -foreach(target IN LISTS targets_using_arrow_headers) +# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts +# of cudf Cython 
that need to directly access the c_obj underlying the pylibcudf Scalar) the +# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers. +# These requirements will go away once all scalar-related Cython code is removed from cudf. +foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") endforeach() diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 81949dbaa20..3d96f59c4d6 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from cudf.core.buffer import acquire_spill_lock @@ -10,6 +10,7 @@ from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.filling cimport calendrical_month_sequence +from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.cpp.types cimport size_type from cudf._lib.scalar cimport DeviceScalar @@ -166,10 +167,11 @@ def date_range(DeviceScalar start, size_type n, offset): + offset.kwds.get("months", 0) ) + cdef const scalar* c_start = start.c_value.get() with nogil: c_result = move(calendrical_month_sequence( n, - start.c_value.get()[0], + c_start[0], months )) return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 639754fc54f..8fd2a409d90 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -4,14 +4,7 @@ from cpython cimport pycapsule from libcpp.memory cimport shared_ptr, unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector -from pyarrow.lib cimport ( - CScalar, - CTable, - pyarrow_unwrap_scalar, - pyarrow_unwrap_table, - 
pyarrow_wrap_scalar, - pyarrow_wrap_table, -) +from pyarrow.lib cimport CTable, pyarrow_unwrap_table, pyarrow_wrap_table from cudf._lib.cpp.interop cimport ( DLManagedTensor, @@ -21,22 +14,12 @@ from cudf._lib.cpp.interop cimport ( to_arrow as cpp_to_arrow, to_dlpack as cpp_to_dlpack, ) -from cudf._lib.cpp.scalar.scalar cimport fixed_point_scalar, scalar from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.cpp.types cimport type_id -from cudf._lib.cpp.wrappers.decimals cimport ( - decimal32, - decimal64, - decimal128, - scale_type, -) -from cudf._lib.scalar cimport DeviceScalar from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns from cudf.api.types import is_list_dtype, is_struct_dtype from cudf.core.buffer import acquire_spill_lock -from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype def from_dlpack(dlpack_capsule): @@ -199,79 +182,3 @@ def from_arrow(object input_table): c_result = move(cpp_from_arrow(cpp_arrow_table.get()[0])) return columns_from_unique_ptr(move(c_result)) - - -@acquire_spill_lock() -def to_arrow_scalar(DeviceScalar source_scalar): - """Convert a scalar to a PyArrow scalar. - - Parameters - ---------- - source_scalar : the scalar to convert - - Returns - ------- - pyarrow.lib.Scalar - """ - cdef vector[column_metadata] cpp_metadata = gather_metadata( - [("", source_scalar.dtype)] - ) - cdef const scalar* source_scalar_ptr = source_scalar.get_raw_ptr() - - cdef shared_ptr[CScalar] cpp_arrow_scalar - with nogil: - cpp_arrow_scalar = cpp_to_arrow( - source_scalar_ptr[0], cpp_metadata[0] - ) - - return pyarrow_wrap_scalar(cpp_arrow_scalar) - - -@acquire_spill_lock() -def from_arrow_scalar(object input_scalar, output_dtype=None): - """Convert from PyArrow scalar to a cudf scalar. 
- - Parameters - ---------- - input_scalar : PyArrow scalar - output_dtype : output type to cast to, ignored except for decimals - - Returns - ------- - cudf._lib.DeviceScalar - """ - cdef shared_ptr[CScalar] cpp_arrow_scalar = ( - pyarrow_unwrap_scalar(input_scalar) - ) - cdef unique_ptr[scalar] c_result - - with nogil: - c_result = move(cpp_from_arrow(cpp_arrow_scalar.get()[0])) - - cdef type_id ctype = c_result.get().type().id() - if ctype == type_id.DECIMAL128: - if output_dtype is None: - # Decimals must be cast to the cudf dtype of the right width - raise ValueError( - "Decimal scalars must be constructed with a dtype" - ) - - if isinstance(output_dtype, Decimal32Dtype): - c_result.reset( - new fixed_point_scalar[decimal32]( - ( c_result.get()).value(), - scale_type(-input_scalar.type.scale), - c_result.get().is_valid() - ) - ) - elif isinstance(output_dtype, Decimal64Dtype): - c_result.reset( - new fixed_point_scalar[decimal64]( - ( c_result.get()).value(), - scale_type(-input_scalar.type.scale), - c_result.get().is_valid() - ) - ) - # Decimal128Dtype is a no-op, no conversion needed. - - return DeviceScalar.from_unique_ptr(move(c_result), output_dtype) diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index 515b9c1d6e4..d4e2392ee04 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -22,3 +22,11 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ASSOCIATED_TARGETS cudf ) +# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts +# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the +# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers. +# These requirements will go away once all scalar-related Cython code is removed from cudf. 
+foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") +endforeach() diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt index 0ce42dc43ff..5185b2d4bb5 100644 --- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt @@ -12,10 +12,33 @@ # the License. # ============================================================================= -set(cython_sources column.pyx copying.pyx gpumemoryview.pyx table.pyx types.pyx utils.pyx) +set(cython_sources column.pyx copying.pyx gpumemoryview.pyx interop.pyx scalar.pyx table.pyx + types.pyx utils.pyx +) set(linked_libraries cudf::cudf) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf ) + +find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) + +execute_process( + COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())" + OUTPUT_VARIABLE PYARROW_INCLUDE_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") +endforeach() + +# TODO: Clean up this include when switching to scikit-build-core. See cudf/_lib/CMakeLists.txt for +# more info +find_package(NumPy REQUIRED) +foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24. 
+ # target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") +endforeach() diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd index ba7822b0a54..7a35854392c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd @@ -1,9 +1,10 @@ # Copyright (c) 2023, NVIDIA CORPORATION. # TODO: Verify consistent usage of relative/absolute imports in pylibcudf. -from . cimport copying +from . cimport copying, interop from .column cimport Column from .gpumemoryview cimport gpumemoryview +from .scalar cimport Scalar from .table cimport Table # TODO: cimport type_id once # https://github.com/cython/cython/issues/5609 is resolved @@ -12,7 +13,9 @@ from .types cimport DataType __all__ = [ "Column", "DataType", + "Scalar", "Table", "copying", "gpumemoryview", + "interop", ] diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py index 3edff9a53e8..72b74a57b87 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.py +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py @@ -1,16 +1,19 @@ # Copyright (c) 2023, NVIDIA CORPORATION. -from . import copying +from . import copying, interop from .column import Column from .gpumemoryview import gpumemoryview +from .scalar import Scalar from .table import Table from .types import DataType, TypeId __all__ = [ "Column", "DataType", + "Scalar", "Table", "TypeId", "copying", "gpumemoryview", + "interop", ] diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pxd b/python/cudf/cudf/_lib/pylibcudf/interop.pxd new file mode 100644 index 00000000000..3a79e5425d4 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/interop.pxd @@ -0,0 +1,9 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +from cudf._lib.cpp.interop cimport column_metadata + + +cdef class ColumnMetadata: + cdef public object name + cdef public object children_meta + cdef column_metadata to_libcudf(self) diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pyx b/python/cudf/cudf/_lib/pylibcudf/interop.pyx new file mode 100644 index 00000000000..0cdca275027 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/interop.pyx @@ -0,0 +1,23 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from cudf._lib.cpp.interop cimport column_metadata + + +cdef class ColumnMetadata: + def __init__(self, name): + self.name = name + self.children_meta = [] + + cdef column_metadata to_libcudf(self): + """Convert to C++ column_metadata. + + Since this class is mutable and cheap, it is easier to create the C++ + object on the fly rather than have it directly backing the storage for + the Cython class. + """ + cdef column_metadata c_metadata + cdef ColumnMetadata child_meta + c_metadata.name = self.name.encode() + for child_meta in self.children_meta: + c_metadata.children_meta.push_back(child_meta.to_libcudf()) + return c_metadata diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd new file mode 100644 index 00000000000..09d853d832f --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd @@ -0,0 +1,32 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from pyarrow cimport lib as pa + +from rmm._lib.memory_resource cimport DeviceMemoryResource + +from cudf._lib.cpp.scalar.scalar cimport scalar + +from .interop cimport ColumnMetadata +from .types cimport DataType + + +cdef class Scalar: + cdef unique_ptr[scalar] c_obj + cdef DataType _data_type + + # Holds a reference to the DeviceMemoryResource used for allocation. + # Ensures the MR does not get destroyed before this DeviceBuffer. 
`mr` is + # needed for deallocation + cdef DeviceMemoryResource mr + + cdef const scalar* get(self) except * + + cpdef DataType type(self) + cpdef bool is_valid(self) + + @staticmethod + cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*) + + cpdef pa.Scalar to_arrow(self, ColumnMetadata metadata) diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx new file mode 100644 index 00000000000..04f588bd3e6 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx @@ -0,0 +1,133 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from cython cimport no_gc_clear +from cython.operator cimport dereference +from libcpp.memory cimport shared_ptr, unique_ptr +from libcpp.utility cimport move +from pyarrow cimport lib as pa + +from rmm._lib.memory_resource cimport get_current_device_resource + +from cudf._lib.cpp.interop cimport ( + column_metadata, + from_arrow as cpp_from_arrow, + to_arrow as cpp_to_arrow, +) +from cudf._lib.cpp.scalar.scalar cimport fixed_point_scalar, scalar +from cudf._lib.cpp.wrappers.decimals cimport ( + decimal32, + decimal64, + decimal128, + scale_type, +) + +from .interop cimport ColumnMetadata +from .types cimport DataType, type_id + + +# The DeviceMemoryResource attribute could be released prematurely +# by the gc if the Scalar is in a reference cycle. Removing the tp_clear +# function with the no_gc_clear decoration prevents that. See +# https://github.com/rapidsai/rmm/pull/931 for details. +@no_gc_clear +cdef class Scalar: + """A scalar value in device memory.""" + # Unlike for columns, libcudf does not support scalar views. All APIs that + # accept scalar values accept references to the owning object rather than a + # special view type. 
As a result, pylibcudf.Scalar has a simpler structure + # than pylibcudf.Column because it can be a true wrapper around a libcudf + # column + + def __cinit__(self, *args, **kwargs): + self.mr = get_current_device_resource() + + def __init__(self, pa.Scalar value=None): + # TODO: This case is not something we really want to + # support, but it here for now to ease the transition of + # DeviceScalar. + if value is not None: + raise ValueError("Scalar should be constructed with a factory") + + @staticmethod + def from_arrow(pa.Scalar value, DataType data_type=None): + # Allow passing a dtype, but only for the purpose of decimals for now + + cdef shared_ptr[pa.CScalar] cscalar = ( + pa.pyarrow_unwrap_scalar(value) + ) + cdef unique_ptr[scalar] c_result + + with nogil: + c_result = move(cpp_from_arrow(cscalar.get()[0])) + + cdef Scalar s = Scalar.from_libcudf(move(c_result)) + + if s.type().id() != type_id.DECIMAL128: + if data_type is not None: + raise ValueError( + "dtype may not be passed for non-decimal types" + ) + return s + + if data_type is None: + raise ValueError( + "Decimal scalars must be constructed with a dtype" + ) + + cdef type_id tid = data_type.id() + + if tid == type_id.DECIMAL32: + s.c_obj.reset( + new fixed_point_scalar[decimal32]( + ( s.c_obj.get()).value(), + scale_type(-value.type.scale), + s.c_obj.get().is_valid() + ) + ) + elif tid == type_id.DECIMAL64: + s.c_obj.reset( + new fixed_point_scalar[decimal64]( + ( s.c_obj.get()).value(), + scale_type(-value.type.scale), + s.c_obj.get().is_valid() + ) + ) + elif tid != type_id.DECIMAL128: + raise ValueError( + "Decimal scalars may only be cast to decimals" + ) + + return s + + cpdef pa.Scalar to_arrow(self, ColumnMetadata metadata): + cdef shared_ptr[pa.CScalar] c_result + cdef column_metadata c_metadata = metadata.to_libcudf() + + with nogil: + c_result = move(cpp_to_arrow(dereference(self.c_obj.get()), c_metadata)) + + return pa.pyarrow_wrap_scalar(c_result) + + cdef const scalar* get(self) 
except *: + return self.c_obj.get() + + cpdef DataType type(self): + """The type of data in the column.""" + return self._data_type + + cpdef bool is_valid(self): + """True if the scalar is valid, false if not""" + return self.get().is_valid() + + @staticmethod + cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=None): + """Construct a Scalar object from a libcudf scalar. + + This method is for pylibcudf's functions to use to ingest outputs of + calling libcudf algorithms, and should generally not be needed by users + (even direct pylibcudf Cython users). + """ + cdef Scalar s = Scalar.__new__(Scalar) + s.c_obj.swap(libcudf_scalar) + s._data_type = DataType.from_libcudf(s.get().type()) + return s diff --git a/python/cudf/cudf/_lib/pylibcudf/table.pxd b/python/cudf/cudf/_lib/pylibcudf/table.pxd index 95f197b13eb..a9e2874232a 100644 --- a/python/cudf/cudf/_lib/pylibcudf/table.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/table.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2023, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr +from pyarrow cimport lib as pa from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view @@ -16,3 +17,5 @@ cdef class Table: cdef Table from_libcudf(unique_ptr[table] libcudf_tbl) cpdef list columns(self) + + cpdef pa.Table to_arrow(self, list metadata) diff --git a/python/cudf/cudf/_lib/pylibcudf/table.pyx b/python/cudf/cudf/_lib/pylibcudf/table.pyx index 720f9815bd6..c41eb82e4a1 100644 --- a/python/cudf/cudf/_lib/pylibcudf/table.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/table.pyx @@ -1,15 +1,22 @@ # Copyright (c) 2023, NVIDIA CORPORATION. 
from cython.operator cimport dereference -from libcpp.memory cimport unique_ptr +from libcpp.memory cimport shared_ptr, unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector +from pyarrow cimport lib as pa from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.cpp.interop cimport ( + column_metadata, + from_arrow as cpp_from_arrow, + to_arrow as cpp_to_arrow, +) from cudf._lib.cpp.table.table cimport table from .column cimport Column +from .interop cimport ColumnMetadata cdef class Table: @@ -60,3 +67,27 @@ cdef class Table: cpdef list columns(self): return self._columns + + @staticmethod + def from_arrow(pa.Table pyarrow_table): + cdef shared_ptr[pa.CTable] ctable = ( + pa.pyarrow_unwrap_table(pyarrow_table) + ) + cdef unique_ptr[table] c_result + + with nogil: + c_result = move(cpp_from_arrow(ctable.get()[0])) + + return Table.from_libcudf(move(c_result)) + + cpdef pa.Table to_arrow(self, list metadata): + cdef shared_ptr[pa.CTable] c_result + cdef vector[column_metadata] c_metadata + cdef ColumnMetadata meta + for meta in metadata: + c_metadata.push_back(meta.to_libcudf()) + + with nogil: + c_result = move(cpp_to_arrow(self.view(), c_metadata)) + + return pa.pyarrow_wrap_table(c_result) diff --git a/python/cudf/cudf/_lib/scalar.pxd b/python/cudf/cudf/_lib/scalar.pxd index 1deed60d67d..77733f59c3d 100644 --- a/python/cudf/cudf/_lib/scalar.pxd +++ b/python/cudf/cudf/_lib/scalar.pxd @@ -1,20 +1,19 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr from rmm._lib.memory_resource cimport DeviceMemoryResource +# TODO: Would like to remove this cimport, but it will require some more work +# to excise all C code in scalar.pyx that relies on using the C API of the +# pylibcudf Scalar underlying the DeviceScalar. 
+from cudf._lib cimport pylibcudf from cudf._lib.cpp.scalar.scalar cimport scalar cdef class DeviceScalar: - cdef unique_ptr[scalar] c_value - - # Holds a reference to the DeviceMemoryResource used for allocation. - # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is - # needed for deallocation - cdef DeviceMemoryResource mr + cdef pylibcudf.Scalar c_value cdef object _dtype diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 5ab286c5701..0b64c75f7b6 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -1,7 +1,5 @@ # Copyright (c) 2020-2023, NVIDIA CORPORATION. -cimport cython - import copy import numpy as np @@ -13,17 +11,17 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move -from rmm._lib.memory_resource cimport get_current_device_resource - import cudf +from cudf._lib import pylibcudf from cudf._lib.types import LIBCUDF_TO_SUPPORTED_NUMPY_TYPES -from cudf.core.dtypes import ListDtype, StructDtype +from cudf.core.dtypes import ( + ListDtype, + StructDtype, + is_list_dtype, + is_struct_dtype, +) from cudf.core.missing import NA, NaT -from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id - -from cudf._lib.interop import from_arrow_scalar, to_arrow_scalar - cimport cudf._lib.cpp.types as libcudf_types from cudf._lib.cpp.scalar.scalar cimport ( duration_scalar, @@ -44,6 +42,7 @@ from cudf._lib.cpp.wrappers.timestamps cimport ( timestamp_s, timestamp_us, ) +from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id def _replace_nested(obj, check, replacement): @@ -61,15 +60,44 @@ def _replace_nested(obj, check, replacement): _replace_nested(v, check, replacement) -# The DeviceMemoryResource attribute could be released prematurely -# by the gc if the DeviceScalar is in a reference cycle. Removing -# the tp_clear function with the no_gc_clear decoration prevents that. 
-# See https://github.com/rapidsai/rmm/pull/931 for details. -@cython.no_gc_clear +def gather_metadata(dtypes): + """Convert a dict of dtypes to a list of ColumnMetadata objects. + + The metadata is constructed recursively so that nested types are + represented as nested ColumnMetadata objects. + + Parameters + ---------- + dtypes : dict + A dict mapping column names to dtypes. + + Returns + ------- + List[ColumnMetadata] + A list of ColumnMetadata objects. + """ + out = [] + for name, dtype in dtypes.items(): + v = pylibcudf.interop.ColumnMetadata(name) + if is_struct_dtype(dtype): + v.children_meta = gather_metadata(dtype.fields) + elif is_list_dtype(dtype): + # Offsets column is unnamed and has no children + v.children_meta.append(pylibcudf.interop.ColumnMetadata("")) + v.children_meta.extend( + gather_metadata({"": dtype.element_type}) + ) + out.append(v) + return out + + cdef class DeviceScalar: + # TODO: I think this should be removable, except that currently the way + # that from_unique_ptr is implemented is probably dereferencing this in an + # invalid state. See what the best way to fix that is. def __cinit__(self, *args, **kwargs): - self.mr = get_current_device_resource() + self.c_value = pylibcudf.Scalar() def __init__(self, value, dtype): """ @@ -85,7 +113,7 @@ cdef class DeviceScalar: dtype : dtype A NumPy dtype. """ - self._dtype = dtype if dtype.kind != 'U' else cudf.dtype('object') + dtype = dtype if dtype.kind != 'U' else cudf.dtype('object') if cudf.utils.utils.is_na_like(value): value = None @@ -108,10 +136,17 @@ cdef class DeviceScalar: pa_scalar = pa.scalar(value, type=pa_type) - # Note: This factory-like behavior in __init__ will be removed when - # migrating to pylibcudf. 
- cdef DeviceScalar obj = from_arrow_scalar(pa_scalar, self._dtype) - self.c_value.swap(obj.c_value) + data_type = None + if isinstance(dtype, cudf.core.dtypes.DecimalDtype): + tid = pylibcudf.TypeId.DECIMAL128 + if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype): + tid = pylibcudf.TypeId.DECIMAL32 + elif isinstance(dtype, cudf.core.dtypes.Decimal64Dtype): + tid = pylibcudf.TypeId.DECIMAL64 + data_type = pylibcudf.DataType(tid, -dtype.scale) + + self.c_value = pylibcudf.Scalar.from_arrow(pa_scalar, data_type) + self._dtype = dtype def _to_host_scalar(self): is_datetime = self.dtype.kind == "M" @@ -119,7 +154,8 @@ cdef class DeviceScalar: null_type = NaT if is_datetime or is_timedelta else NA - ps = to_arrow_scalar(self) + metadata = gather_metadata({"": self.dtype})[0] + ps = self.c_value.to_arrow(metadata) if not ps.is_valid: return null_type @@ -158,13 +194,13 @@ cdef class DeviceScalar: return self._to_host_scalar() cdef const scalar* get_raw_ptr(self) except *: - return self.c_value.get() + return self.c_value.c_obj.get() cpdef bool is_valid(self): """ Returns if the Scalar is valid or not(i.e., ). 
""" - return self.get_raw_ptr()[0].is_valid() + return self.c_value.is_valid() def __repr__(self): if cudf.utils.utils.is_na_like(self.value): @@ -183,7 +219,7 @@ cdef class DeviceScalar: cdef DeviceScalar s = DeviceScalar.__new__(DeviceScalar) cdef libcudf_types.data_type cdtype - s.c_value = move(ptr) + s.c_value = pylibcudf.Scalar.from_libcudf(move(ptr)) cdtype = s.get_raw_ptr()[0].type() if dtype is not None: @@ -310,9 +346,9 @@ def _create_proxy_nat_scalar(dtype): if dtype.char in 'mM': nat = dtype.type('NaT').astype(dtype) if dtype.type == np.datetime64: - _set_datetime64_from_np_scalar(result.c_value, nat, dtype, True) + _set_datetime64_from_np_scalar(result.c_value.c_obj, nat, dtype, True) elif dtype.type == np.timedelta64: - _set_timedelta64_from_np_scalar(result.c_value, nat, dtype, True) + _set_timedelta64_from_np_scalar(result.c_value.c_obj, nat, dtype, True) return result else: raise TypeError('NAT only valid for datetime and timedelta') diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index a5e87a456cb..fc11f047ab4 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -40,6 +40,14 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf ) +# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts +# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the +# requirement for arrow headers infects all of cudf. 
That requirement will go away once all +# scalar-related Cython code is removed from cudf. +foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") +endforeach() add_subdirectory(convert) add_subdirectory(split) diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt index 434f79d3b5f..f55bb1fb780 100644 --- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -22,3 +22,11 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf ) +# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts +# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the +# requirement for arrow headers infects all of cudf. That requirement will go away once all +# scalar-related Cython code is removed from cudf. 
+foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") +endforeach() diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt index 59a22c06e85..2f2063482af 100644 --- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -20,3 +20,11 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf ) +# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts +# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the +# requirement for arrow headers infects all of cudf. That requirement will go away once all +# scalar-related Cython code is removed from cudf. +foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") +endforeach() From e28017cc17d2feb050d2effd4ebafb84600fd607 Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Mon, 9 Oct 2023 10:05:12 -0500 Subject: [PATCH 45/76] Cleanup of namespaces in parquet code. (#14259) Cleans up several issues in the parquet code: - We were using the namespace `cudf::io::detail::parquet`, when `cudf::io::parquet::detail` makes more sense. 
- Converts the `cudf::io::parquet::gpu` namespace to also just use `cudf::io::parquet::detail` - Several detail-style headers and source files were using `cudf::io::parquet` when they should probably have been in the detail namespace. Authors: - https://github.com/nvdbaranec Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/14259 --- cpp/include/cudf/io/detail/parquet.hpp | 8 +- cpp/include/cudf/io/parquet.hpp | 4 +- cpp/src/io/functions.cpp | 4 +- cpp/src/io/parquet/chunk_dict.cu | 19 +- .../io/parquet/compact_protocol_reader.cpp | 8 +- .../io/parquet/compact_protocol_reader.hpp | 9 +- .../io/parquet/compact_protocol_writer.cpp | 8 +- .../io/parquet/compact_protocol_writer.hpp | 8 +- cpp/src/io/parquet/decode_preprocess.cu | 10 +- cpp/src/io/parquet/delta_binary.cuh | 4 +- cpp/src/io/parquet/page_data.cu | 12 +- cpp/src/io/parquet/page_decode.cuh | 4 +- cpp/src/io/parquet/page_delta_decode.cu | 6 +- cpp/src/io/parquet/page_enc.cu | 22 +- cpp/src/io/parquet/page_hdr.cu | 14 +- cpp/src/io/parquet/page_string_decode.cu | 14 +- cpp/src/io/parquet/page_string_utils.cuh | 4 +- cpp/src/io/parquet/parquet.hpp | 9 +- cpp/src/io/parquet/parquet_common.hpp | 9 +- cpp/src/io/parquet/parquet_gpu.cuh | 4 +- cpp/src/io/parquet/parquet_gpu.hpp | 27 +- cpp/src/io/parquet/predicate_pushdown.cpp | 14 +- cpp/src/io/parquet/reader.cpp | 4 +- cpp/src/io/parquet/reader_impl.cpp | 36 +-- cpp/src/io/parquet/reader_impl.hpp | 12 +- cpp/src/io/parquet/reader_impl_helpers.cpp | 121 ++++---- cpp/src/io/parquet/reader_impl_helpers.hpp | 21 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 259 +++++++++--------- cpp/src/io/parquet/rle_stream.cuh | 4 +- cpp/src/io/parquet/writer_impl.cu | 223 ++++++++------- cpp/src/io/parquet/writer_impl.hpp | 28 +- cpp/tests/io/parquet_test.cpp | 207 +++++++------- 32 files changed, 531 insertions(+), 605 deletions(-) 
diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 074f690d2c7..0b8ee9676de 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -38,7 +38,7 @@ class parquet_reader_options; class parquet_writer_options; class chunked_parquet_writer_options; -namespace detail::parquet { +namespace parquet::detail { /** * @brief Class to read Parquet dataset data into columns. @@ -186,7 +186,7 @@ class writer { */ explicit writer(std::vector> sinks, parquet_writer_options const& options, - single_write_mode mode, + cudf::io::detail::single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -201,7 +201,7 @@ class writer { */ explicit writer(std::vector> sinks, chunked_parquet_writer_options const& options, - single_write_mode mode, + cudf::io::detail::single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -250,5 +250,5 @@ class writer { * metadata. */ parquet_metadata read_parquet_metadata(host_span const> sources); -} // namespace detail::parquet +} // namespace parquet::detail } // namespace cudf::io diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index deaf23d405a..6283099e700 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -499,7 +499,7 @@ class chunked_parquet_reader { [[nodiscard]] table_with_metadata read_chunk() const; private: - std::unique_ptr reader; + std::unique_ptr reader; }; /** @} */ // end of group @@ -1750,7 +1750,7 @@ class parquet_chunked_writer { std::vector const& column_chunks_file_paths = {}); /// Unique pointer to impl writer class - std::unique_ptr writer; + std::unique_ptr writer; }; /** @} */ // end of group diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 392a7850886..726442d752e 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -470,8 +470,8 @@ void orc_chunked_writer::close() writer->close(); } -using namespace cudf::io::detail::parquet; 
-namespace detail_parquet = cudf::io::detail::parquet; +using namespace cudf::io::parquet::detail; +namespace detail_parquet = cudf::io::parquet::detail; table_with_metadata read_parquet(parquet_reader_options const& options, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu index 9ff1869edde..53ff31ab0a7 100644 --- a/cpp/src/io/parquet/chunk_dict.cu +++ b/cpp/src/io/parquet/chunk_dict.cu @@ -24,10 +24,8 @@ #include -namespace cudf { -namespace io { -namespace parquet { -namespace gpu { +namespace cudf::io::parquet::detail { + namespace { constexpr int DEFAULT_BLOCK_SIZE = 256; } @@ -101,7 +99,7 @@ struct map_find_fn { template __global__ void __launch_bounds__(block_size) - populate_chunk_hash_maps_kernel(cudf::detail::device_2dspan frags) + populate_chunk_hash_maps_kernel(cudf::detail::device_2dspan frags) { auto col_idx = blockIdx.y; auto block_x = blockIdx.x; @@ -226,7 +224,7 @@ __global__ void __launch_bounds__(block_size) template __global__ void __launch_bounds__(block_size) - get_dictionary_indices_kernel(cudf::detail::device_2dspan frags) + get_dictionary_indices_kernel(cudf::detail::device_2dspan frags) { auto col_idx = blockIdx.y; auto block_x = blockIdx.x; @@ -276,7 +274,7 @@ void initialize_chunk_hash_maps(device_span chunks, rmm::cuda_st <<>>(chunks); } -void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, +void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream) { dim3 const dim_grid(frags.size().second, frags.size().first); @@ -290,14 +288,11 @@ void collect_map_entries(device_span chunks, rmm::cuda_stream_vi collect_map_entries_kernel<<>>(chunks); } -void get_dictionary_indices(cudf::detail::device_2dspan frags, +void get_dictionary_indices(cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream) { dim3 const dim_grid(frags.size().second, frags.size().first); get_dictionary_indices_kernel <<>>(frags); } -} // namespace gpu 
-} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 5c7b8ca3f8c..81d1be64a45 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -21,9 +21,7 @@ #include #include -namespace cudf { -namespace io { -namespace parquet { +namespace cudf::io::parquet::detail { /** * @brief Base class for parquet field functors. @@ -870,6 +868,4 @@ int CompactProtocolReader::WalkSchema( } } -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/compact_protocol_reader.hpp b/cpp/src/io/parquet/compact_protocol_reader.hpp index 619815db503..cbb4161b138 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.hpp +++ b/cpp/src/io/parquet/compact_protocol_reader.hpp @@ -25,9 +25,8 @@ #include #include -namespace cudf { -namespace io { -namespace parquet { +namespace cudf::io::parquet::detail { + /** * @brief Class for parsing Parquet's Thrift Compact Protocol encoded metadata * @@ -147,6 +146,4 @@ class CompactProtocolReader { friend class parquet_field_struct_blob; }; -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index 60bc8984d81..9adc8767880 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -16,9 +16,7 @@ #include "compact_protocol_writer.hpp" -namespace cudf { -namespace io { -namespace parquet { +namespace cudf::io::parquet::detail { /** * @brief Parquet CompactProtocolWriter class @@ -391,6 +389,4 @@ inline void CompactProtocolFieldWriter::set_current_field(int const& field) current_field_value = field; } -} // namespace parquet -} // namespace 
io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/compact_protocol_writer.hpp b/cpp/src/io/parquet/compact_protocol_writer.hpp index 26d66527aa5..4849a814b14 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.hpp +++ b/cpp/src/io/parquet/compact_protocol_writer.hpp @@ -25,9 +25,7 @@ #include #include -namespace cudf { -namespace io { -namespace parquet { +namespace cudf::io::parquet::detail { /** * @brief Class for parsing Parquet's Thrift Compact Protocol encoded metadata @@ -115,6 +113,4 @@ class CompactProtocolFieldWriter { inline void set_current_field(int const& field); }; -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/decode_preprocess.cu b/cpp/src/io/parquet/decode_preprocess.cu index 8de3702bc2e..544c93ee616 100644 --- a/cpp/src/io/parquet/decode_preprocess.cu +++ b/cpp/src/io/parquet/decode_preprocess.cu @@ -23,10 +23,7 @@ #include #include -namespace cudf { -namespace io { -namespace parquet { -namespace gpu { +namespace cudf::io::parquet::detail { namespace { @@ -411,7 +408,4 @@ void ComputePageSizes(cudf::detail::hostdevice_vector& pages, } } -} // namespace gpu -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/delta_binary.cuh b/cpp/src/io/parquet/delta_binary.cuh index 2382e4aafdf..a513e6674b4 100644 --- a/cpp/src/io/parquet/delta_binary.cuh +++ b/cpp/src/io/parquet/delta_binary.cuh @@ -18,7 +18,7 @@ #include "page_decode.cuh" -namespace cudf::io::parquet::gpu { +namespace cudf::io::parquet::detail { // DELTA_XXX encoding support // @@ -291,4 +291,4 @@ struct delta_binary_decoder { } }; -} // namespace cudf::io::parquet::gpu +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 230834632dd..cce3659b902 100644 --- a/cpp/src/io/parquet/page_data.cu +++ 
b/cpp/src/io/parquet/page_data.cu @@ -23,10 +23,7 @@ #include #include -namespace cudf { -namespace io { -namespace parquet { -namespace gpu { +namespace cudf::io::parquet::detail { namespace { @@ -624,7 +621,7 @@ uint32_t GetAggregatedDecodeKernelMask(cudf::detail::hostdevice_vector } /** - * @copydoc cudf::io::parquet::gpu::DecodePageData + * @copydoc cudf::io::parquet::detail::DecodePageData */ void __host__ DecodePageData(cudf::detail::hostdevice_vector& pages, cudf::detail::hostdevice_vector const& chunks, @@ -648,7 +645,4 @@ void __host__ DecodePageData(cudf::detail::hostdevice_vector& pages, } } -} // namespace gpu -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index d70cabdd35f..7c866fd8b9e 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -24,7 +24,7 @@ #include #include -namespace cudf::io::parquet::gpu { +namespace cudf::io::parquet::detail { struct page_state_s { constexpr page_state_s() noexcept {} @@ -1384,4 +1384,4 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, return true; } -} // namespace cudf::io::parquet::gpu +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index 2b78dead205..d25684a59f3 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -23,7 +23,7 @@ #include #include -namespace cudf::io::parquet::gpu { +namespace cudf::io::parquet::detail { namespace { @@ -160,7 +160,7 @@ __global__ void __launch_bounds__(96) } // anonymous namespace /** - * @copydoc cudf::io::parquet::gpu::DecodeDeltaBinary + * @copydoc cudf::io::parquet::detail::DecodeDeltaBinary */ void __host__ DecodeDeltaBinary(cudf::detail::hostdevice_vector& pages, cudf::detail::hostdevice_vector const& chunks, @@ -184,4 +184,4 @@ void __host__ 
DecodeDeltaBinary(cudf::detail::hostdevice_vector& pages } } -} // namespace cudf::io::parquet::gpu +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index fe0dbb85124..78873d5e8ca 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -41,10 +41,7 @@ #include #include -namespace cudf { -namespace io { -namespace parquet { -namespace gpu { +namespace cudf::io::parquet::detail { namespace { @@ -329,7 +326,7 @@ __global__ void __launch_bounds__(128) // blockDim {128,1,1} __global__ void __launch_bounds__(128) gpuInitPages(device_2dspan chunks, - device_span pages, + device_span pages, device_span page_sizes, device_span comp_page_sizes, device_span col_desc, @@ -998,7 +995,7 @@ __device__ auto julian_days_with_time(int64_t v) // blockDim(128, 1, 1) template __global__ void __launch_bounds__(128, 8) - gpuEncodePages(device_span pages, + gpuEncodePages(device_span pages, device_span> comp_in, device_span> comp_out, device_span comp_results, @@ -1988,7 +1985,7 @@ __global__ void __launch_bounds__(128) // blockDim(1024, 1, 1) __global__ void __launch_bounds__(1024) - gpuGatherPages(device_span chunks, device_span pages) + gpuGatherPages(device_span chunks, device_span pages) { __shared__ __align__(8) EncColumnChunk ck_g; __shared__ __align__(8) EncPage page_g; @@ -2265,7 +2262,7 @@ void InitFragmentStatistics(device_span groups, } void InitEncoderPages(device_2dspan chunks, - device_span pages, + device_span pages, device_span page_sizes, device_span comp_page_sizes, device_span col_desc, @@ -2294,7 +2291,7 @@ void InitEncoderPages(device_2dspan chunks, write_v2_headers); } -void EncodePages(device_span pages, +void EncodePages(device_span pages, bool write_v2_headers, device_span> comp_in, device_span> comp_out, @@ -2328,7 +2325,7 @@ void EncodePageHeaders(device_span pages, } void GatherPages(device_span chunks, - device_span pages, + device_span pages, 
rmm::cuda_stream_view stream) { gpuGatherPages<<>>(chunks, pages); @@ -2343,7 +2340,4 @@ void EncodeColumnIndexes(device_span chunks, chunks, column_stats, column_index_truncate_length); } -} // namespace gpu -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 6f8b2f50443..eae8e05e61e 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -20,10 +20,8 @@ #include -namespace cudf { -namespace io { -namespace parquet { -namespace gpu { +namespace cudf::io::parquet::detail { + // Minimal thrift implementation for parsing page headers // https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md @@ -161,8 +159,7 @@ __device__ void skip_struct_field(byte_stream_s* bs, int field_type) * @param chunk Column chunk the page belongs to * @return `kernel_mask_bits` value for the given page */ -__device__ uint32_t kernel_mask_for_page(gpu::PageInfo const& page, - gpu::ColumnChunkDesc const& chunk) +__device__ uint32_t kernel_mask_for_page(PageInfo const& page, ColumnChunkDesc const& chunk) { if (page.flags & PAGEINFO_FLAGS_DICTIONARY) { return 0; } @@ -528,7 +525,4 @@ void __host__ BuildStringDictionaryIndex(ColumnChunkDesc* chunks, gpuBuildStringDictionaryIndex<<>>(chunks, num_chunks); } -} // namespace gpu -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu index d79abe4a6d2..4d79770ec34 100644 --- a/cpp/src/io/parquet/page_string_decode.cu +++ b/cpp/src/io/parquet/page_string_decode.cu @@ -20,10 +20,7 @@ #include #include -namespace cudf { -namespace io { -namespace parquet { -namespace gpu { +namespace cudf::io::parquet::detail { namespace { @@ -757,7 +754,7 @@ __global__ void __launch_bounds__(decode_block_size) } // anonymous namespace 
/** - * @copydoc cudf::io::parquet::gpu::ComputePageStringSizes + * @copydoc cudf::io::parquet::detail::ComputePageStringSizes */ void ComputePageStringSizes(cudf::detail::hostdevice_vector& pages, cudf::detail::hostdevice_vector const& chunks, @@ -778,7 +775,7 @@ void ComputePageStringSizes(cudf::detail::hostdevice_vector& pages, } /** - * @copydoc cudf::io::parquet::gpu::DecodeStringPageData + * @copydoc cudf::io::parquet::detail::DecodeStringPageData */ void __host__ DecodeStringPageData(cudf::detail::hostdevice_vector& pages, cudf::detail::hostdevice_vector const& chunks, @@ -802,7 +799,4 @@ void __host__ DecodeStringPageData(cudf::detail::hostdevice_vector& pa } } -} // namespace gpu -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/page_string_utils.cuh b/cpp/src/io/parquet/page_string_utils.cuh index 9395599b3ff..a81d0a64466 100644 --- a/cpp/src/io/parquet/page_string_utils.cuh +++ b/cpp/src/io/parquet/page_string_utils.cuh @@ -18,7 +18,7 @@ #include -namespace cudf::io::parquet::gpu { +namespace cudf::io::parquet::detail { // stole this from cudf/strings/detail/gather.cuh. modified to run on a single string on one warp. // copies from src to dst in 16B chunks per thread. 
@@ -107,4 +107,4 @@ __device__ void block_excl_sum(size_type* arr, size_type length, size_type initi } } -} // namespace cudf::io::parquet::gpu +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index 1df49262e87..c5993d73dec 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -25,9 +25,8 @@ #include #include -namespace cudf { -namespace io { -namespace parquet { +namespace cudf::io::parquet::detail { + constexpr uint32_t parquet_magic = (('P' << 0) | ('A' << 8) | ('R' << 16) | ('1' << 24)); /** @@ -405,6 +404,4 @@ static inline int CountLeadingZeros32(uint32_t value) #endif } -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/parquet_common.hpp b/cpp/src/io/parquet/parquet_common.hpp index 5a1716bb547..50736197eb9 100644 --- a/cpp/src/io/parquet/parquet_common.hpp +++ b/cpp/src/io/parquet/parquet_common.hpp @@ -18,9 +18,8 @@ #include -namespace cudf { -namespace io { -namespace parquet { +namespace cudf::io::parquet::detail { + // Max decimal precisions according to the parquet spec: // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal auto constexpr MAX_DECIMAL32_PRECISION = 9; @@ -156,6 +155,4 @@ enum FieldType { ST_FLD_STRUCT = 12, }; -} // namespace parquet -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/parquet_gpu.cuh b/cpp/src/io/parquet/parquet_gpu.cuh index dc74bee1536..10e12ebb782 100644 --- a/cpp/src/io/parquet/parquet_gpu.cuh +++ b/cpp/src/io/parquet/parquet_gpu.cuh @@ -23,7 +23,7 @@ #include -namespace cudf::io::parquet::gpu { +namespace cudf::io::parquet::detail { auto constexpr KEY_SENTINEL = size_type{-1}; auto constexpr VALUE_SENTINEL = size_type{-1}; @@ -81,4 +81,4 @@ inline size_type __device__ row_to_value_idx(size_type idx, return idx; } -} // namespace 
cudf::io::parquet::gpu +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 51c862b376b..767668cc65e 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -35,7 +35,7 @@ #include -namespace cudf::io::parquet { +namespace cudf::io::parquet::detail { using cudf::io::detail::string_index_pair; @@ -88,8 +88,6 @@ struct input_column_info { auto nesting_depth() const { return nesting.size(); } }; -namespace gpu { - /** * @brief Enums for the flags in the page header */ @@ -347,7 +345,7 @@ struct file_intermediate_data { // all chunks from the selected row groups. We may end up reading these chunks progressively // instead of all at once - std::vector chunks{}; + std::vector chunks{}; // skip_rows/num_rows values for the entire file. these need to be adjusted per-pass because we // may not be visiting every row group that contains these bounds @@ -372,16 +370,16 @@ struct pass_intermediate_data { // rowgroup, chunk and page information for the current pass. 
std::vector row_groups{}; - cudf::detail::hostdevice_vector chunks{}; - cudf::detail::hostdevice_vector pages_info{}; - cudf::detail::hostdevice_vector page_nesting_info{}; - cudf::detail::hostdevice_vector page_nesting_decode_info{}; + cudf::detail::hostdevice_vector chunks{}; + cudf::detail::hostdevice_vector pages_info{}; + cudf::detail::hostdevice_vector page_nesting_info{}; + cudf::detail::hostdevice_vector page_nesting_decode_info{}; rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; rmm::device_uvector page_index{0, rmm::cuda_stream_default}; rmm::device_uvector str_dict_index{0, rmm::cuda_stream_default}; - std::vector output_chunk_read_info; + std::vector output_chunk_read_info; std::size_t current_output_chunk{0}; rmm::device_buffer level_decode_data{}; @@ -739,7 +737,7 @@ void initialize_chunk_hash_maps(device_span chunks, rmm::cuda_st * @param frags Column fragments * @param stream CUDA stream to use */ -void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, +void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream); /** @@ -762,7 +760,7 @@ void collect_map_entries(device_span chunks, rmm::cuda_stream_vi * @param frags Column fragments * @param stream CUDA stream to use */ -void get_dictionary_indices(cudf::detail::device_2dspan frags, +void get_dictionary_indices(cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream); /** @@ -781,7 +779,7 @@ void get_dictionary_indices(cudf::detail::device_2dspan * @param[in] stream CUDA stream to use */ void InitEncoderPages(cudf::detail::device_2dspan chunks, - device_span pages, + device_span pages, device_span page_sizes, device_span comp_page_sizes, device_span col_desc, @@ -847,7 +845,7 @@ void EncodePageHeaders(device_span pages, * @param[in] stream CUDA stream to use */ void GatherPages(device_span chunks, - device_span pages, + device_span pages, rmm::cuda_stream_view stream); /** @@ -863,5 +861,4 @@ void EncodeColumnIndexes(device_span chunks, 
int32_t column_index_truncate_length, rmm::cuda_stream_view stream); -} // namespace gpu -} // namespace cudf::io::parquet +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index 805d082c71e..9083be1c2dd 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -35,7 +35,7 @@ #include #include -namespace cudf::io::detail::parquet { +namespace cudf::io::parquet::detail { namespace { /** @@ -62,13 +62,13 @@ struct stats_caster { // uses storage type as T template () or cudf::is_nested())> - static T convert(uint8_t const* stats_val, size_t stats_size, cudf::io::parquet::Type const type) + static T convert(uint8_t const* stats_val, size_t stats_size, Type const type) { CUDF_FAIL("unsupported type for stats casting"); } template ())> - static T convert(uint8_t const* stats_val, size_t stats_size, cudf::io::parquet::Type const type) + static T convert(uint8_t const* stats_val, size_t stats_size, Type const type) { CUDF_EXPECTS(type == BOOLEAN, "Invalid type and stats combination"); return targetType(*reinterpret_cast(stats_val)); @@ -78,7 +78,7 @@ struct stats_caster { template () and !cudf::is_boolean()) or cudf::is_fixed_point() or cudf::is_chrono())> - static T convert(uint8_t const* stats_val, size_t stats_size, cudf::io::parquet::Type const type) + static T convert(uint8_t const* stats_val, size_t stats_size, Type const type) { switch (type) { case INT32: return targetType(*reinterpret_cast(stats_val)); @@ -103,7 +103,7 @@ struct stats_caster { } template ())> - static T convert(uint8_t const* stats_val, size_t stats_size, cudf::io::parquet::Type const type) + static T convert(uint8_t const* stats_val, size_t stats_size, Type const type) { switch (type) { case FLOAT: return targetType(*reinterpret_cast(stats_val)); @@ -113,7 +113,7 @@ struct stats_caster { } template )> - static T convert(uint8_t const* stats_val, size_t 
stats_size, cudf::io::parquet::Type const type) + static T convert(uint8_t const* stats_val, size_t stats_size, Type const type) { switch (type) { case BYTE_ARRAY: [[fallthrough]]; @@ -527,4 +527,4 @@ named_to_reference_converter::visit_operands( return transformed_operands; } -} // namespace cudf::io::detail::parquet +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/reader.cpp b/cpp/src/io/parquet/reader.cpp index 1e87447006d..17d7c07bc91 100644 --- a/cpp/src/io/parquet/reader.cpp +++ b/cpp/src/io/parquet/reader.cpp @@ -16,7 +16,7 @@ #include "reader_impl.hpp" -namespace cudf::io::detail::parquet { +namespace cudf::io::parquet::detail { reader::reader() = default; @@ -59,4 +59,4 @@ bool chunked_reader::has_next() const { return _impl->has_next(); } table_with_metadata chunked_reader::read_chunk() const { return _impl->read_chunk(); } -} // namespace cudf::io::detail::parquet +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index ea40f29a070..26ec83d5946 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -25,7 +25,7 @@ #include #include -namespace cudf::io::detail::parquet { +namespace cudf::io::parquet::detail { void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) { @@ -38,7 +38,7 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); size_t const sum_max_depths = std::accumulate( - chunks.begin(), chunks.end(), 0, [&](size_t cursum, gpu::ColumnChunkDesc const& chunk) { + chunks.begin(), chunks.end(), 0, [&](size_t cursum, ColumnChunkDesc const& chunk) { return cursum + _metadata->get_output_nesting_depth(chunk.src_col_schema); }); @@ -51,10 +51,10 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) // doing a gather operation later on. 
// TODO: This step is somewhat redundant if size info has already been calculated (nested schema, // chunked reader). - auto const has_strings = (kernel_mask & gpu::KERNEL_MASK_STRING) != 0; + auto const has_strings = (kernel_mask & KERNEL_MASK_STRING) != 0; std::vector col_sizes(_input_columns.size(), 0L); if (has_strings) { - gpu::ComputePageStringSizes( + ComputePageStringSizes( pages, chunks, skip_rows, num_rows, _pass_itm_data->level_type_size, _stream); col_sizes = calculate_page_string_offsets(); @@ -176,19 +176,19 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) if (has_strings) { auto& stream = streams[s_idx++]; chunk_nested_str_data.host_to_device_async(stream); - gpu::DecodeStringPageData( + DecodeStringPageData( pages, chunks, num_rows, skip_rows, level_type_size, error_code.data(), stream); } // launch delta binary decoder - if ((kernel_mask & gpu::KERNEL_MASK_DELTA_BINARY) != 0) { - gpu::DecodeDeltaBinary( + if ((kernel_mask & KERNEL_MASK_DELTA_BINARY) != 0) { + DecodeDeltaBinary( pages, chunks, num_rows, skip_rows, level_type_size, error_code.data(), streams[s_idx++]); } // launch the catch-all page decoder - if ((kernel_mask & gpu::KERNEL_MASK_GENERAL) != 0) { - gpu::DecodePageData( + if ((kernel_mask & KERNEL_MASK_GENERAL) != 0) { + DecodePageData( pages, chunks, num_rows, skip_rows, level_type_size, error_code.data(), streams[s_idx++]); } @@ -248,13 +248,13 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) // update null counts in the final column buffers for (size_t idx = 0; idx < pages.size(); idx++) { - gpu::PageInfo* pi = &pages[idx]; - if (pi->flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { continue; } - gpu::ColumnChunkDesc* col = &chunks[pi->chunk_idx]; + PageInfo* pi = &pages[idx]; + if (pi->flags & PAGEINFO_FLAGS_DICTIONARY) { continue; } + ColumnChunkDesc* col = &chunks[pi->chunk_idx]; input_column_info const& input_col = _input_columns[col->src_col_index]; - int index = pi->nesting_decode - 
page_nesting_decode.device_ptr(); - gpu::PageNestingDecodeInfo* pndi = &page_nesting_decode[index]; + int index = pi->nesting_decode - page_nesting_decode.device_ptr(); + PageNestingDecodeInfo* pndi = &page_nesting_decode[index]; auto* cols = &_output_buffers; for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { @@ -320,7 +320,7 @@ reader::impl::impl(std::size_t chunk_read_limit, // Save the states of the output buffers for reuse in `chunk_read()`. for (auto const& buff : _output_buffers) { - _output_buffers_template.emplace_back(inline_column_buffer::empty_like(buff)); + _output_buffers_template.emplace_back(cudf::io::detail::inline_column_buffer::empty_like(buff)); } } @@ -368,7 +368,7 @@ void reader::impl::prepare_data(int64_t skip_rows, // always create the pass struct, even if we end up with no passes. // this will also cause the previous pass information to be deleted - _pass_itm_data = std::make_unique(); + _pass_itm_data = std::make_unique(); if (_file_itm_data.global_num_rows > 0 && not _file_itm_data.row_groups.empty() && not _input_columns.empty() && _current_input_pass < num_passes) { @@ -521,7 +521,7 @@ table_with_metadata reader::impl::read_chunk() if (_chunk_count > 0) { _output_buffers.resize(0); for (auto const& buff : _output_buffers_template) { - _output_buffers.emplace_back(inline_column_buffer::empty_like(buff)); + _output_buffers.emplace_back(cudf::io::detail::inline_column_buffer::empty_like(buff)); } } @@ -571,4 +571,4 @@ parquet_metadata read_parquet_metadata(host_span con metadata.get_key_value_metadata()[0]}; } -} // namespace cudf::io::detail::parquet +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 9445e4d1648..6003b931b04 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -35,7 +35,7 @@ #include #include -namespace cudf::io::detail::parquet { +namespace cudf::io::parquet::detail { /** * @brief 
Implementation for Parquet reader @@ -261,10 +261,10 @@ class reader::impl { std::vector _input_columns; // Buffers for generating output columns - std::vector _output_buffers; + std::vector _output_buffers; // Buffers copied from `_output_buffers` after construction for reuse - std::vector _output_buffers_template; + std::vector _output_buffers_template; // _output_buffers associated schema indices std::vector _output_column_schemas; @@ -285,8 +285,8 @@ class reader::impl { // Within a pass, we produce one or more chunks of output, whose maximum total // byte size is controlled by _output_chunk_read_limit. - cudf::io::parquet::gpu::file_intermediate_data _file_itm_data; - std::unique_ptr _pass_itm_data; + file_intermediate_data _file_itm_data; + std::unique_ptr _pass_itm_data; // an array of offsets into _file_itm_data::global_chunks. Each pair of offsets represents // the start/end of the chunks to be loaded for a given pass. @@ -301,4 +301,4 @@ class reader::impl { bool _file_preprocessed{false}; }; -} // namespace cudf::io::detail::parquet +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 9778cfc47d2..171cf07da3e 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -21,34 +21,34 @@ #include #include -namespace cudf::io::detail::parquet { +namespace cudf::io::parquet::detail { namespace { ConvertedType logical_type_to_converted_type(LogicalType const& logical) { if (logical.isset.STRING) { - return parquet::UTF8; + return UTF8; } else if (logical.isset.MAP) { - return parquet::MAP; + return MAP; } else if (logical.isset.LIST) { - return parquet::LIST; + return LIST; } else if (logical.isset.ENUM) { - return parquet::ENUM; + return ENUM; } else if (logical.isset.DECIMAL) { - return parquet::DECIMAL; // TODO set decimal values + return DECIMAL; // TODO set decimal values } else if (logical.isset.DATE) { - return 
parquet::DATE; + return DATE; } else if (logical.isset.TIME) { if (logical.TIME.unit.isset.MILLIS) - return parquet::TIME_MILLIS; + return TIME_MILLIS; else if (logical.TIME.unit.isset.MICROS) - return parquet::TIME_MICROS; + return TIME_MICROS; } else if (logical.isset.TIMESTAMP) { if (logical.TIMESTAMP.unit.isset.MILLIS) - return parquet::TIMESTAMP_MILLIS; + return TIMESTAMP_MILLIS; else if (logical.TIMESTAMP.unit.isset.MICROS) - return parquet::TIMESTAMP_MICROS; + return TIMESTAMP_MICROS; } else if (logical.isset.INTEGER) { switch (logical.INTEGER.bitWidth) { case 8: return logical.INTEGER.isSigned ? INT_8 : UINT_8; @@ -58,13 +58,13 @@ ConvertedType logical_type_to_converted_type(LogicalType const& logical) default: break; } } else if (logical.isset.UNKNOWN) { - return parquet::NA; + return NA; } else if (logical.isset.JSON) { - return parquet::JSON; + return JSON; } else if (logical.isset.BSON) { - return parquet::BSON; + return BSON; } - return parquet::UNKNOWN; + return UNKNOWN; } } // namespace @@ -76,39 +76,39 @@ type_id to_type_id(SchemaElement const& schema, bool strings_to_categorical, type_id timestamp_type_id) { - parquet::Type const physical = schema.type; - parquet::LogicalType const logical_type = schema.logical_type; - parquet::ConvertedType converted_type = schema.converted_type; - int32_t decimal_precision = schema.decimal_precision; + Type const physical = schema.type; + LogicalType const logical_type = schema.logical_type; + ConvertedType converted_type = schema.converted_type; + int32_t decimal_precision = schema.decimal_precision; // Logical type used for actual data interpretation; the legacy converted type // is superseded by 'logical' type whenever available. 
auto const inferred_converted_type = logical_type_to_converted_type(logical_type); - if (inferred_converted_type != parquet::UNKNOWN) { converted_type = inferred_converted_type; } - if (inferred_converted_type == parquet::DECIMAL) { + if (inferred_converted_type != UNKNOWN) { converted_type = inferred_converted_type; } + if (inferred_converted_type == DECIMAL) { decimal_precision = schema.logical_type.DECIMAL.precision; } switch (converted_type) { - case parquet::UINT_8: return type_id::UINT8; - case parquet::INT_8: return type_id::INT8; - case parquet::UINT_16: return type_id::UINT16; - case parquet::INT_16: return type_id::INT16; - case parquet::UINT_32: return type_id::UINT32; - case parquet::UINT_64: return type_id::UINT64; - case parquet::DATE: return type_id::TIMESTAMP_DAYS; - case parquet::TIME_MILLIS: return type_id::DURATION_MILLISECONDS; - case parquet::TIME_MICROS: return type_id::DURATION_MICROSECONDS; - case parquet::TIMESTAMP_MILLIS: + case UINT_8: return type_id::UINT8; + case INT_8: return type_id::INT8; + case UINT_16: return type_id::UINT16; + case INT_16: return type_id::INT16; + case UINT_32: return type_id::UINT32; + case UINT_64: return type_id::UINT64; + case DATE: return type_id::TIMESTAMP_DAYS; + case TIME_MILLIS: return type_id::DURATION_MILLISECONDS; + case TIME_MICROS: return type_id::DURATION_MICROSECONDS; + case TIMESTAMP_MILLIS: return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id : type_id::TIMESTAMP_MILLISECONDS; - case parquet::TIMESTAMP_MICROS: + case TIMESTAMP_MICROS: return (timestamp_type_id != type_id::EMPTY) ? 
timestamp_type_id : type_id::TIMESTAMP_MICROSECONDS; - case parquet::DECIMAL: - if (physical == parquet::INT32) { return type_id::DECIMAL32; } - if (physical == parquet::INT64) { return type_id::DECIMAL64; } - if (physical == parquet::FIXED_LEN_BYTE_ARRAY) { + case DECIMAL: + if (physical == INT32) { return type_id::DECIMAL32; } + if (physical == INT64) { return type_id::DECIMAL64; } + if (physical == FIXED_LEN_BYTE_ARRAY) { if (schema.type_length <= static_cast(sizeof(int32_t))) { return type_id::DECIMAL32; } @@ -119,7 +119,7 @@ type_id to_type_id(SchemaElement const& schema, return type_id::DECIMAL128; } } - if (physical == parquet::BYTE_ARRAY) { + if (physical == BYTE_ARRAY) { CUDF_EXPECTS(decimal_precision <= MAX_DECIMAL128_PRECISION, "Invalid decimal precision"); if (decimal_precision <= MAX_DECIMAL32_PRECISION) { return type_id::DECIMAL32; @@ -133,20 +133,20 @@ type_id to_type_id(SchemaElement const& schema, break; // maps are just List>. - case parquet::MAP: - case parquet::LIST: return type_id::LIST; - case parquet::NA: return type_id::STRING; + case MAP: + case LIST: return type_id::LIST; + case NA: return type_id::STRING; // return type_id::EMPTY; //TODO(kn): enable after Null/Empty column support default: break; } - if (inferred_converted_type == parquet::UNKNOWN and physical == parquet::INT64 and + if (inferred_converted_type == UNKNOWN and physical == INT64 and logical_type.TIMESTAMP.unit.isset.NANOS) { return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id : type_id::TIMESTAMP_NANOSECONDS; } - if (inferred_converted_type == parquet::UNKNOWN and physical == parquet::INT64 and + if (inferred_converted_type == UNKNOWN and physical == INT64 and logical_type.TIME.unit.isset.NANOS) { return type_id::DURATION_NANOSECONDS; } @@ -157,16 +157,16 @@ type_id to_type_id(SchemaElement const& schema, // Physical storage type supported by Parquet; controls the on-disk storage // format in combination with the encoding type. 
switch (physical) { - case parquet::BOOLEAN: return type_id::BOOL8; - case parquet::INT32: return type_id::INT32; - case parquet::INT64: return type_id::INT64; - case parquet::FLOAT: return type_id::FLOAT32; - case parquet::DOUBLE: return type_id::FLOAT64; - case parquet::BYTE_ARRAY: - case parquet::FIXED_LEN_BYTE_ARRAY: + case BOOLEAN: return type_id::BOOL8; + case INT32: return type_id::INT32; + case INT64: return type_id::INT64; + case FLOAT: return type_id::FLOAT32; + case DOUBLE: return type_id::FLOAT64; + case BYTE_ARRAY: + case FIXED_LEN_BYTE_ARRAY: // Can be mapped to INT32 (32-bit hash) or STRING return strings_to_categorical ? type_id::INT32 : type_id::STRING; - case parquet::INT96: + case INT96: return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id : type_id::TIMESTAMP_NANOSECONDS; default: break; @@ -420,7 +420,7 @@ std::vector aggregate_reader_metadata::get_pandas_index_names() con return names; } -std::tuple> +std::tuple> aggregate_reader_metadata::select_row_groups( host_span const> row_group_indices, int64_t skip_rows_opt, @@ -438,7 +438,7 @@ aggregate_reader_metadata::select_row_groups( host_span const>(filtered_row_group_indices.value()); } } - std::vector selection; + std::vector selection; auto [rows_to_skip, rows_to_read] = [&]() { if (not row_group_indices.empty()) { return std::pair{}; } auto const from_opts = cudf::io::detail::skip_rows_num_rows_from_options( @@ -478,7 +478,7 @@ aggregate_reader_metadata::select_row_groups( } std::tuple, - std::vector, + std::vector, std::vector> aggregate_reader_metadata::select_columns(std::optional> const& use_names, bool include_index, @@ -496,17 +496,18 @@ aggregate_reader_metadata::select_columns(std::optional : -1; }; - std::vector output_columns; + std::vector output_columns; std::vector input_columns; std::vector nesting; // Return true if column path is valid. e.g. if the path is {"struct1", "child1"}, then it is // valid if "struct1.child1" exists in this file's schema. 
If "struct1" exists but "child1" is // not a child of "struct1" then the function will return false for "struct1" - std::function&, bool)> + std::function&, bool)> build_column = [&](column_name_info const* col_name_info, int schema_idx, - std::vector& out_col_array, + std::vector& out_col_array, bool has_list_parent) { if (schema_idx < 0) { return false; } auto const& schema_elem = get_schema(schema_idx); @@ -529,7 +530,8 @@ aggregate_reader_metadata::select_columns(std::optional : to_type_id(schema_elem, strings_to_categorical, timestamp_type_id); auto const dtype = to_data_type(col_type, schema_elem); - inline_column_buffer output_col(dtype, schema_elem.repetition_type == OPTIONAL); + cudf::io::detail::inline_column_buffer output_col(dtype, + schema_elem.repetition_type == OPTIONAL); if (has_list_parent) { output_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT; } // store the index of this element if inserted in out_col_array nesting.push_back(static_cast(out_col_array.size())); @@ -569,7 +571,8 @@ aggregate_reader_metadata::select_columns(std::optional to_type_id(schema_elem, strings_to_categorical, timestamp_type_id); auto const element_dtype = to_data_type(element_type, schema_elem); - inline_column_buffer element_col(element_dtype, schema_elem.repetition_type == OPTIONAL); + cudf::io::detail::inline_column_buffer element_col( + element_dtype, schema_elem.repetition_type == OPTIONAL); if (has_list_parent || col_type == type_id::LIST) { element_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT; } @@ -732,4 +735,4 @@ aggregate_reader_metadata::select_columns(std::optional std::move(input_columns), std::move(output_columns), std::move(output_column_schemas)); } -} // namespace cudf::io::detail::parquet +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp index 9ee17f26a10..1a73e2f55ac 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.hpp +++ 
b/cpp/src/io/parquet/reader_impl_helpers.hpp @@ -32,9 +32,7 @@ #include #include -namespace cudf::io::detail::parquet { - -using namespace cudf::io::parquet; +namespace cudf::io::parquet::detail { /** * @brief Function that translates Parquet datatype to cuDF type enum @@ -182,7 +180,7 @@ class aggregate_reader_metadata { * @return A tuple of corrected row_start, row_count and list of row group indexes and its * starting row */ - [[nodiscard]] std::tuple> select_row_groups( + [[nodiscard]] std::tuple> select_row_groups( host_span const> row_group_indices, int64_t row_start, std::optional const& row_count, @@ -202,12 +200,13 @@ class aggregate_reader_metadata { * @return input column information, output column information, list of output column schema * indices */ - [[nodiscard]] std:: - tuple, std::vector, std::vector> - select_columns(std::optional> const& use_names, - bool include_index, - bool strings_to_categorical, - type_id timestamp_type_id) const; + [[nodiscard]] std::tuple, + std::vector, + std::vector> + select_columns(std::optional> const& use_names, + bool include_index, + bool strings_to_categorical, + type_id timestamp_type_id) const; }; /** @@ -276,4 +275,4 @@ class named_to_reference_converter : public ast::detail::expression_transformer std::list _operators; }; -} // namespace cudf::io::detail::parquet +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index c731c467f2c..4bc6bb6f43b 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -43,7 +43,8 @@ #include -namespace cudf::io::detail::parquet { +namespace cudf::io::parquet::detail { + namespace { /** @@ -185,11 +186,11 @@ template */ [[nodiscard]] std::tuple conversion_info(type_id column_type_id, type_id timestamp_type_id, - parquet::Type physical, + Type physical, int8_t converted, int32_t length) { - int32_t type_width = (physical == 
parquet::FIXED_LEN_BYTE_ARRAY) ? length : 0; + int32_t type_width = (physical == FIXED_LEN_BYTE_ARRAY) ? length : 0; int32_t clock_rate = 0; if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { type_width = 1; // I32 -> I8 @@ -202,9 +203,9 @@ template } int8_t converted_type = converted; - if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && + if (converted_type == DECIMAL && column_type_id != type_id::FLOAT64 && not cudf::is_fixed_point(data_type{column_type_id})) { - converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal + converted_type = UNKNOWN; // Not converting to float64 or decimal } return std::make_tuple(type_width, clock_rate, converted_type); } @@ -226,7 +227,7 @@ template [[nodiscard]] std::future read_column_chunks_async( std::vector> const& sources, std::vector>& page_data, - cudf::detail::hostdevice_vector& chunks, + cudf::detail::hostdevice_vector& chunks, size_t begin_chunk, size_t end_chunk, std::vector const& column_chunk_offsets, @@ -239,11 +240,10 @@ template size_t const io_offset = column_chunk_offsets[chunk]; size_t io_size = chunks[chunk].compressed_size; size_t next_chunk = chunk + 1; - bool const is_compressed = (chunks[chunk].codec != parquet::Compression::UNCOMPRESSED); + bool const is_compressed = (chunks[chunk].codec != Compression::UNCOMPRESSED); while (next_chunk < end_chunk) { - size_t const next_offset = column_chunk_offsets[next_chunk]; - bool const is_next_compressed = - (chunks[next_chunk].codec != parquet::Compression::UNCOMPRESSED); + size_t const next_offset = column_chunk_offsets[next_chunk]; + bool const is_next_compressed = (chunks[next_chunk].codec != Compression::UNCOMPRESSED); if (next_offset != io_offset + io_size || is_next_compressed != is_compressed || chunk_source_map[chunk] != chunk_source_map[next_chunk]) { // Can't merge if not contiguous or mixing compressed and uncompressed @@ -300,13 +300,13 @@ template * * @return The total number of 
pages */ -[[nodiscard]] size_t count_page_headers( - cudf::detail::hostdevice_vector& chunks, rmm::cuda_stream_view stream) +[[nodiscard]] size_t count_page_headers(cudf::detail::hostdevice_vector& chunks, + rmm::cuda_stream_view stream) { size_t total_pages = 0; chunks.host_to_device_async(stream); - gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); + DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); chunks.device_to_host_sync(stream); for (size_t c = 0; c < chunks.size(); c++) { @@ -337,8 +337,8 @@ constexpr bool is_supported_encoding(Encoding enc) * @param stream CUDA stream used for device memory operations and kernel launches * @returns The size in bytes of level type data required */ -int decode_page_headers(cudf::detail::hostdevice_vector& chunks, - cudf::detail::hostdevice_vector& pages, +int decode_page_headers(cudf::detail::hostdevice_vector& chunks, + cudf::detail::hostdevice_vector& pages, rmm::cuda_stream_view stream) { // IMPORTANT : if you change how pages are stored within a chunk (dist pages, then data pages), @@ -350,14 +350,14 @@ int decode_page_headers(cudf::detail::hostdevice_vector& c } chunks.host_to_device_async(stream); - gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); + DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); // compute max bytes needed for level data auto level_bit_size = cudf::detail::make_counting_transform_iterator(0, [chunks = chunks.begin()] __device__(int i) { auto c = chunks[i]; return static_cast( - max(c.level_bits[gpu::level_type::REPETITION], c.level_bits[gpu::level_type::DEFINITION])); + max(c.level_bits[level_type::REPETITION], c.level_bits[level_type::DEFINITION])); }); // max level data bit size. 
int const max_level_bits = thrust::reduce(rmm::exec_policy(stream), @@ -388,11 +388,11 @@ int decode_page_headers(cudf::detail::hostdevice_vector& c * @return Device buffer to decompressed page data */ [[nodiscard]] rmm::device_buffer decompress_page_data( - cudf::detail::hostdevice_vector& chunks, - cudf::detail::hostdevice_vector& pages, + cudf::detail::hostdevice_vector& chunks, + cudf::detail::hostdevice_vector& pages, rmm::cuda_stream_view stream) { - auto for_each_codec_page = [&](parquet::Compression codec, std::function const& f) { + auto for_each_codec_page = [&](Compression codec, std::function const& f) { for (size_t c = 0, page_count = 0; c < chunks.size(); c++) { const auto page_stride = chunks[c].max_num_pages; if (chunks[c].codec == codec) { @@ -412,19 +412,16 @@ int decode_page_headers(cudf::detail::hostdevice_vector& c size_t total_decomp_size = 0; struct codec_stats { - parquet::Compression compression_type = UNCOMPRESSED; - size_t num_pages = 0; - int32_t max_decompressed_size = 0; - size_t total_decomp_size = 0; + Compression compression_type = UNCOMPRESSED; + size_t num_pages = 0; + int32_t max_decompressed_size = 0; + size_t total_decomp_size = 0; }; - std::array codecs{codec_stats{parquet::GZIP}, - codec_stats{parquet::SNAPPY}, - codec_stats{parquet::BROTLI}, - codec_stats{parquet::ZSTD}}; + std::array codecs{codec_stats{GZIP}, codec_stats{SNAPPY}, codec_stats{BROTLI}, codec_stats{ZSTD}}; auto is_codec_supported = [&codecs](int8_t codec) { - if (codec == parquet::UNCOMPRESSED) return true; + if (codec == UNCOMPRESSED) return true; return std::find_if(codecs.begin(), codecs.end(), [codec](auto& cstats) { return codec == cstats.compression_type; }) != codecs.end(); @@ -445,7 +442,7 @@ int decode_page_headers(cudf::detail::hostdevice_vector& c codec.num_pages++; num_comp_pages++; }); - if (codec.compression_type == parquet::BROTLI && codec.num_pages > 0) { + if (codec.compression_type == BROTLI && codec.num_pages > 0) { 
debrotli_scratch.resize(get_gpu_debrotli_scratch_size(codec.num_pages), stream); } } @@ -482,7 +479,7 @@ int decode_page_headers(cudf::detail::hostdevice_vector& c auto& page = pages[page_idx]; // offset will only be non-zero for V2 pages auto const offset = - page.lvl_bytes[gpu::level_type::DEFINITION] + page.lvl_bytes[gpu::level_type::REPETITION]; + page.lvl_bytes[level_type::DEFINITION] + page.lvl_bytes[level_type::REPETITION]; // for V2 need to copy def and rep level info into place, and then offset the // input and output buffers. otherwise we'd have to keep both the compressed // and decompressed data. @@ -509,11 +506,11 @@ int decode_page_headers(cudf::detail::hostdevice_vector& c device_span d_comp_res_view(comp_res.data() + start_pos, codec.num_pages); switch (codec.compression_type) { - case parquet::GZIP: + case GZIP: gpuinflate(d_comp_in, d_comp_out, d_comp_res_view, gzip_header_included::YES, stream); break; - case parquet::SNAPPY: - if (nvcomp_integration::is_stable_enabled()) { + case SNAPPY: + if (cudf::io::detail::nvcomp_integration::is_stable_enabled()) { nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY, d_comp_in, d_comp_out, @@ -525,7 +522,7 @@ int decode_page_headers(cudf::detail::hostdevice_vector& c gpu_unsnap(d_comp_in, d_comp_out, d_comp_res_view, stream); } break; - case parquet::ZSTD: + case ZSTD: nvcomp::batched_decompress(nvcomp::compression_type::ZSTD, d_comp_in, d_comp_out, @@ -534,7 +531,7 @@ int decode_page_headers(cudf::detail::hostdevice_vector& c codec.total_decomp_size, stream); break; - case parquet::BROTLI: + case BROTLI: gpu_debrotli(d_comp_in, d_comp_out, d_comp_res_view, @@ -594,9 +591,9 @@ void reader::impl::allocate_nesting_info() }); page_nesting_info = - cudf::detail::hostdevice_vector{total_page_nesting_infos, _stream}; + cudf::detail::hostdevice_vector{total_page_nesting_infos, _stream}; page_nesting_decode_info = - cudf::detail::hostdevice_vector{total_page_nesting_infos, _stream}; + 
cudf::detail::hostdevice_vector{total_page_nesting_infos, _stream}; // update pointers in the PageInfos int target_page_index = 0; @@ -653,10 +650,10 @@ void reader::impl::allocate_nesting_info() if (!cur_schema.is_stub()) { // initialize each page within the chunk for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) { - gpu::PageNestingInfo* pni = + PageNestingInfo* pni = &page_nesting_info[nesting_info_index + (p_idx * per_page_nesting_info_size)]; - gpu::PageNestingDecodeInfo* nesting_info = + PageNestingDecodeInfo* nesting_info = &page_nesting_decode_info[nesting_info_index + (p_idx * per_page_nesting_info_size)]; // if we have lists, set our start and end depth remappings @@ -717,9 +714,9 @@ void reader::impl::allocate_level_decode_space() for (size_t idx = 0; idx < pages.size(); idx++) { auto& p = pages[idx]; - p.lvl_decode_buf[gpu::level_type::DEFINITION] = buf; + p.lvl_decode_buf[level_type::DEFINITION] = buf; buf += (LEVEL_DECODE_BUF_SIZE * _pass_itm_data->level_type_size); - p.lvl_decode_buf[gpu::level_type::REPETITION] = buf; + p.lvl_decode_buf[level_type::REPETITION] = buf; buf += (LEVEL_DECODE_BUF_SIZE * _pass_itm_data->level_type_size); } } @@ -824,25 +821,25 @@ void reader::impl::load_global_chunk_info() schema.converted_type, schema.type_length); - chunks.push_back(gpu::ColumnChunkDesc(col_meta.total_compressed_size, - nullptr, - col_meta.num_values, - schema.type, - type_width, - row_group_start, - row_group_rows, - schema.max_definition_level, - schema.max_repetition_level, - _metadata->get_output_nesting_depth(col.schema_idx), - required_bits(schema.max_definition_level), - required_bits(schema.max_repetition_level), - col_meta.codec, - converted_type, - schema.logical_type, - schema.decimal_precision, - clock_rate, - i, - col.schema_idx)); + chunks.push_back(ColumnChunkDesc(col_meta.total_compressed_size, + nullptr, + col_meta.num_values, + schema.type, + type_width, + row_group_start, + row_group_rows, + schema.max_definition_level, 
+ schema.max_repetition_level, + _metadata->get_output_nesting_depth(col.schema_idx), + required_bits(schema.max_definition_level), + required_bits(schema.max_repetition_level), + col_meta.codec, + converted_type, + schema.logical_type, + schema.decimal_precision, + clock_rate, + i, + col.schema_idx)); } remaining_rows -= row_group_rows; @@ -909,7 +906,7 @@ void reader::impl::compute_input_pass_row_group_info() void reader::impl::setup_pass() { // this will also cause the previous pass information to be deleted - _pass_itm_data = std::make_unique(); + _pass_itm_data = std::make_unique(); // setup row groups to be loaded for this pass auto const row_group_start = _input_pass_row_group_offsets[_current_input_pass]; @@ -929,8 +926,7 @@ void reader::impl::setup_pass() auto chunk_start = _file_itm_data.chunks.begin() + (row_group_start * chunks_per_rowgroup); auto chunk_end = _file_itm_data.chunks.begin() + (row_group_end * chunks_per_rowgroup); - _pass_itm_data->chunks = - cudf::detail::hostdevice_vector(num_chunks, _stream); + _pass_itm_data->chunks = cudf::detail::hostdevice_vector(num_chunks, _stream); std::copy(chunk_start, chunk_end, _pass_itm_data->chunks.begin()); // adjust skip_rows and num_rows by what's available in the row groups we are processing @@ -970,7 +966,7 @@ void reader::impl::load_and_decompress_data() // Process dataset chunk pages into output columns auto const total_pages = count_page_headers(chunks, _stream); if (total_pages <= 0) { return; } - pages = cudf::detail::hostdevice_vector(total_pages, total_pages, _stream); + pages = cudf::detail::hostdevice_vector(total_pages, total_pages, _stream); // decoding of column/page information _pass_itm_data->level_type_size = decode_page_headers(chunks, pages, _stream); @@ -978,7 +974,7 @@ void reader::impl::load_and_decompress_data() decomp_page_data = decompress_page_data(chunks, pages, _stream); // Free compressed data for (size_t c = 0; c < chunks.size(); c++) { - if (chunks[c].codec != 
parquet::Compression::UNCOMPRESSED) { raw_page_data[c].reset(); } + if (chunks[c].codec != Compression::UNCOMPRESSED) { raw_page_data[c].reset(); } } } @@ -1019,14 +1015,13 @@ struct cumulative_row_info { }; #if defined(PREPROCESS_DEBUG) -void print_pages(cudf::detail::hostdevice_vector& pages, - rmm::cuda_stream_view _stream) +void print_pages(cudf::detail::hostdevice_vector& pages, rmm::cuda_stream_view _stream) { pages.device_to_host_sync(_stream); for (size_t idx = 0; idx < pages.size(); idx++) { auto const& p = pages[idx]; // skip dictionary pages - if (p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { continue; } + if (p.flags & PAGEINFO_FLAGS_DICTIONARY) { continue; } printf( "P(%lu, s:%d): chunk_row(%d), num_rows(%d), skipped_values(%d), skipped_leaf_values(%d), " "str_bytes(%d)\n", @@ -1040,7 +1035,7 @@ void print_pages(cudf::detail::hostdevice_vector& pages, } } -void print_cumulative_page_info(cudf::detail::hostdevice_vector& pages, +void print_cumulative_page_info(cudf::detail::hostdevice_vector& pages, rmm::device_uvector const& page_index, rmm::device_uvector const& c_info, rmm::cuda_stream_view stream) @@ -1067,7 +1062,7 @@ void print_cumulative_page_info(cudf::detail::hostdevice_vector& printf("Schema %d\n", schemas[idx]); for (size_t pidx = 0; pidx < pages.size(); pidx++) { auto const& page = pages[h_page_index[pidx]]; - if (page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY || page.src_col_schema != schemas[idx]) { + if (page.flags & PAGEINFO_FLAGS_DICTIONARY || page.src_col_schema != schemas[idx]) { continue; } printf("\tP: {%lu, %lu}\n", h_cinfo[pidx].row_count, h_cinfo[pidx].size_bytes); @@ -1075,10 +1070,9 @@ void print_cumulative_page_info(cudf::detail::hostdevice_vector& } } -void print_cumulative_row_info( - host_span sizes, - std::string const& label, - std::optional> splits = std::nullopt) +void print_cumulative_row_info(host_span sizes, + std::string const& label, + std::optional> splits = std::nullopt) { if (splits.has_value()) { 
printf("------------\nSplits\n"); @@ -1093,7 +1087,7 @@ void print_cumulative_row_info( if (splits.has_value()) { // if we have a split at this row count and this is the last instance of this row count auto start = thrust::make_transform_iterator( - splits->begin(), [](gpu::chunk_read_info const& i) { return i.skip_rows; }); + splits->begin(), [](chunk_read_info const& i) { return i.skip_rows; }); auto end = start + splits->size(); auto split = std::find(start, end, sizes[idx].row_count); auto const split_index = [&]() -> int { @@ -1180,12 +1174,12 @@ __device__ size_t row_size_functor::operator()(size_t num_rows, boo * Sums across all nesting levels. */ struct get_cumulative_row_info { - gpu::PageInfo const* const pages; + PageInfo const* const pages; __device__ cumulative_row_info operator()(size_type index) { auto const& page = pages[index]; - if (page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { + if (page.flags & PAGEINFO_FLAGS_DICTIONARY) { return cumulative_row_info{0, 0, page.src_col_schema}; } @@ -1250,15 +1244,15 @@ struct row_total_size { * @param num_rows Total number of rows to read * @param chunk_read_limit Limit on total number of bytes to be returned per read, for all columns */ -std::vector find_splits(std::vector const& sizes, - size_t num_rows, - size_t chunk_read_limit) +std::vector find_splits(std::vector const& sizes, + size_t num_rows, + size_t chunk_read_limit) { // now we have an array of {row_count, real output bytes}. just walk through it and generate // splits. // TODO: come up with a clever way to do this entirely in parallel. 
For now, as long as batch // sizes are reasonably large, this shouldn't iterate too many times - std::vector splits; + std::vector splits; { size_t cur_pos = 0; size_t cur_cumulative_size = 0; @@ -1290,7 +1284,7 @@ std::vector find_splits(std::vector c auto const start_row = cur_row_count; cur_row_count = sizes[split_pos].row_count; - splits.push_back(gpu::chunk_read_info{start_row, cur_row_count - start_row}); + splits.push_back(chunk_read_info{start_row, cur_row_count - start_row}); cur_pos = split_pos; cur_cumulative_size = sizes[split_pos].size_bytes; } @@ -1311,12 +1305,11 @@ std::vector find_splits(std::vector c * @param chunk_read_limit Limit on total number of bytes to be returned per read, for all columns * @param stream CUDA stream to use */ -std::vector compute_splits( - cudf::detail::hostdevice_vector& pages, - gpu::pass_intermediate_data const& id, - size_t num_rows, - size_t chunk_read_limit, - rmm::cuda_stream_view stream) +std::vector compute_splits(cudf::detail::hostdevice_vector& pages, + pass_intermediate_data const& id, + size_t num_rows, + size_t chunk_read_limit, + rmm::cuda_stream_view stream) { auto const& page_keys = id.page_keys; auto const& page_index = id.page_index; @@ -1395,16 +1388,16 @@ std::vector compute_splits( } struct get_page_chunk_idx { - __device__ size_type operator()(gpu::PageInfo const& page) { return page.chunk_idx; } + __device__ size_type operator()(PageInfo const& page) { return page.chunk_idx; } }; struct get_page_num_rows { - __device__ size_type operator()(gpu::PageInfo const& page) { return page.num_rows; } + __device__ size_type operator()(PageInfo const& page) { return page.num_rows; } }; struct get_page_column_index { - gpu::ColumnChunkDesc const* chunks; - __device__ size_type operator()(gpu::PageInfo const& page) + ColumnChunkDesc const* chunks; + __device__ size_type operator()(PageInfo const& page) { return chunks[page.chunk_idx].src_col_index; } @@ -1441,7 +1434,7 @@ struct get_page_nesting_size { 
input_col_info const* const input_cols; size_type const max_depth; size_t const num_pages; - gpu::PageInfo const* const pages; + PageInfo const* const pages; int const* page_indices; __device__ size_type operator()(size_t index) const @@ -1450,7 +1443,7 @@ struct get_page_nesting_size { auto const& page = pages[page_indices[indices.page_idx]]; if (page.src_col_schema != input_cols[indices.col_idx].schema_idx || - page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY || + page.flags & PAGEINFO_FLAGS_DICTIONARY || indices.depth_idx >= input_cols[indices.col_idx].nesting_depth) { return 0; } @@ -1468,7 +1461,7 @@ struct get_reduction_key { * @brief Writes to the chunk_row field of the PageInfo struct. */ struct chunk_row_output_iter { - gpu::PageInfo* p; + PageInfo* p; using value_type = size_type; using difference_type = size_type; using pointer = size_type*; @@ -1490,7 +1483,7 @@ struct chunk_row_output_iter { * @brief Writes to the page_start_value field of the PageNestingInfo struct, keyed by schema. 
*/ struct start_offset_output_iterator { - gpu::PageInfo const* pages; + PageInfo const* pages; int const* page_indices; size_t cur_index; input_col_info const* input_cols; @@ -1529,9 +1522,9 @@ struct start_offset_output_iterator { { auto const indices = reduction_indices{index, max_depth, num_pages}; - gpu::PageInfo const& p = pages[page_indices[indices.page_idx]]; + PageInfo const& p = pages[page_indices[indices.page_idx]]; if (p.src_col_schema != input_cols[indices.col_idx].schema_idx || - p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY || + p.flags & PAGEINFO_FLAGS_DICTIONARY || indices.depth_idx >= input_cols[indices.col_idx].nesting_depth) { return empty; } @@ -1540,15 +1533,15 @@ struct start_offset_output_iterator { }; struct flat_column_num_rows { - gpu::PageInfo const* pages; - gpu::ColumnChunkDesc const* chunks; + PageInfo const* pages; + ColumnChunkDesc const* chunks; __device__ size_type operator()(size_type pindex) const { - gpu::PageInfo const& page = pages[pindex]; + PageInfo const& page = pages[pindex]; // ignore dictionary pages and pages belonging to any column containing repetition (lists) - if ((page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) || - (chunks[page.chunk_idx].max_level[gpu::level_type::REPETITION] > 0)) { + if ((page.flags & PAGEINFO_FLAGS_DICTIONARY) || + (chunks[page.chunk_idx].max_level[level_type::REPETITION] > 0)) { return 0; } return page.num_rows; @@ -1581,8 +1574,8 @@ struct row_counts_different { * @param expected_row_count Expected row count, if applicable * @param stream CUDA stream used for device memory operations and kernel launches */ -void detect_malformed_pages(cudf::detail::hostdevice_vector& pages, - cudf::detail::hostdevice_vector const& chunks, +void detect_malformed_pages(cudf::detail::hostdevice_vector& pages, + cudf::detail::hostdevice_vector const& chunks, device_span page_keys, device_span page_index, std::optional expected_row_count, @@ -1631,23 +1624,21 @@ void 
detect_malformed_pages(cudf::detail::hostdevice_vector& page } struct page_to_string_size { - gpu::PageInfo* pages; - gpu::ColumnChunkDesc const* chunks; + PageInfo* pages; + ColumnChunkDesc const* chunks; __device__ size_t operator()(size_type page_idx) const { auto const page = pages[page_idx]; auto const chunk = chunks[page.chunk_idx]; - if (not is_string_col(chunk) || (page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) != 0) { - return 0; - } + if (not is_string_col(chunk) || (page.flags & PAGEINFO_FLAGS_DICTIONARY) != 0) { return 0; } return pages[page_idx].str_bytes; } }; struct page_offset_output_iter { - gpu::PageInfo* p; + PageInfo* p; size_type const* index; using value_type = size_type; @@ -1738,7 +1729,7 @@ void reader::impl::preprocess_pages(bool uses_custom_row_bounds, size_t chunk_re cols = &out_buf.children; // if this has a list parent, we have to get column sizes from the - // data computed during gpu::ComputePageSizes + // data computed during ComputePageSizes if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { has_lists = true; break; @@ -1749,7 +1740,7 @@ void reader::impl::preprocess_pages(bool uses_custom_row_bounds, size_t chunk_re // generate string dict indices if necessary { - auto is_dict_chunk = [](gpu::ColumnChunkDesc const& chunk) { + auto is_dict_chunk = [](ColumnChunkDesc const& chunk) { return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0; }; @@ -1785,7 +1776,7 @@ void reader::impl::preprocess_pages(bool uses_custom_row_bounds, size_t chunk_re if (total_str_dict_indexes > 0) { chunks.host_to_device_async(_stream); - gpu::BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream); + BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream); } } @@ -1800,14 +1791,14 @@ void reader::impl::preprocess_pages(bool uses_custom_row_bounds, size_t chunk_re // if: // - user has passed custom row bounds // - we will be doing a chunked read - gpu::ComputePageSizes(pages, - chunks, - 0, 
// 0-max size_t. process all possible rows - std::numeric_limits::max(), - true, // compute num_rows - chunk_read_limit > 0, // compute string sizes - _pass_itm_data->level_type_size, - _stream); + ComputePageSizes(pages, + chunks, + 0, // 0-max size_t. process all possible rows + std::numeric_limits::max(), + true, // compute num_rows + chunk_read_limit > 0, // compute string sizes + _pass_itm_data->level_type_size, + _stream); // computes: // PageInfo::chunk_row (the absolute start row index) for all pages @@ -1836,7 +1827,7 @@ void reader::impl::preprocess_pages(bool uses_custom_row_bounds, size_t chunk_re _pass_itm_data->output_chunk_read_info = _output_chunk_read_limit > 0 ? compute_splits(pages, *_pass_itm_data, num_rows, chunk_read_limit, _stream) - : std::vector{{skip_rows, num_rows}}; + : std::vector{{skip_rows, num_rows}}; } void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds) @@ -1853,14 +1844,14 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses // respect the user bounds. It is only necessary to do this second pass if uses_custom_row_bounds // is set (if the user has specified artificial bounds). 
if (uses_custom_row_bounds) { - gpu::ComputePageSizes(pages, - chunks, - skip_rows, - num_rows, - false, // num_rows is already computed - false, // no need to compute string sizes - _pass_itm_data->level_type_size, - _stream); + ComputePageSizes(pages, + chunks, + skip_rows, + num_rows, + false, // num_rows is already computed + false, // no need to compute string sizes + _pass_itm_data->level_type_size, + _stream); // print_pages(pages, _stream); } @@ -1879,7 +1870,7 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses cols = &out_buf.children; // if this has a list parent, we have to get column sizes from the - // data computed during gpu::ComputePageSizes + // data computed during ComputePageSizes if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { has_lists = true; } @@ -2014,4 +2005,4 @@ std::vector reader::impl::calculate_page_string_offsets() return col_sizes; } -} // namespace cudf::io::detail::parquet +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/rle_stream.cuh b/cpp/src/io/parquet/rle_stream.cuh index 2545a074a38..799d6d9fd64 100644 --- a/cpp/src/io/parquet/rle_stream.cuh +++ b/cpp/src/io/parquet/rle_stream.cuh @@ -20,7 +20,7 @@ #include #include -namespace cudf::io::parquet::gpu { +namespace cudf::io::parquet::detail { template constexpr int rle_stream_required_run_buffer_size() @@ -362,4 +362,4 @@ struct rle_stream { } }; -} // namespace cudf::io::parquet::gpu +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index a124f352ee4..50589f23626 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -54,12 +54,9 @@ #include #include -namespace cudf { -namespace io { -namespace detail { -namespace parquet { -using namespace cudf::io::parquet; -using namespace cudf::io; +namespace cudf::io::parquet::detail { + +using namespace cudf::io::detail; struct aggregate_writer_metadata 
{ aggregate_writer_metadata(host_span partitions, @@ -185,13 +182,13 @@ namespace { * @param compression The compression type * @return The supported Parquet compression */ -parquet::Compression to_parquet_compression(compression_type compression) +Compression to_parquet_compression(compression_type compression) { switch (compression) { case compression_type::AUTO: - case compression_type::SNAPPY: return parquet::Compression::SNAPPY; - case compression_type::ZSTD: return parquet::Compression::ZSTD; - case compression_type::NONE: return parquet::Compression::UNCOMPRESSED; + case compression_type::SNAPPY: return Compression::SNAPPY; + case compression_type::ZSTD: return Compression::ZSTD; + case compression_type::NONE: return Compression::UNCOMPRESSED; default: CUDF_FAIL("Unsupported compression type"); } } @@ -206,7 +203,7 @@ void update_chunk_encodings(std::vector& encodings, uint32_t enc_mask) { for (uint8_t enc = 0; enc < static_cast(Encoding::NUM_ENCODINGS); enc++) { auto const enc_enum = static_cast(enc); - if ((enc_mask & gpu::encoding_to_mask(enc_enum)) != 0) { encodings.push_back(enc_enum); } + if ((enc_mask & encoding_to_mask(enc_enum)) != 0) { encodings.push_back(enc_enum); } } } @@ -761,11 +758,11 @@ struct parquet_column_view { std::vector const& schema_tree, rmm::cuda_stream_view stream); - [[nodiscard]] gpu::parquet_column_device_view get_device_view(rmm::cuda_stream_view stream) const; + [[nodiscard]] parquet_column_device_view get_device_view(rmm::cuda_stream_view stream) const; [[nodiscard]] column_view cudf_column_view() const { return cudf_col; } - [[nodiscard]] parquet::Type physical_type() const { return schema_node.type; } - [[nodiscard]] parquet::ConvertedType converted_type() const { return schema_node.converted_type; } + [[nodiscard]] Type physical_type() const { return schema_node.type; } + [[nodiscard]] ConvertedType converted_type() const { return schema_node.converted_type; } std::vector const& get_path_in_schema() { return 
path_in_schema; } @@ -846,11 +843,11 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, uint16_t max_rep_level = 0; curr_schema_node = schema_node; while (curr_schema_node.parent_idx != -1) { - if (curr_schema_node.repetition_type == parquet::REPEATED or - curr_schema_node.repetition_type == parquet::OPTIONAL) { + if (curr_schema_node.repetition_type == REPEATED or + curr_schema_node.repetition_type == OPTIONAL) { ++max_def_level; } - if (curr_schema_node.repetition_type == parquet::REPEATED) { ++max_rep_level; } + if (curr_schema_node.repetition_type == REPEATED) { ++max_rep_level; } curr_schema_node = schema_tree[curr_schema_node.parent_idx]; } CUDF_EXPECTS(max_def_level < 256, "Definition levels above 255 are not supported"); @@ -897,9 +894,9 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, } } -gpu::parquet_column_device_view parquet_column_view::get_device_view(rmm::cuda_stream_view) const +parquet_column_device_view parquet_column_view::get_device_view(rmm::cuda_stream_view) const { - auto desc = gpu::parquet_column_device_view{}; // Zero out all fields + auto desc = parquet_column_device_view{}; // Zero out all fields desc.stats_dtype = schema_node.stats_dtype; desc.ts_scale = schema_node.ts_scale; @@ -931,8 +928,8 @@ gpu::parquet_column_device_view parquet_column_view::get_device_view(rmm::cuda_s * @param fragment_size Number of rows per fragment * @param stream CUDA stream used for device memory operations and kernel launches */ -void init_row_group_fragments(cudf::detail::hostdevice_2dvector& frag, - device_span col_desc, +void init_row_group_fragments(cudf::detail::hostdevice_2dvector& frag, + device_span col_desc, host_span partitions, device_span part_frag_offset, uint32_t fragment_size, @@ -940,7 +937,7 @@ void init_row_group_fragments(cudf::detail::hostdevice_2dvector frag, +void calculate_page_fragments(device_span frag, host_span frag_sizes, rmm::cuda_stream_view stream) { auto d_frag_sz 
= cudf::detail::make_device_uvector_async( frag_sizes, stream, rmm::mr::get_current_device_resource()); - gpu::CalculatePageFragments(frag, d_frag_sz, stream); + CalculatePageFragments(frag, d_frag_sz, stream); } /** @@ -972,13 +969,13 @@ void calculate_page_fragments(device_span frag, * @param stream CUDA stream used for device memory operations and kernel launches */ void gather_fragment_statistics(device_span frag_stats, - device_span frags, + device_span frags, bool int96_timestamps, rmm::cuda_stream_view stream) { rmm::device_uvector frag_stats_group(frag_stats.size(), stream); - gpu::InitFragmentStatistics(frag_stats_group, frags, stream); + InitFragmentStatistics(frag_stats_group, frags, stream); detail::calculate_group_statistics( frag_stats.data(), frag_stats_group.data(), frag_stats.size(), stream, int96_timestamps); stream.synchronize(); @@ -1008,8 +1005,8 @@ size_t max_compression_output_size(Compression codec, uint32_t compression_block return compress_max_output_chunk_size(to_nvcomp_compression_type(codec), compression_blocksize); } -auto init_page_sizes(hostdevice_2dvector& chunks, - device_span col_desc, +auto init_page_sizes(hostdevice_2dvector& chunks, + device_span col_desc, uint32_t num_columns, size_t max_page_size_bytes, size_type max_page_size_rows, @@ -1021,19 +1018,19 @@ auto init_page_sizes(hostdevice_2dvector& chunks, chunks.host_to_device_async(stream); // Calculate number of pages and store in respective chunks - gpu::InitEncoderPages(chunks, - {}, - {}, - {}, - col_desc, - num_columns, - max_page_size_bytes, - max_page_size_rows, - page_alignment(compression_codec), - write_v2_headers, - nullptr, - nullptr, - stream); + InitEncoderPages(chunks, + {}, + {}, + {}, + col_desc, + num_columns, + max_page_size_bytes, + max_page_size_rows, + page_alignment(compression_codec), + write_v2_headers, + nullptr, + nullptr, + stream); chunks.device_to_host_sync(stream); int num_pages = 0; @@ -1046,19 +1043,19 @@ auto 
init_page_sizes(hostdevice_2dvector& chunks, // Now that we know the number of pages, allocate an array to hold per page size and get it // populated cudf::detail::hostdevice_vector page_sizes(num_pages, stream); - gpu::InitEncoderPages(chunks, - {}, - page_sizes, - {}, - col_desc, - num_columns, - max_page_size_bytes, - max_page_size_rows, - page_alignment(compression_codec), - write_v2_headers, - nullptr, - nullptr, - stream); + InitEncoderPages(chunks, + {}, + page_sizes, + {}, + col_desc, + num_columns, + max_page_size_bytes, + max_page_size_rows, + page_alignment(compression_codec), + write_v2_headers, + nullptr, + nullptr, + stream); page_sizes.device_to_host_sync(stream); // Get per-page max compressed size @@ -1072,26 +1069,26 @@ auto init_page_sizes(hostdevice_2dvector& chunks, comp_page_sizes.host_to_device_async(stream); // Use per-page max compressed size to calculate chunk.compressed_size - gpu::InitEncoderPages(chunks, - {}, - {}, - comp_page_sizes, - col_desc, - num_columns, - max_page_size_bytes, - max_page_size_rows, - page_alignment(compression_codec), - write_v2_headers, - nullptr, - nullptr, - stream); + InitEncoderPages(chunks, + {}, + {}, + comp_page_sizes, + col_desc, + num_columns, + max_page_size_bytes, + max_page_size_rows, + page_alignment(compression_codec), + write_v2_headers, + nullptr, + nullptr, + stream); chunks.device_to_host_sync(stream); return comp_page_sizes; } size_t max_page_bytes(Compression compression, size_t max_page_size_bytes) { - if (compression == parquet::Compression::UNCOMPRESSED) { return max_page_size_bytes; } + if (compression == Compression::UNCOMPRESSED) { return max_page_size_bytes; } auto const ncomp_type = to_nvcomp_compression_type(compression); auto const nvcomp_limit = nvcomp::is_compression_disabled(ncomp_type) @@ -1104,9 +1101,9 @@ size_t max_page_bytes(Compression compression, size_t max_page_size_bytes) } std::pair>, std::vector>> -build_chunk_dictionaries(hostdevice_2dvector& chunks, - host_span 
col_desc, - device_2dspan frags, +build_chunk_dictionaries(hostdevice_2dvector& chunks, + host_span col_desc, + device_2dspan frags, Compression compression, dictionary_policy dict_policy, size_t max_dict_size, @@ -1130,7 +1127,7 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, } // Allocate slots for each chunk - std::vector> hash_maps_storage; + std::vector> hash_maps_storage; hash_maps_storage.reserve(h_chunks.size()); for (auto& chunk : h_chunks) { if (col_desc[chunk.col_desc_id].physical_type == Type::BOOLEAN || @@ -1149,8 +1146,8 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, chunks.host_to_device_async(stream); - gpu::initialize_chunk_hash_maps(chunks.device_view().flat_view(), stream); - gpu::populate_chunk_hash_maps(frags, stream); + initialize_chunk_hash_maps(chunks.device_view().flat_view(), stream); + populate_chunk_hash_maps(frags, stream); chunks.device_to_host_sync(stream); @@ -1197,8 +1194,8 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, chunk.dict_index = inserted_dict_index.data(); } chunks.host_to_device_async(stream); - gpu::collect_map_entries(chunks.device_view().flat_view(), stream); - gpu::get_dictionary_indices(frags, stream); + collect_map_entries(chunks.device_view().flat_view(), stream); + get_dictionary_indices(frags, stream); return std::pair(std::move(dict_data), std::move(dict_index)); } @@ -1221,9 +1218,9 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, * @param write_v2_headers True if version 2 page headers are to be written * @param stream CUDA stream used for device memory operations and kernel launches */ -void init_encoder_pages(hostdevice_2dvector& chunks, - device_span col_desc, - device_span pages, +void init_encoder_pages(hostdevice_2dvector& chunks, + device_span col_desc, + device_span pages, cudf::detail::hostdevice_vector& comp_page_sizes, statistics_chunk* page_stats, statistics_chunk* frag_stats, @@ -1286,8 +1283,8 @@ void init_encoder_pages(hostdevice_2dvector& chunks, * @param 
write_v2_headers True if V2 page headers should be written * @param stream CUDA stream used for device memory operations and kernel launches */ -void encode_pages(hostdevice_2dvector& chunks, - device_span pages, +void encode_pages(hostdevice_2dvector& chunks, + device_span pages, uint32_t pages_in_batch, uint32_t first_page_in_batch, uint32_t rowgroups_in_batch, @@ -1308,8 +1305,7 @@ void encode_pages(hostdevice_2dvector& chunks, ? device_span(page_stats + first_page_in_batch, pages_in_batch) : device_span(); - uint32_t max_comp_pages = - (compression != parquet::Compression::UNCOMPRESSED) ? pages_in_batch : 0; + uint32_t max_comp_pages = (compression != Compression::UNCOMPRESSED) ? pages_in_batch : 0; rmm::device_uvector> comp_in(max_comp_pages, stream); rmm::device_uvector> comp_out(max_comp_pages, stream); @@ -1319,9 +1315,9 @@ void encode_pages(hostdevice_2dvector& chunks, comp_res.end(), compression_result{0, compression_status::FAILURE}); - gpu::EncodePages(batch_pages, write_v2_headers, comp_in, comp_out, comp_res, stream); + EncodePages(batch_pages, write_v2_headers, comp_in, comp_out, comp_res, stream); switch (compression) { - case parquet::Compression::SNAPPY: + case Compression::SNAPPY: if (nvcomp::is_compression_disabled(nvcomp::compression_type::SNAPPY)) { gpu_snap(comp_in, comp_out, comp_res, stream); } else { @@ -1329,7 +1325,7 @@ void encode_pages(hostdevice_2dvector& chunks, nvcomp::compression_type::SNAPPY, comp_in, comp_out, comp_res, stream); } break; - case parquet::Compression::ZSTD: { + case Compression::ZSTD: { if (auto const reason = nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD); reason) { CUDF_FAIL("Compression error: " + reason.value()); @@ -1338,7 +1334,7 @@ void encode_pages(hostdevice_2dvector& chunks, break; } - case parquet::Compression::UNCOMPRESSED: break; + case Compression::UNCOMPRESSED: break; default: CUDF_FAIL("invalid compression type"); } @@ -1378,7 +1374,7 @@ void encode_pages(hostdevice_2dvector& 
chunks, * @param column_index_truncate_length maximum length of min or max values in column index, in bytes * @return Computed buffer size needed to encode the column index */ -size_t column_index_buffer_size(gpu::EncColumnChunk* ck, int32_t column_index_truncate_length) +size_t column_index_buffer_size(EncColumnChunk* ck, int32_t column_index_truncate_length) { // encoding the column index for a given chunk requires: // each list (4 of them) requires 6 bytes of overhead @@ -1499,8 +1495,8 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, std::vector this_table_schema(schema_tree.begin(), schema_tree.end()); // Initialize column description - cudf::detail::hostdevice_vector col_desc(parquet_columns.size(), - stream); + cudf::detail::hostdevice_vector col_desc(parquet_columns.size(), + stream); std::transform( parquet_columns.begin(), parquet_columns.end(), col_desc.host_ptr(), [&](auto const& pcol) { return pcol.get_device_view(stream); @@ -1576,7 +1572,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, auto d_part_frag_offset = cudf::detail::make_device_uvector_async( part_frag_offset, stream, rmm::mr::get_current_device_resource()); - cudf::detail::hostdevice_2dvector row_group_fragments( + cudf::detail::hostdevice_2dvector row_group_fragments( num_columns, num_fragments, stream); // Create table_device_view so that corresponding column_device_view data @@ -1588,7 +1584,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, if (num_fragments != 0) { // Move column info to device col_desc.host_to_device_async(stream); - leaf_column_views = create_leaf_column_device_views( + leaf_column_views = create_leaf_column_device_views( col_desc, *parent_column_table_device_view, stream); init_row_group_fragments(row_group_fragments, @@ -1662,7 +1658,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, // Initialize row groups and column chunks auto const num_chunks = num_rowgroups * 
num_columns; - hostdevice_2dvector chunks(num_rowgroups, num_columns, stream); + hostdevice_2dvector chunks(num_rowgroups, num_columns, stream); // total fragments per column (in case they are non-uniform) std::vector frags_per_column(num_columns, 0); @@ -1678,7 +1674,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, row_group.total_byte_size = 0; row_group.columns.resize(num_columns); for (int c = 0; c < num_columns; c++) { - gpu::EncColumnChunk& ck = chunks[r + first_rg_in_part[p]][c]; + EncColumnChunk& ck = chunks[r + first_rg_in_part[p]][c]; ck = {}; ck.col_desc = col_desc.device_ptr() + c; @@ -1700,7 +1696,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, return l + r.num_values; }); ck.plain_data_size = std::accumulate( - chunk_fragments.begin(), chunk_fragments.end(), 0, [](int sum, gpu::PageFragment frag) { + chunk_fragments.begin(), chunk_fragments.end(), 0, [](int sum, PageFragment frag) { return sum + frag.fragment_data_size; }); auto& column_chunk_meta = row_group.columns[c].meta_data; @@ -1731,7 +1727,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, frags_per_column.empty() ? 
0 : frag_offsets.back() + frags_per_column.back(); rmm::device_uvector frag_stats(0, stream); - cudf::detail::hostdevice_vector page_fragments(total_frags, stream); + cudf::detail::hostdevice_vector page_fragments(total_frags, stream); // update fragments and/or prepare for fragment statistics calculation if necessary if (total_frags != 0) { @@ -1749,9 +1745,9 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, auto const& row_group = agg_meta->file(p).row_groups[global_r]; uint32_t const fragments_in_chunk = util::div_rounding_up_unsafe(row_group.num_rows, frag_size); - gpu::EncColumnChunk& ck = chunks[r + first_rg_in_part[p]][c]; - ck.fragments = page_fragments.device_ptr(frag_offset); - ck.first_fragment = frag_offset; + EncColumnChunk& ck = chunks[r + first_rg_in_part[p]][c]; + ck.fragments = page_fragments.device_ptr(frag_offset); + ck.first_fragment = frag_offset; // update the chunk pointer here for each fragment in chunk.fragments for (uint32_t i = 0; i < fragments_in_chunk; i++) { @@ -1817,8 +1813,8 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, size_t comp_rowgroup_size = 0; if (r < num_rowgroups) { for (int i = 0; i < num_columns; i++) { - gpu::EncColumnChunk* ck = &chunks[r][i]; - ck->first_page = num_pages; + EncColumnChunk* ck = &chunks[r][i]; + ck->first_page = num_pages; num_pages += ck->num_pages; pages_in_batch += ck->num_pages; rowgroup_size += ck->bfr_size; @@ -1850,7 +1846,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, } // Clear compressed buffer size if compression has been turned off - if (compression == parquet::Compression::UNCOMPRESSED) { max_comp_bfr_size = 0; } + if (compression == Compression::UNCOMPRESSED) { max_comp_bfr_size = 0; } // Initialize data pointers in batch uint32_t const num_stats_bfr = @@ -1864,7 +1860,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, stream); rmm::device_buffer col_idx_bfr(column_index_bfr_size, stream); - 
rmm::device_uvector pages(num_pages, stream); + rmm::device_uvector pages(num_pages, stream); // This contains stats for both the pages and the rowgroups. TODO: make them separate. rmm::device_uvector page_stats(num_stats_bfr, stream); @@ -1874,10 +1870,10 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, auto bfr_c = static_cast(comp_bfr.data()); for (auto j = 0; j < batch_list[b]; j++, r++) { for (auto i = 0; i < num_columns; i++) { - gpu::EncColumnChunk& ck = chunks[r][i]; - ck.uncompressed_bfr = bfr; - ck.compressed_bfr = bfr_c; - ck.column_index_blob = bfr_i; + EncColumnChunk& ck = chunks[r][i]; + ck.uncompressed_bfr = bfr; + ck.compressed_bfr = bfr_c; + ck.column_index_blob = bfr_i; bfr += ck.bfr_size; bfr_c += ck.compressed_size; if (stats_granularity == statistics_freq::STATISTICS_COLUMN) { @@ -1960,7 +1956,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, if (ck.ck_stat_size != 0) { std::vector const stats_blob = cudf::detail::make_std_vector_sync( device_span(dev_bfr, ck.ck_stat_size), stream); - cudf::io::parquet::CompactProtocolReader cp(stats_blob.data(), stats_blob.size()); + CompactProtocolReader cp(stats_blob.data(), stats_blob.size()); cp.read(&column_chunk_meta.statistics); need_sync = true; } @@ -2142,8 +2138,8 @@ void writer::impl::write(table_view const& input, std::vector co void writer::impl::write_parquet_data_to_sink( std::unique_ptr& updated_agg_meta, - device_span pages, - host_2dspan chunks, + device_span pages, + host_2dspan chunks, host_span global_rowgroup_base, host_span first_rg_in_part, host_span batch_list, @@ -2209,7 +2205,7 @@ void writer::impl::write_parquet_data_to_sink( int const global_r = global_rowgroup_base[p] + r - first_rg_in_part[p]; auto const& row_group = _agg_meta->file(p).row_groups[global_r]; for (std::size_t i = 0; i < num_columns; i++) { - gpu::EncColumnChunk const& ck = chunks[r][i]; + EncColumnChunk const& ck = chunks[r][i]; auto const& column_chunk_meta = 
row_group.columns[i].meta_data; // start transfer of the column index @@ -2392,7 +2388,4 @@ std::unique_ptr> writer::merge_row_group_metadata( return std::make_unique>(std::move(output)); } -} // namespace parquet -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp index 89ef85ba2bd..1d27a8400c8 100644 --- a/cpp/src/io/parquet/writer_impl.hpp +++ b/cpp/src/io/parquet/writer_impl.hpp @@ -38,15 +38,11 @@ #include #include -namespace cudf { -namespace io { -namespace detail { -namespace parquet { +namespace cudf::io::parquet::detail { + // Forward internal classes struct aggregate_writer_metadata; -using namespace cudf::io::parquet; -using namespace cudf::io; using cudf::detail::device_2dspan; using cudf::detail::host_2dspan; using cudf::detail::hostdevice_2dvector; @@ -66,7 +62,7 @@ class writer::impl { */ explicit impl(std::vector> sinks, parquet_writer_options const& options, - single_write_mode mode, + cudf::io::detail::single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -79,7 +75,7 @@ class writer::impl { */ explicit impl(std::vector> sinks, chunked_parquet_writer_options const& options, - single_write_mode mode, + cudf::io::detail::single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -139,8 +135,8 @@ class writer::impl { * @param[out] bounce_buffer Temporary host output buffer */ void write_parquet_data_to_sink(std::unique_ptr& updated_agg_meta, - device_span pages, - host_2dspan chunks, + device_span pages, + host_2dspan chunks, host_span global_rowgroup_base, host_span first_rg_in_part, host_span batch_list, @@ -164,9 +160,10 @@ class writer::impl { bool const _write_v2_headers; int32_t const _column_index_truncate_length; std::vector> const _kv_meta; // Optional user metadata. 
- single_write_mode const _single_write_mode; // Special parameter only used by `write()` to - // indicate that we are guaranteeing a single table - // write. This enables some internal optimizations. + cudf::io::detail::single_write_mode const + _single_write_mode; // Special parameter only used by `write()` to + // indicate that we are guaranteeing a single table + // write. This enables some internal optimizations. std::vector> const _out_sink; // Internal states, filled during `write()` and written to sink during `write` and `close()`. @@ -180,7 +177,4 @@ class writer::impl { bool _closed = false; // To track if the output has been written to sink. }; -} // namespace parquet -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace cudf::io::parquet::detail diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 73c946a5feb..3e5d7033e60 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -200,29 +200,30 @@ std::unique_ptr make_parquet_list_list_col( // of the file to populate the FileMetaData pointed to by file_meta_data. // throws cudf::logic_error if the file or metadata is invalid. 
void read_footer(std::unique_ptr const& source, - cudf::io::parquet::FileMetaData* file_meta_data) + cudf::io::parquet::detail::FileMetaData* file_meta_data) { - constexpr auto header_len = sizeof(cudf::io::parquet::file_header_s); - constexpr auto ender_len = sizeof(cudf::io::parquet::file_ender_s); + constexpr auto header_len = sizeof(cudf::io::parquet::detail::file_header_s); + constexpr auto ender_len = sizeof(cudf::io::parquet::detail::file_ender_s); auto const len = source->size(); auto const header_buffer = source->host_read(0, header_len); auto const header = - reinterpret_cast(header_buffer->data()); + reinterpret_cast(header_buffer->data()); auto const ender_buffer = source->host_read(len - ender_len, ender_len); - auto const ender = reinterpret_cast(ender_buffer->data()); + auto const ender = + reinterpret_cast(ender_buffer->data()); // checks for valid header, footer, and file length ASSERT_GT(len, header_len + ender_len); - ASSERT_TRUE(header->magic == cudf::io::parquet::parquet_magic && - ender->magic == cudf::io::parquet::parquet_magic); + ASSERT_TRUE(header->magic == cudf::io::parquet::detail::parquet_magic && + ender->magic == cudf::io::parquet::detail::parquet_magic); ASSERT_TRUE(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len)); // parquet files end with 4-byte footer_length and 4-byte magic == "PAR1" // seek backwards from the end of the file (footer_length + 8 bytes of ender) auto const footer_buffer = source->host_read(len - ender->footer_len - ender_len, ender->footer_len); - cudf::io::parquet::CompactProtocolReader cp(footer_buffer->data(), ender->footer_len); + cudf::io::parquet::detail::CompactProtocolReader cp(footer_buffer->data(), ender->footer_len); // returns true on success bool res = cp.read(file_meta_data); @@ -233,14 +234,14 @@ void read_footer(std::unique_ptr const& source, // this assumes the data is uncompressed. // throws cudf::logic_error if the page_loc data is invalid. 
int read_dict_bits(std::unique_ptr const& source, - cudf::io::parquet::PageLocation const& page_loc) + cudf::io::parquet::detail::PageLocation const& page_loc) { CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header"); CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length"); - cudf::io::parquet::PageHeader page_hdr; + cudf::io::parquet::detail::PageHeader page_hdr; auto const page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size); - cudf::io::parquet::CompactProtocolReader cp(page_buf->data(), page_buf->size()); + cudf::io::parquet::detail::CompactProtocolReader cp(page_buf->data(), page_buf->size()); bool res = cp.read(&page_hdr); CUDF_EXPECTS(res, "Cannot parse page header"); @@ -252,15 +253,16 @@ int read_dict_bits(std::unique_ptr const& source, // read column index from datasource at location indicated by chunk, // parse and return as a ColumnIndex struct. // throws cudf::logic_error if the chunk data is invalid. -cudf::io::parquet::ColumnIndex read_column_index( - std::unique_ptr const& source, cudf::io::parquet::ColumnChunk const& chunk) +cudf::io::parquet::detail::ColumnIndex read_column_index( + std::unique_ptr const& source, + cudf::io::parquet::detail::ColumnChunk const& chunk) { CUDF_EXPECTS(chunk.column_index_offset > 0, "Cannot find column index"); CUDF_EXPECTS(chunk.column_index_length > 0, "Invalid column index length"); - cudf::io::parquet::ColumnIndex colidx; + cudf::io::parquet::detail::ColumnIndex colidx; auto const ci_buf = source->host_read(chunk.column_index_offset, chunk.column_index_length); - cudf::io::parquet::CompactProtocolReader cp(ci_buf->data(), ci_buf->size()); + cudf::io::parquet::detail::CompactProtocolReader cp(ci_buf->data(), ci_buf->size()); bool res = cp.read(&colidx); CUDF_EXPECTS(res, "Cannot parse column index"); return colidx; @@ -269,22 +271,24 @@ cudf::io::parquet::ColumnIndex read_column_index( // read offset index from datasource at location indicated by chunk, // parse 
and return as an OffsetIndex struct. // throws cudf::logic_error if the chunk data is invalid. -cudf::io::parquet::OffsetIndex read_offset_index( - std::unique_ptr const& source, cudf::io::parquet::ColumnChunk const& chunk) +cudf::io::parquet::detail::OffsetIndex read_offset_index( + std::unique_ptr const& source, + cudf::io::parquet::detail::ColumnChunk const& chunk) { CUDF_EXPECTS(chunk.offset_index_offset > 0, "Cannot find offset index"); CUDF_EXPECTS(chunk.offset_index_length > 0, "Invalid offset index length"); - cudf::io::parquet::OffsetIndex offidx; + cudf::io::parquet::detail::OffsetIndex offidx; auto const oi_buf = source->host_read(chunk.offset_index_offset, chunk.offset_index_length); - cudf::io::parquet::CompactProtocolReader cp(oi_buf->data(), oi_buf->size()); + cudf::io::parquet::detail::CompactProtocolReader cp(oi_buf->data(), oi_buf->size()); bool res = cp.read(&offidx); CUDF_EXPECTS(res, "Cannot parse offset index"); return offidx; } // Return as a Statistics from the column chunk -cudf::io::parquet::Statistics const& get_statistics(cudf::io::parquet::ColumnChunk const& chunk) +cudf::io::parquet::detail::Statistics const& get_statistics( + cudf::io::parquet::detail::ColumnChunk const& chunk) { return chunk.meta_data.statistics; } @@ -292,15 +296,16 @@ cudf::io::parquet::Statistics const& get_statistics(cudf::io::parquet::ColumnChu // read page header from datasource at location indicated by page_loc, // parse and return as a PageHeader struct. // throws cudf::logic_error if the page_loc data is invalid. 
-cudf::io::parquet::PageHeader read_page_header(std::unique_ptr const& source, - cudf::io::parquet::PageLocation const& page_loc) +cudf::io::parquet::detail::PageHeader read_page_header( + std::unique_ptr const& source, + cudf::io::parquet::detail::PageLocation const& page_loc) { CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header"); CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length"); - cudf::io::parquet::PageHeader page_hdr; + cudf::io::parquet::detail::PageHeader page_hdr; auto const page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size); - cudf::io::parquet::CompactProtocolReader cp(page_buf->data(), page_buf->size()); + cudf::io::parquet::detail::CompactProtocolReader cp(page_buf->data(), page_buf->size()); bool res = cp.read(&page_hdr); CUDF_EXPECTS(res, "Cannot parse page header"); return page_hdr; @@ -3686,7 +3691,7 @@ TEST_F(ParquetWriterTest, CheckPageRows) // check first page header and make sure it has only page_rows values auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); ASSERT_GT(fmd.row_groups.size(), 0); @@ -3697,7 +3702,7 @@ TEST_F(ParquetWriterTest, CheckPageRows) // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded // version should be smaller than size of the struct. 
auto const ph = read_page_header( - source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::PageHeader), 0}); + source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::detail::PageHeader), 0}); EXPECT_EQ(ph.data_page_header.num_values, page_rows); } @@ -3722,7 +3727,7 @@ TEST_F(ParquetWriterTest, CheckPageRowsAdjusted) // check first page header and make sure it has only page_rows values auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); ASSERT_GT(fmd.row_groups.size(), 0); @@ -3733,7 +3738,7 @@ TEST_F(ParquetWriterTest, CheckPageRowsAdjusted) // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded // version should be smaller than size of the struct. auto const ph = read_page_header( - source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::PageHeader), 0}); + source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::detail::PageHeader), 0}); EXPECT_LE(ph.data_page_header.num_values, rows_per_page); } @@ -3759,7 +3764,7 @@ TEST_F(ParquetWriterTest, CheckPageRowsTooSmall) // check that file is written correctly when rows/page < fragment size auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); ASSERT_TRUE(fmd.row_groups.size() > 0); @@ -3770,7 +3775,7 @@ TEST_F(ParquetWriterTest, CheckPageRowsTooSmall) // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded // version should be smaller than size of the struct. 
auto const ph = read_page_header( - source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::PageHeader), 0}); + source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::detail::PageHeader), 0}); // there should be only one page since the fragment size is larger than rows_per_page EXPECT_EQ(ph.data_page_header.num_values, num_rows); @@ -3798,7 +3803,7 @@ TEST_F(ParquetWriterTest, Decimal128Stats) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -4031,7 +4036,7 @@ TYPED_TEST(ParquetWriterComparableTypeTest, ThreeColumnSorted) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); ASSERT_GT(fmd.row_groups.size(), 0); @@ -4041,10 +4046,10 @@ TYPED_TEST(ParquetWriterComparableTypeTest, ThreeColumnSorted) // now check that the boundary order for chunk 1 is ascending, // chunk 2 is descending, and chunk 3 is unordered - cudf::io::parquet::BoundaryOrder expected_orders[] = { - cudf::io::parquet::BoundaryOrder::ASCENDING, - cudf::io::parquet::BoundaryOrder::DESCENDING, - cudf::io::parquet::BoundaryOrder::UNORDERED}; + cudf::io::parquet::detail::BoundaryOrder expected_orders[] = { + cudf::io::parquet::detail::BoundaryOrder::ASCENDING, + cudf::io::parquet::detail::BoundaryOrder::DESCENDING, + cudf::io::parquet::detail::BoundaryOrder::UNORDERED}; for (std::size_t i = 0; i < columns.size(); i++) { auto const ci = read_column_index(source, columns[i]); @@ -4067,15 +4072,15 @@ int32_t compare(T& v1, T& v2) // 1 if v1 > v2. 
int32_t compare_binary(std::vector const& v1, std::vector const& v2, - cudf::io::parquet::Type ptype, - cudf::io::parquet::ConvertedType ctype) + cudf::io::parquet::detail::Type ptype, + cudf::io::parquet::detail::ConvertedType ctype) { switch (ptype) { - case cudf::io::parquet::INT32: + case cudf::io::parquet::detail::INT32: switch (ctype) { - case cudf::io::parquet::UINT_8: - case cudf::io::parquet::UINT_16: - case cudf::io::parquet::UINT_32: + case cudf::io::parquet::detail::UINT_8: + case cudf::io::parquet::detail::UINT_16: + case cudf::io::parquet::detail::UINT_32: return compare(*(reinterpret_cast(v1.data())), *(reinterpret_cast(v2.data()))); default: @@ -4083,23 +4088,23 @@ int32_t compare_binary(std::vector const& v1, *(reinterpret_cast(v2.data()))); } - case cudf::io::parquet::INT64: - if (ctype == cudf::io::parquet::UINT_64) { + case cudf::io::parquet::detail::INT64: + if (ctype == cudf::io::parquet::detail::UINT_64) { return compare(*(reinterpret_cast(v1.data())), *(reinterpret_cast(v2.data()))); } return compare(*(reinterpret_cast(v1.data())), *(reinterpret_cast(v2.data()))); - case cudf::io::parquet::FLOAT: + case cudf::io::parquet::detail::FLOAT: return compare(*(reinterpret_cast(v1.data())), *(reinterpret_cast(v2.data()))); - case cudf::io::parquet::DOUBLE: + case cudf::io::parquet::detail::DOUBLE: return compare(*(reinterpret_cast(v1.data())), *(reinterpret_cast(v2.data()))); - case cudf::io::parquet::BYTE_ARRAY: { + case cudf::io::parquet::detail::BYTE_ARRAY: { int32_t v1sz = v1.size(); int32_t v2sz = v2.size(); int32_t ret = memcmp(v1.data(), v2.data(), std::min(v1sz, v2sz)); @@ -4142,7 +4147,7 @@ TEST_P(ParquetV2Test, LargeColumnIndex) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -4164,10 +4169,10 @@ TEST_P(ParquetV2Test, LargeColumnIndex) TEST_P(ParquetV2Test, 
CheckColumnOffsetIndex) { - constexpr auto num_rows = 100000; - auto const is_v2 = GetParam(); - auto const expected_hdr_type = - is_v2 ? cudf::io::parquet::PageType::DATA_PAGE_V2 : cudf::io::parquet::PageType::DATA_PAGE; + constexpr auto num_rows = 100000; + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; // fixed length strings auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { @@ -4210,7 +4215,7 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndex) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -4255,10 +4260,10 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndex) TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) { - constexpr auto num_rows = 100000; - auto const is_v2 = GetParam(); - auto const expected_hdr_type = - is_v2 ? cudf::io::parquet::PageType::DATA_PAGE_V2 : cudf::io::parquet::PageType::DATA_PAGE; + constexpr auto num_rows = 100000; + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; // fixed length strings auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { @@ -4311,7 +4316,7 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -4362,10 +4367,10 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) { - constexpr auto num_rows = 100000; - auto const is_v2 = GetParam(); - auto const expected_hdr_type = - is_v2 ? 
cudf::io::parquet::PageType::DATA_PAGE_V2 : cudf::io::parquet::PageType::DATA_PAGE; + constexpr auto num_rows = 100000; + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; // fixed length strings auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { @@ -4403,7 +4408,7 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -4458,9 +4463,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) TEST_P(ParquetV2Test, CheckColumnOffsetIndexStruct) { - auto const is_v2 = GetParam(); - auto const expected_hdr_type = - is_v2 ? cudf::io::parquet::PageType::DATA_PAGE_V2 : cudf::io::parquet::PageType::DATA_PAGE; + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; auto c0 = testdata::ascending(); @@ -4495,7 +4500,7 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexStruct) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -4542,9 +4547,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexStruct) TEST_P(ParquetV2Test, CheckColumnOffsetIndexStructNulls) { - auto const is_v2 = GetParam(); - auto const expected_hdr_type = - is_v2 ? cudf::io::parquet::PageType::DATA_PAGE_V2 : cudf::io::parquet::PageType::DATA_PAGE; + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? 
cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; auto validity2 = cudf::detail::make_counting_transform_iterator(0, [](cudf::size_type i) { return i % 2; }); @@ -4586,7 +4591,7 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexStructNulls) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -4616,9 +4621,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexStructNulls) TEST_P(ParquetV2Test, CheckColumnIndexListWithNulls) { - auto const is_v2 = GetParam(); - auto const expected_hdr_type = - is_v2 ? cudf::io::parquet::PageType::DATA_PAGE_V2 : cudf::io::parquet::PageType::DATA_PAGE; + auto const is_v2 = GetParam(); + auto const expected_hdr_type = is_v2 ? cudf::io::parquet::detail::PageType::DATA_PAGE_V2 + : cudf::io::parquet::detail::PageType::DATA_PAGE; using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; @@ -4711,7 +4716,7 @@ TEST_P(ParquetV2Test, CheckColumnIndexListWithNulls) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -4812,7 +4817,7 @@ TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -4870,7 +4875,7 @@ TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation) cudf::io::write_parquet(out_opts); auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); @@ -5030,10 +5035,10 @@ TEST_F(ParquetReaderTest, NestedByteArray) cudf::io::write_parquet(out_opts); 
auto source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); - EXPECT_EQ(fmd.schema[5].type, cudf::io::parquet::Type::BYTE_ARRAY); + EXPECT_EQ(fmd.schema[5].type, cudf::io::parquet::detail::Type::BYTE_ARRAY); std::vector md{ {}, @@ -5081,12 +5086,12 @@ TEST_F(ParquetWriterTest, ByteArrayStats) auto result = cudf::io::read_parquet(in_opts); auto source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); - EXPECT_EQ(fmd.schema[1].type, cudf::io::parquet::Type::BYTE_ARRAY); - EXPECT_EQ(fmd.schema[2].type, cudf::io::parquet::Type::BYTE_ARRAY); + EXPECT_EQ(fmd.schema[1].type, cudf::io::parquet::detail::Type::BYTE_ARRAY); + EXPECT_EQ(fmd.schema[2].type, cudf::io::parquet::detail::Type::BYTE_ARRAY); auto const stats0 = get_statistics(fmd.row_groups[0].columns[0]); auto const stats1 = get_statistics(fmd.row_groups[0].columns[1]); @@ -5137,9 +5142,9 @@ TEST_F(ParquetReaderTest, StructByteArray) TEST_F(ParquetReaderTest, NestingOptimizationTest) { - // test nesting levels > cudf::io::parquet::gpu::max_cacheable_nesting_decode_info deep. + // test nesting levels > cudf::io::parquet::detail::max_cacheable_nesting_decode_info deep. 
constexpr cudf::size_type num_nesting_levels = 16; - static_assert(num_nesting_levels > cudf::io::parquet::gpu::max_cacheable_nesting_decode_info); + static_assert(num_nesting_levels > cudf::io::parquet::detail::max_cacheable_nesting_decode_info); constexpr cudf::size_type rows_per_level = 2; constexpr cudf::size_type num_values = (1 << num_nesting_levels) * rows_per_level; @@ -5206,13 +5211,13 @@ TEST_F(ParquetWriterTest, SingleValueDictionaryTest) // make sure dictionary was used auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); auto used_dict = [&fmd]() { for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { - if (enc == cudf::io::parquet::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { + if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { return true; } } @@ -5252,13 +5257,13 @@ TEST_F(ParquetWriterTest, DictionaryNeverTest) // make sure dictionary was not used auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); auto used_dict = [&fmd]() { for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { - if (enc == cudf::io::parquet::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { + if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { return true; } } @@ -5303,13 +5308,13 @@ TEST_F(ParquetWriterTest, DictionaryAdaptiveTest) // make sure dictionary was used as expected. col0 should use one, // col1 should not. 
auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); auto used_dict = [&fmd](int col) { for (auto enc : fmd.row_groups[0].columns[col].meta_data.encodings) { - if (enc == cudf::io::parquet::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { + if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { return true; } } @@ -5354,13 +5359,13 @@ TEST_F(ParquetWriterTest, DictionaryAlwaysTest) // make sure dictionary was used for both columns auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); auto used_dict = [&fmd](int col) { for (auto enc : fmd.row_groups[0].columns[col].meta_data.encodings) { - if (enc == cudf::io::parquet::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { + if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { return true; } } @@ -5438,13 +5443,13 @@ TEST_P(ParquetSizedTest, DictionaryTest) // make sure dictionary was used auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); auto used_dict = [&fmd]() { for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { - if (enc == cudf::io::parquet::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { + if (enc == cudf::io::parquet::detail::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::detail::Encoding::RLE_DICTIONARY) { return true; } } @@ -6664,7 +6669,7 @@ TEST_F(ParquetWriterTest, PreserveNullability) TEST_P(ParquetV2Test, CheckEncodings) { - using cudf::io::parquet::Encoding; + 
using cudf::io::parquet::detail::Encoding; constexpr auto num_rows = 100'000; auto const is_v2 = GetParam(); @@ -6697,7 +6702,7 @@ TEST_P(ParquetV2Test, CheckEncodings) }; auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; + cudf::io::parquet::detail::FileMetaData fmd; read_footer(source, &fmd); auto const& chunk0_enc = fmd.row_groups[0].columns[0].meta_data.encodings; From e345620ddaf5d8ac87e2428a84508ecfec2ba4f8 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Mon, 9 Oct 2023 10:13:25 -0700 Subject: [PATCH 46/76] Add stream parameter to List Manipulation and Operations APIs (#14248) I have organized the public List APIs into **three** distinct categories based on their functionality, simplifying the PRs for easier and shorter reviews. This particular PR introduces the `stream` parameter only to the `List Manipulation and Operations APIs`, which fall under `Section 1`. See next comment for other sections. 1. List Manipulation and Operations (`combine.hpp`, `contains.hpp`, `count_elements.hpp`) ``` concatenate_rows concatenate_list_elements contains_nulls contains - search_keys contains - search_key index_of - search_keys index_of - search_key count_elements ``` This PR addresses issues in the following files: 1. **column_wrapper.hpp**: - Corrects the improper passing of the stream value in the `make_lists_column` function. - Enables the missing cast to `lists_column_view`. - Substitutes `copy_bitmask` with `cudf::detail::copy_bitmask` to include the stream parameter. 2. **concatenate.cu:** - Substitutes `create_null_mask` with `cudf::detail::create_null_mask` to include the stream parameter. 
Authors: - Suraj Aralihalli (https://github.com/SurajAralihalli) - Vyas Ramasubramani (https://github.com/vyasr) - Yunsong Wang (https://github.com/PointKernel) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/14248 --- cpp/include/cudf/lists/combine.hpp | 4 + cpp/include/cudf/lists/contains.hpp | 14 ++- cpp/include/cudf/lists/count_elements.hpp | 2 + cpp/include/cudf_test/column_wrapper.hpp | 45 +++++++--- .../combine/concatenate_list_elements.cu | 3 +- cpp/src/lists/combine/concatenate_rows.cu | 3 +- cpp/src/lists/contains.cu | 37 ++++---- cpp/src/lists/copying/concatenate.cu | 5 +- cpp/src/lists/count_elements.cu | 3 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/lists_test.cpp | 87 +++++++++++++++++++ 11 files changed, 169 insertions(+), 35 deletions(-) create mode 100644 cpp/tests/streams/lists_test.cpp diff --git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp index 0bc76828fc3..0d9c1c157eb 100644 --- a/cpp/include/cudf/lists/combine.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -57,6 +57,7 @@ enum class concatenate_null_policy { IGNORE, NULLIFY_OUTPUT_ROW }; * @param input Table of lists to be concatenated. * @param null_policy The parameter to specify whether a null list element will be ignored from * concatenation, or any concatenation involving a null element will result in a null list. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return A new column in which each row is a list resulted from concatenating all list elements in * the corresponding row of the input table. 
@@ -64,6 +65,7 @@ enum class concatenate_null_policy { IGNORE, NULLIFY_OUTPUT_ROW }; std::unique_ptr concatenate_rows( table_view const& input, concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -86,6 +88,7 @@ std::unique_ptr concatenate_rows( * @param input The lists column containing lists of list elements to concatenate. * @param null_policy The parameter to specify whether a null list element will be ignored from * concatenation, or any concatenation involving a null element will result in a null list. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return A new column in which each row is a list resulted from concatenating all list elements in * the corresponding row of the input lists column. @@ -93,6 +96,7 @@ std::unique_ptr concatenate_rows( std::unique_ptr concatenate_list_elements( column_view const& input, concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp index 21c2ca1d64e..7cf67ec9205 100644 --- a/cpp/include/cudf/lists/contains.hpp +++ b/cpp/include/cudf/lists/contains.hpp @@ -42,12 +42,14 @@ namespace lists { * * @param lists Lists column whose `n` rows are to be searched * @param search_key The scalar key to be looked up in each list row + * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource used to allocate the returned column's device memory * @return BOOL8 column of `n` rows with the result of the lookup */ std::unique_ptr contains( cudf::lists_column_view const& lists, cudf::scalar const& search_key, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -63,13 +65,15 @@ std::unique_ptr contains( * 2. The list row `lists[i]` is null * * @param lists Lists column whose `n` rows are to be searched - * @param search_keys Column of elements to be looked up in each list row + * @param search_keys Column of elements to be looked up in each list row. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory * @return BOOL8 column of `n` rows with the result of the lookup */ std::unique_ptr contains( cudf::lists_column_view const& lists, cudf::column_view const& search_keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -84,12 +88,14 @@ std::unique_ptr contains( * A row with an empty list will always return false. * Nulls inside non-null nested elements (such as lists or structs) are not considered. * - * @param lists Lists column whose `n` rows are to be searched + * @param lists Lists column whose `n` rows are to be searched. + * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource used to allocate the returned column's device memory * @return BOOL8 column of `n` rows with the result of the lookup */ std::unique_ptr contains_nulls( cudf::lists_column_view const& lists, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -125,6 +131,7 @@ enum class duplicate_find_option : int32_t { * @param search_key The scalar key to be looked up in each list row * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or * last (`FIND_LAST`) + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return column of `n` rows with the location of the `search_key` */ @@ -132,6 +139,7 @@ std::unique_ptr index_of( cudf::lists_column_view const& lists, cudf::scalar const& search_key, duplicate_find_option find_option = duplicate_find_option::FIND_FIRST, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -160,6 +168,7 @@ std::unique_ptr index_of( * `lists` * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or * last (`FIND_LAST`) + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return column of `n` rows with the location of the `search_key` */ @@ -167,6 +176,7 @@ std::unique_ptr index_of( cudf::lists_column_view const& lists, cudf::column_view const& search_keys, duplicate_find_option find_option = duplicate_find_option::FIND_FIRST, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git 
a/cpp/include/cudf/lists/count_elements.hpp b/cpp/include/cudf/lists/count_elements.hpp index 552ba058b93..e4bd0dca9ae 100644 --- a/cpp/include/cudf/lists/count_elements.hpp +++ b/cpp/include/cudf/lists/count_elements.hpp @@ -45,11 +45,13 @@ namespace lists { * in the output column. * * @param input Input lists column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New column with the number of elements for each row */ std::unique_ptr count_elements( lists_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of lists_elements group diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index c0932b81dc3..e94dfea9dcf 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1281,6 +1282,11 @@ class dictionary_column_wrapper : public detail::column_wrapper { template class lists_column_wrapper : public detail::column_wrapper { public: + /** + * @brief Cast to lists_column_view + */ + operator lists_column_view() const { return cudf::lists_column_view{wrapped->view()}; } + /** * @brief Construct a lists column containing a single list of fixed-width * type from an initializer list of values. 
@@ -1542,8 +1548,12 @@ class lists_column_wrapper : public detail::column_wrapper { rmm::device_buffer&& null_mask) { // construct the list column - wrapped = make_lists_column( - num_rows, std::move(offsets), std::move(values), null_count, std::move(null_mask)); + wrapped = make_lists_column(num_rows, + std::move(offsets), + std::move(values), + null_count, + std::move(null_mask), + cudf::test::get_default_stream()); } /** @@ -1618,8 +1628,12 @@ class lists_column_wrapper : public detail::column_wrapper { }(); // construct the list column - wrapped = make_lists_column( - cols.size(), std::move(offsets), std::move(data), null_count, std::move(null_mask)); + wrapped = make_lists_column(cols.size(), + std::move(offsets), + std::move(data), + null_count, + std::move(null_mask), + cudf::test::get_default_stream()); } /** @@ -1647,8 +1661,12 @@ class lists_column_wrapper : public detail::column_wrapper { depth = 0; size_type num_elements = offsets->size() == 0 ? 0 : offsets->size() - 1; - wrapped = - make_lists_column(num_elements, std::move(offsets), std::move(c), 0, rmm::device_buffer{}); + wrapped = make_lists_column(num_elements, + std::move(offsets), + std::move(c), + 0, + rmm::device_buffer{}, + cudf::test::get_default_stream()); } /** @@ -1697,12 +1715,15 @@ class lists_column_wrapper : public detail::column_wrapper { } lists_column_view lcv(col); - return make_lists_column(col.size(), - std::make_unique(lcv.offsets()), - normalize_column(lists_column_view(col).child(), - lists_column_view(expected_hierarchy).child()), - col.null_count(), - copy_bitmask(col)); + return make_lists_column( + col.size(), + std::make_unique(lcv.offsets()), + normalize_column(lists_column_view(col).child(), + lists_column_view(expected_hierarchy).child()), + col.null_count(), + cudf::detail::copy_bitmask( + col, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()), + cudf::test::get_default_stream()); } std::pair, std::vector>> preprocess_columns( diff --git 
a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index fbe297765f8..99dbd55678b 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -271,10 +271,11 @@ std::unique_ptr concatenate_list_elements(column_view const& input, */ std::unique_ptr concatenate_list_elements(column_view const& input, concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate_list_elements(input, null_policy, cudf::get_default_stream(), mr); + return detail::concatenate_list_elements(input, null_policy, stream, mr); } } // namespace lists diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index 658538b0195..49be7b5ff17 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -305,10 +305,11 @@ std::unique_ptr concatenate_rows(table_view const& input, */ std::unique_ptr concatenate_rows(table_view const& input, concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate_rows(input, null_policy, cudf::get_default_stream(), mr); + return detail::concatenate_rows(input, null_policy, stream, mr); } } // namespace lists diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index df1d043bdb6..4733a5d63a8 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -287,7 +287,7 @@ std::unique_ptr index_of(lists_column_view const& lists, } auto search_key_col = cudf::make_column_from_scalar(search_key, lists.size(), stream, mr); - return index_of(lists, search_key_col->view(), find_option, stream, mr); + return detail::index_of(lists, search_key_col->view(), find_option, stream, mr); } std::unique_ptr index_of(lists_column_view const& lists, @@ -306,11 +306,11 @@ 
std::unique_ptr contains(lists_column_view const& lists, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto key_indices = index_of(lists, - search_key, - duplicate_find_option::FIND_FIRST, - stream, - rmm::mr::get_current_device_resource()); + auto key_indices = detail::index_of(lists, + search_key, + duplicate_find_option::FIND_FIRST, + stream, + rmm::mr::get_current_device_resource()); return to_contains(std::move(key_indices), stream, mr); } @@ -322,11 +322,11 @@ std::unique_ptr contains(lists_column_view const& lists, CUDF_EXPECTS(search_keys.size() == lists.size(), "Number of search keys must match list column size."); - auto key_indices = index_of(lists, - search_keys, - duplicate_find_option::FIND_FIRST, - stream, - rmm::mr::get_current_device_resource()); + auto key_indices = detail::index_of(lists, + search_keys, + duplicate_find_option::FIND_FIRST, + stream, + rmm::mr::get_current_device_resource()); return to_contains(std::move(key_indices), stream, mr); } @@ -364,43 +364,48 @@ std::unique_ptr contains_nulls(lists_column_view const& lists, std::unique_ptr contains(lists_column_view const& lists, cudf::scalar const& search_key, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains(lists, search_key, cudf::get_default_stream(), mr); + return detail::contains(lists, search_key, stream, mr); } std::unique_ptr contains(lists_column_view const& lists, column_view const& search_keys, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains(lists, search_keys, cudf::get_default_stream(), mr); + return detail::contains(lists, search_keys, stream, mr); } std::unique_ptr contains_nulls(lists_column_view const& lists, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains_nulls(lists, cudf::get_default_stream(), mr); + return detail::contains_nulls(lists, stream, 
mr); } std::unique_ptr index_of(lists_column_view const& lists, cudf::scalar const& search_key, duplicate_find_option find_option, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::index_of(lists, search_key, find_option, cudf::get_default_stream(), mr); + return detail::index_of(lists, search_key, find_option, stream, mr); } std::unique_ptr index_of(lists_column_view const& lists, column_view const& search_keys, duplicate_find_option find_option, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::index_of(lists, search_keys, find_option, cudf::get_default_stream(), mr); + return detail::index_of(lists, search_keys, find_option, stream, mr); } } // namespace cudf::lists diff --git a/cpp/src/lists/copying/concatenate.cu b/cpp/src/lists/copying/concatenate.cu index ddd0dfbe084..5407b88236f 100644 --- a/cpp/src/lists/copying/concatenate.cu +++ b/cpp/src/lists/copying/concatenate.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -123,8 +124,8 @@ std::unique_ptr concatenate(host_span columns, // if any of the input columns have nulls, construct the output mask bool const has_nulls = std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); }); - rmm::device_buffer null_mask = create_null_mask( - total_list_count, has_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED); + rmm::device_buffer null_mask = cudf::detail::create_null_mask( + total_list_count, has_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED, stream, mr); auto null_mask_data = static_cast(null_mask.data()); auto const null_count = has_nulls ? 
cudf::detail::concatenate_masks(columns, null_mask_data, stream) : size_type{0}; diff --git a/cpp/src/lists/count_elements.cu b/cpp/src/lists/count_elements.cu index 40a14d805e1..2fd0851067a 100644 --- a/cpp/src/lists/count_elements.cu +++ b/cpp/src/lists/count_elements.cu @@ -73,10 +73,11 @@ std::unique_ptr count_elements(lists_column_view const& input, // external APIS std::unique_ptr count_elements(lists_column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::count_elements(input, cudf::get_default_stream(), mr); + return detail::count_elements(input, stream, mr); } } // namespace lists diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ac13c121530..ffaba7d6fa7 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -638,6 +638,7 @@ ConfigureTest( ) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) # ################################################################################################## # Install tests #################################################################################### diff --git a/cpp/tests/streams/lists_test.cpp b/cpp/tests/streams/lists_test.cpp new file mode 100644 index 00000000000..e292b551d83 --- /dev/null +++ b/cpp/tests/streams/lists_test.cpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +class ListTest : public cudf::test::BaseFixture {}; + +TEST_F(ListTest, ConcatenateRows) +{ + cudf::test::lists_column_wrapper list_col_1{{0, 1}, {2, 3}, {4, 5}}; + cudf::test::lists_column_wrapper list_col_2{{0, 1}, {2, 3}, {4, 5}}; + cudf::table_view lists_table({list_col_1, list_col_2}); + cudf::lists::concatenate_rows( + lists_table, cudf::lists::concatenate_null_policy::IGNORE, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, ConcatenateListElements) +{ + cudf::test::lists_column_wrapper ll_column{{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}}; + cudf::lists::concatenate_list_elements( + ll_column, cudf::lists::concatenate_null_policy::IGNORE, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, ContainsNulls) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3}, {4, 5}}; + cudf::lists::contains_nulls(list_col, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, ContainsSearchKey) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3}, {4, 5}}; + cudf::numeric_scalar search_key(2, true, cudf::test::get_default_stream()); + cudf::lists::contains(list_col, search_key, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, ContainsSearchKeys) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3}, {4, 5}}; + cudf::test::fixed_width_column_wrapper search_keys({1, 2, 3}); + cudf::lists::contains(list_col, search_keys, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, IndexOfSearchKey) +{ + cudf::test::lists_column_wrapper list_col{{0, 
1}, {2, 3}, {4, 5}}; + cudf::numeric_scalar search_key(2, true, cudf::test::get_default_stream()); + cudf::lists::index_of(list_col, + search_key, + cudf::lists::duplicate_find_option::FIND_FIRST, + cudf::test::get_default_stream()); +} + +TEST_F(ListTest, IndexOfSearchKeys) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3}, {4, 5}}; + cudf::test::fixed_width_column_wrapper search_keys({1, 2, 3}); + cudf::lists::index_of(list_col, + search_keys, + cudf::lists::duplicate_find_option::FIND_FIRST, + cudf::test::get_default_stream()); +} + +TEST_F(ListTest, CountElements) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7}, {4, 5}}; + cudf::lists::count_elements(list_col, cudf::test::get_default_stream()); +} From b4fd77b30311f3b1de39cac22423f2c3a32ec72d Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Tue, 10 Oct 2023 12:20:42 -0500 Subject: [PATCH 47/76] Centralize chunked reading code in the parquet reader to reader_impl_chunking.cu (#14262) As a precursor to further chunked reader work, this PR centralizes chunk-related code (mostly from the `reader::impl` class) into `reader_impl_chunking.cu` and `reader_impl_chunking.hpp`. Also cleans up some variable naming and locations in `reader::impl` and the `file_intermediate_data` and `pass_intermediate_data classes`. 
Authors: - https://github.com/nvdbaranec Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Robert Maynard (https://github.com/robertmaynard) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14262 --- cpp/CMakeLists.txt | 1 + cpp/src/io/parquet/parquet_gpu.hpp | 73 --- cpp/src/io/parquet/reader_impl.cpp | 12 +- cpp/src/io/parquet/reader_impl.hpp | 49 +- cpp/src/io/parquet/reader_impl_chunking.cu | 598 +++++++++++++++++++ cpp/src/io/parquet/reader_impl_chunking.hpp | 87 +++ cpp/src/io/parquet/reader_impl_helpers.hpp | 17 + cpp/src/io/parquet/reader_impl_preprocess.cu | 558 +---------------- cpp/src/io/utilities/column_buffer.cpp | 10 +- 9 files changed, 751 insertions(+), 654 deletions(-) create mode 100644 cpp/src/io/parquet/reader_impl_chunking.cu create mode 100644 cpp/src/io/parquet/reader_impl_chunking.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 000f80065ab..f8b9762f1d4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -401,6 +401,7 @@ add_library( src/io/parquet/predicate_pushdown.cpp src/io/parquet/reader.cpp src/io/parquet/reader_impl.cpp + src/io/parquet/reader_impl_chunking.cu src/io/parquet/reader_impl_helpers.cpp src/io/parquet/reader_impl_preprocess.cu src/io/parquet/writer_impl.cu diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 767668cc65e..6a93fec0c46 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -318,79 +318,6 @@ struct ColumnChunkDesc { int32_t src_col_schema{}; // my schema index in the file }; -/** - * @brief The row_group_info class - */ -struct row_group_info { - size_type index; // row group index within a file. aggregate_reader_metadata::get_row_group() is - // called with index and source_index - size_t start_row; - size_type source_index; // file index. 
- - row_group_info() = default; - - row_group_info(size_type index, size_t start_row, size_type source_index) - : index{index}, start_row{start_row}, source_index{source_index} - { - } -}; - -/** - * @brief Struct to store file-level data that remains constant for - * all passes/chunks for the file. - */ -struct file_intermediate_data { - // all row groups to read - std::vector row_groups{}; - - // all chunks from the selected row groups. We may end up reading these chunks progressively - // instead of all at once - std::vector chunks{}; - - // skip_rows/num_rows values for the entire file. these need to be adjusted per-pass because we - // may not be visiting every row group that contains these bounds - size_t global_skip_rows; - size_t global_num_rows; -}; - -/** - * @brief Structs to identify the reading row range for each chunk of rows in chunked reading. - */ -struct chunk_read_info { - size_t skip_rows; - size_t num_rows; -}; - -/** - * @brief Struct to store pass-level data that remains constant for a single pass. - */ -struct pass_intermediate_data { - std::vector> raw_page_data; - rmm::device_buffer decomp_page_data; - - // rowgroup, chunk and page information for the current pass. - std::vector row_groups{}; - cudf::detail::hostdevice_vector chunks{}; - cudf::detail::hostdevice_vector pages_info{}; - cudf::detail::hostdevice_vector page_nesting_info{}; - cudf::detail::hostdevice_vector page_nesting_decode_info{}; - - rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; - rmm::device_uvector page_index{0, rmm::cuda_stream_default}; - rmm::device_uvector str_dict_index{0, rmm::cuda_stream_default}; - - std::vector output_chunk_read_info; - std::size_t current_output_chunk{0}; - - rmm::device_buffer level_decode_data{}; - int level_type_size{0}; - - // skip_rows and num_rows values for this particular pass. these may be adjusted values from the - // global values stored in file_intermediate_data. 
- size_t skip_rows; - size_t num_rows; -}; - /** * @brief Struct describing an encoder column */ diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 26ec83d5946..db81222157a 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -349,14 +349,14 @@ void reader::impl::prepare_data(int64_t skip_rows, not _input_columns.empty()) { // fills in chunk information without physically loading or decompressing // the associated data - load_global_chunk_info(); + create_global_chunk_info(); // compute schedule of input reads. Each rowgroup contains 1 chunk per column. For now // we will read an entire row group at a time. However, it is possible to do // sub-rowgroup reads if we made some estimates on individual chunk sizes (tricky) and // changed the high level structure such that we weren't always reading an entire table's // worth of columns at once. - compute_input_pass_row_group_info(); + compute_input_passes(); } _file_preprocessed = true; @@ -364,7 +364,7 @@ void reader::impl::prepare_data(int64_t skip_rows, // if we have to start a new pass, do that now if (!_pass_preprocessed) { - auto const num_passes = _input_pass_row_group_offsets.size() - 1; + auto const num_passes = _file_itm_data.input_pass_row_group_offsets.size() - 1; // always create the pass struct, even if we end up with no passes. // this will also cause the previous pass information to be deleted @@ -373,7 +373,7 @@ void reader::impl::prepare_data(int64_t skip_rows, if (_file_itm_data.global_num_rows > 0 && not _file_itm_data.row_groups.empty() && not _input_columns.empty() && _current_input_pass < num_passes) { // setup the pass_intermediate_info for this pass. 
- setup_pass(); + setup_next_pass(); load_and_decompress_data(); preprocess_pages(uses_custom_row_bounds, _output_chunk_read_limit); @@ -541,8 +541,8 @@ bool reader::impl::has_next() {} /*row_group_indices, empty means read all row groups*/, std::nullopt /*filter*/); - auto const num_input_passes = - _input_pass_row_group_offsets.size() == 0 ? 0 : _input_pass_row_group_offsets.size() - 1; + size_t const num_input_passes = std::max( + int64_t{0}, static_cast(_file_itm_data.input_pass_row_group_offsets.size()) - 1); return (_pass_itm_data->current_output_chunk < _pass_itm_data->output_chunk_read_info.size()) || (_current_input_pass < num_input_passes); } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 6003b931b04..cea4ba35606 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -22,6 +22,7 @@ #pragma once #include "parquet_gpu.hpp" +#include "reader_impl_chunking.hpp" #include "reader_impl_helpers.hpp" #include @@ -136,10 +137,6 @@ class reader::impl { host_span const> row_group_indices, std::optional> filter); - void load_global_chunk_info(); - void compute_input_pass_row_group_info(); - void setup_pass(); - /** * @brief Create chunk information and start file reads * @@ -250,6 +247,31 @@ class reader::impl { */ void decode_page_data(size_t skip_rows, size_t num_rows); + /** + * @brief Creates file-wide parquet chunk information. + * + * Creates information about all chunks in the file, storing it in + * the file-wide _file_itm_data structure. + */ + void create_global_chunk_info(); + + /** + * @brief Computes all of the passes we will perform over the file. + */ + void compute_input_passes(); + + /** + * @brief Close out the existing pass (if any) and prepare for the next pass. 
+ */ + void setup_next_pass(); + + /** + * @brief Given a set of pages that have had their sizes computed by nesting level and + * a limit on total read size, generate a set of {skip_rows, num_rows} pairs representing + * a set of reads that will generate output columns of total size <= `chunk_read_limit` bytes. + */ + void compute_splits_for_pass(); + private: rmm::cuda_stream_view _stream; rmm::mr::device_memory_resource* _mr = nullptr; @@ -278,7 +300,7 @@ class reader::impl { // chunked reading happens in 2 parts: // - // At the top level there is the "pass" in which we try and limit the + // At the top level, the entire file is divided up into "passes" on which we try and limit the // total amount of temporary memory (compressed data, decompressed data) in use // via _input_pass_read_limit. // @@ -286,19 +308,16 @@ // byte size is controlled by _output_chunk_read_limit. file_intermediate_data _file_itm_data; + bool _file_preprocessed{false}; + std::unique_ptr _pass_itm_data; + bool _pass_preprocessed{false}; - // an array of offsets into _file_itm_data::global_chunks. Each pair of offsets represents - // the start/end of the chunks to be loaded for a given pass. 
- std::vector _input_pass_row_group_offsets{}; - std::vector _input_pass_row_count{}; - std::size_t _current_input_pass{0}; - std::size_t _chunk_count{0}; + std::size_t _output_chunk_read_limit{0}; // output chunk size limit in bytes + std::size_t _input_pass_read_limit{0}; // input pass memory usage limit in bytes - std::size_t _output_chunk_read_limit{0}; - std::size_t _input_pass_read_limit{0}; - bool _pass_preprocessed{false}; - bool _file_preprocessed{false}; + std::size_t _current_input_pass{0}; // current input pass index + std::size_t _chunk_count{0}; // how many output chunks we have produced }; } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu new file mode 100644 index 00000000000..ad52a7dfcc1 --- /dev/null +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -0,0 +1,598 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "reader_impl.hpp" +#include "reader_impl_chunking.hpp" + +#include +#include + +#include + +#include + +#include +#include +#include +#include + +namespace cudf::io::parquet::detail { + +namespace { + +struct cumulative_row_info { + size_t row_count; // cumulative row count + size_t size_bytes; // cumulative size in bytes + int key; // schema index +}; + +#if defined(CHUNKING_DEBUG) +void print_cumulative_page_info(cudf::detail::hostdevice_vector& pages, + rmm::device_uvector const& page_index, + rmm::device_uvector const& c_info, + rmm::cuda_stream_view stream) +{ + pages.device_to_host_sync(stream); + + printf("------------\nCumulative sizes by page\n"); + + std::vector schemas(pages.size()); + std::vector h_page_index(pages.size()); + CUDF_CUDA_TRY(cudaMemcpy( + h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDefault)); + std::vector h_cinfo(pages.size()); + CUDF_CUDA_TRY(cudaMemcpy( + h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDefault)); + auto schema_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](size_type i) { return pages[h_page_index[i]].src_col_schema; }); + thrust::copy(thrust::seq, schema_iter, schema_iter + pages.size(), schemas.begin()); + auto last = thrust::unique(thrust::seq, schemas.begin(), schemas.end()); + schemas.resize(last - schemas.begin()); + printf("Num schemas: %lu\n", schemas.size()); + + for (size_t idx = 0; idx < schemas.size(); idx++) { + printf("Schema %d\n", schemas[idx]); + for (size_t pidx = 0; pidx < pages.size(); pidx++) { + auto const& page = pages[h_page_index[pidx]]; + if (page.flags & PAGEINFO_FLAGS_DICTIONARY || page.src_col_schema != schemas[idx]) { + continue; + } + printf("\tP: {%lu, %lu}\n", h_cinfo[pidx].row_count, h_cinfo[pidx].size_bytes); + } + } +} + +void print_cumulative_row_info(host_span sizes, + std::string const& label, + std::optional> splits = std::nullopt) +{ + if (splits.has_value()) { + 
printf("------------\nSplits\n"); + for (size_t idx = 0; idx < splits->size(); idx++) { + printf("{%lu, %lu}\n", splits.value()[idx].skip_rows, splits.value()[idx].num_rows); + } + } + + printf("------------\nCumulative sizes %s\n", label.c_str()); + for (size_t idx = 0; idx < sizes.size(); idx++) { + printf("{%lu, %lu, %d}", sizes[idx].row_count, sizes[idx].size_bytes, sizes[idx].key); + if (splits.has_value()) { + // if we have a split at this row count and this is the last instance of this row count + auto start = thrust::make_transform_iterator( + splits->begin(), [](chunk_read_info const& i) { return i.skip_rows; }); + auto end = start + splits->size(); + auto split = std::find(start, end, sizes[idx].row_count); + auto const split_index = [&]() -> int { + if (split != end && + ((idx == sizes.size() - 1) || (sizes[idx + 1].row_count > sizes[idx].row_count))) { + return static_cast(std::distance(start, split)); + } + return idx == 0 ? 0 : -1; + }(); + if (split_index >= 0) { + printf(" <-- split {%lu, %lu}", + splits.value()[split_index].skip_rows, + splits.value()[split_index].num_rows); + } + } + printf("\n"); + } +} +#endif // CHUNKING_DEBUG + +/** + * @brief Functor which reduces two cumulative_row_info structs of the same key. + */ +struct cumulative_row_sum { + cumulative_row_info operator() + __device__(cumulative_row_info const& a, cumulative_row_info const& b) const + { + return cumulative_row_info{a.row_count + b.row_count, a.size_bytes + b.size_bytes, a.key}; + } +}; + +/** + * @brief Functor which computes the total data size for a given type of cudf column. + * + * In the case of strings, the return size does not include the chars themselves. That + * information is tracked separately (see PageInfo::str_bytes). + */ +struct row_size_functor { + __device__ size_t validity_size(size_t num_rows, bool nullable) + { + return nullable ? 
(cudf::util::div_rounding_up_safe(num_rows, size_t{32}) * 4) : 0; + } + + template + __device__ size_t operator()(size_t num_rows, bool nullable) + { + auto const element_size = sizeof(device_storage_type_t); + return (element_size * num_rows) + validity_size(num_rows, nullable); + } +}; + +template <> +__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) +{ + auto const offset_size = sizeof(size_type); + // NOTE: Adding the + 1 offset here isn't strictly correct. There will only be 1 extra offset + // for the entire column, whereas this is adding an extra offset per page. So we will get a + // small over-estimate of the real size of the order : # of pages * 4 bytes. It seems better + // to overestimate size somewhat than to underestimate it and potentially generate chunks + // that are too large. + return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); +} + +template <> +__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) +{ + return validity_size(num_rows, nullable); +} + +template <> +__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) +{ + // only returns the size of offsets and validity. the size of the actual string chars + // is tracked separately. + auto const offset_size = sizeof(size_type); + // see note about offsets in the list_view template. + return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); +} + +/** + * @brief Functor which computes the total output cudf data size for all of + * the data in this page. + * + * Sums across all nesting levels. 
+ */ +struct get_cumulative_row_info { + PageInfo const* const pages; + + __device__ cumulative_row_info operator()(size_type index) + { + auto const& page = pages[index]; + if (page.flags & PAGEINFO_FLAGS_DICTIONARY) { + return cumulative_row_info{0, 0, page.src_col_schema}; + } + + // total nested size, not counting string data + auto iter = + cudf::detail::make_counting_transform_iterator(0, [page, index] __device__(size_type i) { + auto const& pni = page.nesting[i]; + return cudf::type_dispatcher( + data_type{pni.type}, row_size_functor{}, pni.size, pni.nullable); + }); + + size_t const row_count = static_cast(page.nesting[0].size); + return { + row_count, + thrust::reduce(thrust::seq, iter, iter + page.num_output_nesting_levels) + page.str_bytes, + page.src_col_schema}; + } +}; + +/** + * @brief Functor which computes the effective size of all input columns by page. + * + * For a given row, we want to find the cost of all pages for all columns involved + * in loading up to that row. The complication here is that not all pages are the + * same size between columns. Example: + * + * page row counts + * Column A: 0 <----> 100 <----> 200 + * Column B: 0 <---------------> 200 <--------> 400 + | + * if we decide to split at row 100, we don't really know the actual amount of bytes in column B + * at that point. So we have to proceed as if we are taking the bytes from all 200 rows of that + * page. Essentially, a conservative over-estimate of the real size. 
+ */ +struct row_total_size { + cumulative_row_info const* c_info; + size_type const* key_offsets; + size_t num_keys; + + __device__ cumulative_row_info operator()(cumulative_row_info const& i) + { + // sum sizes for each input column at this row + size_t sum = 0; + for (int idx = 0; idx < num_keys; idx++) { + auto const start = key_offsets[idx]; + auto const end = key_offsets[idx + 1]; + auto iter = cudf::detail::make_counting_transform_iterator( + 0, [&] __device__(size_type i) { return c_info[i].row_count; }); + auto const page_index = + thrust::lower_bound(thrust::seq, iter + start, iter + end, i.row_count) - iter; + sum += c_info[page_index].size_bytes; + } + return {i.row_count, sum, i.key}; + } +}; + +/** + * @brief Given a vector of cumulative {row_count, byte_size} pairs and a chunk read + * limit, determine the set of splits. + * + * @param sizes Vector of cumulative {row_count, byte_size} pairs + * @param num_rows Total number of rows to read + * @param chunk_read_limit Limit on total number of bytes to be returned per read, for all columns + */ +std::vector find_splits(std::vector const& sizes, + size_t num_rows, + size_t chunk_read_limit) +{ + // now we have an array of {row_count, real output bytes}. just walk through it and generate + // splits. + // TODO: come up with a clever way to do this entirely in parallel. For now, as long as batch + // sizes are reasonably large, this shouldn't iterate too many times + std::vector splits; + { + size_t cur_pos = 0; + size_t cur_cumulative_size = 0; + size_t cur_row_count = 0; + auto start = thrust::make_transform_iterator(sizes.begin(), [&](cumulative_row_info const& i) { + return i.size_bytes - cur_cumulative_size; + }); + auto end = start + sizes.size(); + while (cur_row_count < num_rows) { + int64_t split_pos = + thrust::lower_bound(thrust::seq, start + cur_pos, end, chunk_read_limit) - start; + + // if we're past the end, or if the returned bucket is > than the chunk_read_limit, move back + // one. 
+ if (static_cast(split_pos) >= sizes.size() || + (sizes[split_pos].size_bytes - cur_cumulative_size > chunk_read_limit)) { + split_pos--; + } + + // best-try. if we can't find something that'll fit, we have to go bigger. we're doing this in + // a loop because all of the cumulative sizes for all the pages are sorted into one big list. + // so if we had two columns, both of which had an entry {1000, 10000}, that entry would be in + // the list twice. so we have to iterate until we skip past all of them. The idea is that we + // either do this, or we have to call unique() on the input first. + while (split_pos < (static_cast(sizes.size()) - 1) && + (split_pos < 0 || sizes[split_pos].row_count == cur_row_count)) { + split_pos++; + } + + auto const start_row = cur_row_count; + cur_row_count = sizes[split_pos].row_count; + splits.push_back(chunk_read_info{start_row, cur_row_count - start_row}); + cur_pos = split_pos; + cur_cumulative_size = sizes[split_pos].size_bytes; + } + } + // print_cumulative_row_info(sizes, "adjusted", splits); + + return splits; +} + +/** + * @brief Converts cuDF units to Parquet units. + * + * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. + */ +[[nodiscard]] std::tuple conversion_info(type_id column_type_id, + type_id timestamp_type_id, + Type physical, + int8_t converted, + int32_t length) +{ + int32_t type_width = (physical == FIXED_LEN_BYTE_ARRAY) ? 
length : 0; + int32_t clock_rate = 0; + if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { + type_width = 1; // I32 -> I8 + } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { + type_width = 2; // I32 -> I16 + } else if (column_type_id == type_id::INT32) { + type_width = 4; // str -> hash32 + } else if (is_chrono(data_type{column_type_id})) { + clock_rate = to_clockrate(timestamp_type_id); + } + + int8_t converted_type = converted; + if (converted_type == DECIMAL && column_type_id != type_id::FLOAT64 && + not cudf::is_fixed_point(data_type{column_type_id})) { + converted_type = UNKNOWN; // Not converting to float64 or decimal + } + return std::make_tuple(type_width, clock_rate, converted_type); +} + +/** + * @brief Return the required number of bits to store a value. + */ +template +[[nodiscard]] T required_bits(uint32_t max_level) +{ + return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); +} + +struct row_count_compare { + __device__ bool operator()(cumulative_row_info const& a, cumulative_row_info const& b) + { + return a.row_count < b.row_count; + } +}; + +} // anonymous namespace + +void reader::impl::create_global_chunk_info() +{ + auto const num_rows = _file_itm_data.global_num_rows; + auto const& row_groups_info = _file_itm_data.row_groups; + auto& chunks = _file_itm_data.chunks; + + // Descriptors for all the chunks that make up the selected columns + auto const num_input_columns = _input_columns.size(); + auto const num_chunks = row_groups_info.size() * num_input_columns; + + // Initialize column chunk information + auto remaining_rows = num_rows; + for (auto const& rg : row_groups_info) { + auto const& row_group = _metadata->get_row_group(rg.index, rg.source_index); + auto const row_group_start = rg.start_row; + auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); + + // generate ColumnChunkDesc objects for everything to be decoded (all input columns) + for 
(size_t i = 0; i < num_input_columns; ++i) { + auto col = _input_columns[i]; + // look up metadata + auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); + auto& schema = _metadata->get_schema(col.schema_idx); + + auto [type_width, clock_rate, converted_type] = + conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), + _timestamp_type.id(), + schema.type, + schema.converted_type, + schema.type_length); + + chunks.push_back(ColumnChunkDesc(col_meta.total_compressed_size, + nullptr, + col_meta.num_values, + schema.type, + type_width, + row_group_start, + row_group_rows, + schema.max_definition_level, + schema.max_repetition_level, + _metadata->get_output_nesting_depth(col.schema_idx), + required_bits(schema.max_definition_level), + required_bits(schema.max_repetition_level), + col_meta.codec, + converted_type, + schema.logical_type, + schema.decimal_precision, + clock_rate, + i, + col.schema_idx)); + } + + remaining_rows -= row_group_rows; + } +} + +void reader::impl::compute_input_passes() +{ + // at this point, row_groups has already been filtered down to just the row groups we need to + // handle optional skip_rows/num_rows parameters. + auto const& row_groups_info = _file_itm_data.row_groups; + + // if the user hasn't specified an input size limit, read everything in a single pass. + if (_input_pass_read_limit == 0) { + _file_itm_data.input_pass_row_group_offsets.push_back(0); + _file_itm_data.input_pass_row_group_offsets.push_back(row_groups_info.size()); + return; + } + + // generate passes. make sure to account for the case where a single row group doesn't fit within + // + std::size_t const read_limit = + _input_pass_read_limit > 0 ? 
_input_pass_read_limit : std::numeric_limits::max(); + std::size_t cur_pass_byte_size = 0; + std::size_t cur_rg_start = 0; + std::size_t cur_row_count = 0; + _file_itm_data.input_pass_row_group_offsets.push_back(0); + _file_itm_data.input_pass_row_count.push_back(0); + + for (size_t cur_rg_index = 0; cur_rg_index < row_groups_info.size(); cur_rg_index++) { + auto const& rgi = row_groups_info[cur_rg_index]; + auto const& row_group = _metadata->get_row_group(rgi.index, rgi.source_index); + + // can we add this row group + if (cur_pass_byte_size + row_group.total_byte_size >= read_limit) { + // A single row group (the current one) is larger than the read limit: + // We always need to include at least one row group, so end the pass at the end of the current + // row group + if (cur_rg_start == cur_rg_index) { + _file_itm_data.input_pass_row_group_offsets.push_back(cur_rg_index + 1); + _file_itm_data.input_pass_row_count.push_back(cur_row_count + row_group.num_rows); + cur_rg_start = cur_rg_index + 1; + cur_pass_byte_size = 0; + } + // End the pass at the end of the previous row group + else { + _file_itm_data.input_pass_row_group_offsets.push_back(cur_rg_index); + _file_itm_data.input_pass_row_count.push_back(cur_row_count); + cur_rg_start = cur_rg_index; + cur_pass_byte_size = row_group.total_byte_size; + } + } else { + cur_pass_byte_size += row_group.total_byte_size; + } + cur_row_count += row_group.num_rows; + } + // add the last pass if necessary + if (_file_itm_data.input_pass_row_group_offsets.back() != row_groups_info.size()) { + _file_itm_data.input_pass_row_group_offsets.push_back(row_groups_info.size()); + _file_itm_data.input_pass_row_count.push_back(cur_row_count); + } +} + +void reader::impl::setup_next_pass() +{ + // this will also cause the previous pass information to be deleted + _pass_itm_data = std::make_unique(); + + // setup row groups to be loaded for this pass + auto const row_group_start = 
_file_itm_data.input_pass_row_group_offsets[_current_input_pass]; + auto const row_group_end = _file_itm_data.input_pass_row_group_offsets[_current_input_pass + 1]; + auto const num_row_groups = row_group_end - row_group_start; + _pass_itm_data->row_groups.resize(num_row_groups); + std::copy(_file_itm_data.row_groups.begin() + row_group_start, + _file_itm_data.row_groups.begin() + row_group_end, + _pass_itm_data->row_groups.begin()); + + auto const num_passes = _file_itm_data.input_pass_row_group_offsets.size() - 1; + CUDF_EXPECTS(_current_input_pass < num_passes, "Encountered an invalid read pass index"); + + auto const chunks_per_rowgroup = _input_columns.size(); + auto const num_chunks = chunks_per_rowgroup * num_row_groups; + + auto chunk_start = _file_itm_data.chunks.begin() + (row_group_start * chunks_per_rowgroup); + auto chunk_end = _file_itm_data.chunks.begin() + (row_group_end * chunks_per_rowgroup); + + _pass_itm_data->chunks = cudf::detail::hostdevice_vector(num_chunks, _stream); + std::copy(chunk_start, chunk_end, _pass_itm_data->chunks.begin()); + + // adjust skip_rows and num_rows by what's available in the row groups we are processing + if (num_passes == 1) { + _pass_itm_data->skip_rows = _file_itm_data.global_skip_rows; + _pass_itm_data->num_rows = _file_itm_data.global_num_rows; + } else { + auto const global_start_row = _file_itm_data.global_skip_rows; + auto const global_end_row = global_start_row + _file_itm_data.global_num_rows; + auto const start_row = + std::max(_file_itm_data.input_pass_row_count[_current_input_pass], global_start_row); + auto const end_row = + std::min(_file_itm_data.input_pass_row_count[_current_input_pass + 1], global_end_row); + + // skip_rows is always global in the sense that it is relative to the first row of + // everything we will be reading, regardless of what pass we are on. + // num_rows is how many rows we are reading this pass. 
+ _pass_itm_data->skip_rows = + global_start_row + _file_itm_data.input_pass_row_count[_current_input_pass]; + _pass_itm_data->num_rows = end_row - start_row; + } +} + +void reader::impl::compute_splits_for_pass() +{ + auto const skip_rows = _pass_itm_data->skip_rows; + auto const num_rows = _pass_itm_data->num_rows; + + // simple case : no chunk size, no splits + if (_output_chunk_read_limit <= 0) { + _pass_itm_data->output_chunk_read_info = std::vector{{skip_rows, num_rows}}; + return; + } + + auto& pages = _pass_itm_data->pages_info; + + auto const& page_keys = _pass_itm_data->page_keys; + auto const& page_index = _pass_itm_data->page_index; + + // generate cumulative row counts and sizes + rmm::device_uvector c_info(page_keys.size(), _stream); + // convert PageInfo to cumulative_row_info + auto page_input = thrust::make_transform_iterator(page_index.begin(), + get_cumulative_row_info{pages.device_ptr()}); + thrust::inclusive_scan_by_key(rmm::exec_policy(_stream), + page_keys.begin(), + page_keys.end(), + page_input, + c_info.begin(), + thrust::equal_to{}, + cumulative_row_sum{}); + // print_cumulative_page_info(pages, page_index, c_info, stream); + + // sort by row count + rmm::device_uvector c_info_sorted{c_info, _stream}; + thrust::sort( + rmm::exec_policy(_stream), c_info_sorted.begin(), c_info_sorted.end(), row_count_compare{}); + + // std::vector h_c_info_sorted(c_info_sorted.size()); + // CUDF_CUDA_TRY(cudaMemcpy(h_c_info_sorted.data(), + // c_info_sorted.data(), + // sizeof(cumulative_row_info) * c_info_sorted.size(), + // cudaMemcpyDefault)); + // print_cumulative_row_info(h_c_info_sorted, "raw"); + + // generate key offsets (offsets to the start of each partition of keys). 
worst case is 1 page per + // key + rmm::device_uvector key_offsets(page_keys.size() + 1, _stream); + auto const key_offsets_end = thrust::reduce_by_key(rmm::exec_policy(_stream), + page_keys.begin(), + page_keys.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), + key_offsets.begin()) + .second; + size_t const num_unique_keys = key_offsets_end - key_offsets.begin(); + thrust::exclusive_scan( + rmm::exec_policy(_stream), key_offsets.begin(), key_offsets.end(), key_offsets.begin()); + + // adjust the cumulative info such that for each row count, the size includes any pages that span + // that row count. this is so that if we have this case: + // page row counts + // Column A: 0 <----> 100 <----> 200 + // Column B: 0 <---------------> 200 <--------> 400 + // | + // if we decide to split at row 100, we don't really know the actual amount of bytes in column B + // at that point. So we have to proceed as if we are taking the bytes from all 200 rows of that + // page. 
+ // + rmm::device_uvector aggregated_info(c_info.size(), _stream); + thrust::transform(rmm::exec_policy(_stream), + c_info_sorted.begin(), + c_info_sorted.end(), + aggregated_info.begin(), + row_total_size{c_info.data(), key_offsets.data(), num_unique_keys}); + + // bring back to the cpu + std::vector h_aggregated_info(aggregated_info.size()); + CUDF_CUDA_TRY(cudaMemcpyAsync(h_aggregated_info.data(), + aggregated_info.data(), + sizeof(cumulative_row_info) * c_info.size(), + cudaMemcpyDefault, + _stream.value())); + _stream.synchronize(); + + // generate the actual splits + _pass_itm_data->output_chunk_read_info = + find_splits(h_aggregated_info, num_rows, _output_chunk_read_limit); +} + +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/reader_impl_chunking.hpp b/cpp/src/io/parquet/reader_impl_chunking.hpp new file mode 100644 index 00000000000..dfc239d8451 --- /dev/null +++ b/cpp/src/io/parquet/reader_impl_chunking.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "reader_impl_helpers.hpp" + +#include + +namespace cudf::io::parquet::detail { + +/** + * @brief Struct to store file-level data that remains constant for + * all passes/chunks in the file. + */ +struct file_intermediate_data { + // all row groups to read + std::vector row_groups{}; + + // all chunks from the selected row groups. 
We may end up reading these chunks progressively + // instead of all at once + std::vector chunks{}; + + // an array of offsets into _file_itm_data::global_chunks. Each pair of offsets represents + // the start/end of the chunks to be loaded for a given pass. + std::vector input_pass_row_group_offsets{}; + // row counts per input-pass + std::vector input_pass_row_count{}; + + // skip_rows/num_rows values for the entire file. these need to be adjusted per-pass because we + // may not be visiting every row group that contains these bounds + size_t global_skip_rows; + size_t global_num_rows; +}; + +/** + * @brief Struct to identify the range for each chunk of rows during a chunked reading pass. + */ +struct chunk_read_info { + size_t skip_rows; + size_t num_rows; +}; + +/** + * @brief Struct to store pass-level data that remains constant for a single pass. + */ +struct pass_intermediate_data { + std::vector> raw_page_data; + rmm::device_buffer decomp_page_data; + + // rowgroup, chunk and page information for the current pass. + std::vector row_groups{}; + cudf::detail::hostdevice_vector chunks{}; + cudf::detail::hostdevice_vector pages_info{}; + cudf::detail::hostdevice_vector page_nesting_info{}; + cudf::detail::hostdevice_vector page_nesting_decode_info{}; + + rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; + rmm::device_uvector page_index{0, rmm::cuda_stream_default}; + rmm::device_uvector str_dict_index{0, rmm::cuda_stream_default}; + + std::vector output_chunk_read_info; + std::size_t current_output_chunk{0}; + + rmm::device_buffer level_decode_data{}; + int level_type_size{0}; + + // skip_rows and num_rows values for this particular pass. these may be adjusted values from the + // global values stored in file_intermediate_data. 
+ size_t skip_rows; + size_t num_rows; +}; + +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp index 1a73e2f55ac..8d8ab8707be 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.hpp +++ b/cpp/src/io/parquet/reader_impl_helpers.hpp @@ -34,6 +34,23 @@ namespace cudf::io::parquet::detail { +/** + * @brief The row_group_info class + */ +struct row_group_info { + size_type index; // row group index within a file. aggregate_reader_metadata::get_row_group() is + // called with index and source_index + size_t start_row; + size_type source_index; // file index. + + row_group_info() = default; + + row_group_info(size_type index, size_t start_row, size_type source_index) + : index{index}, start_row{start_row}, source_index{source_index} + { + } +}; + /** * @brief Function that translates Parquet datatype to cuDF type enum */ diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 4bc6bb6f43b..ce45f709ee1 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -18,7 +18,6 @@ #include #include -#include #include #include @@ -44,7 +43,6 @@ #include namespace cudf::io::parquet::detail { - namespace { /** @@ -170,46 +168,6 @@ void generate_depth_remappings(std::map, std::ve } } -/** - * @brief Return the required number of bits to store a value. - */ -template -[[nodiscard]] T required_bits(uint32_t max_level) -{ - return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); -} - -/** - * @brief Converts cuDF units to Parquet units. - * - * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. - */ -[[nodiscard]] std::tuple conversion_info(type_id column_type_id, - type_id timestamp_type_id, - Type physical, - int8_t converted, - int32_t length) -{ - int32_t type_width = (physical == FIXED_LEN_BYTE_ARRAY) ? 
length : 0; - int32_t clock_rate = 0; - if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { - type_width = 1; // I32 -> I8 - } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { - type_width = 2; // I32 -> I16 - } else if (column_type_id == type_id::INT32) { - type_width = 4; // str -> hash32 - } else if (is_chrono(data_type{column_type_id})) { - clock_rate = to_clockrate(timestamp_type_id); - } - - int8_t converted_type = converted; - if (converted_type == DECIMAL && column_type_id != type_id::FLOAT64 && - not cudf::is_fixed_point(data_type{column_type_id})) { - converted_type = UNKNOWN; // Not converting to float64 or decimal - } - return std::make_tuple(type_width, clock_rate, converted_type); -} - /** * @brief Reads compressed page data to device memory. * @@ -790,163 +748,6 @@ std::pair>> reader::impl::read_and_decompres return {total_decompressed_size > 0, std::move(read_chunk_tasks)}; } -void reader::impl::load_global_chunk_info() -{ - auto const num_rows = _file_itm_data.global_num_rows; - auto const& row_groups_info = _file_itm_data.row_groups; - auto& chunks = _file_itm_data.chunks; - - // Descriptors for all the chunks that make up the selected columns - auto const num_input_columns = _input_columns.size(); - auto const num_chunks = row_groups_info.size() * num_input_columns; - - // Initialize column chunk information - auto remaining_rows = num_rows; - for (auto const& rg : row_groups_info) { - auto const& row_group = _metadata->get_row_group(rg.index, rg.source_index); - auto const row_group_start = rg.start_row; - auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - - // generate ColumnChunkDesc objects for everything to be decoded (all input columns) - for (size_t i = 0; i < num_input_columns; ++i) { - auto col = _input_columns[i]; - // look up metadata - auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); - auto& schema = 
_metadata->get_schema(col.schema_idx); - - auto [type_width, clock_rate, converted_type] = - conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), - _timestamp_type.id(), - schema.type, - schema.converted_type, - schema.type_length); - - chunks.push_back(ColumnChunkDesc(col_meta.total_compressed_size, - nullptr, - col_meta.num_values, - schema.type, - type_width, - row_group_start, - row_group_rows, - schema.max_definition_level, - schema.max_repetition_level, - _metadata->get_output_nesting_depth(col.schema_idx), - required_bits(schema.max_definition_level), - required_bits(schema.max_repetition_level), - col_meta.codec, - converted_type, - schema.logical_type, - schema.decimal_precision, - clock_rate, - i, - col.schema_idx)); - } - - remaining_rows -= row_group_rows; - } -} - -void reader::impl::compute_input_pass_row_group_info() -{ - // at this point, row_groups has already been filtered down to just the row groups we need to - // handle optional skip_rows/num_rows parameters. - auto const& row_groups_info = _file_itm_data.row_groups; - - // if the user hasn't specified an input size limit, read everything in a single pass. - if (_input_pass_read_limit == 0) { - _input_pass_row_group_offsets.push_back(0); - _input_pass_row_group_offsets.push_back(row_groups_info.size()); - return; - } - - // generate passes. make sure to account for the case where a single row group doesn't fit within - // - std::size_t const read_limit = - _input_pass_read_limit > 0 ? 
_input_pass_read_limit : std::numeric_limits::max(); - std::size_t cur_pass_byte_size = 0; - std::size_t cur_rg_start = 0; - std::size_t cur_row_count = 0; - _input_pass_row_group_offsets.push_back(0); - _input_pass_row_count.push_back(0); - - for (size_t cur_rg_index = 0; cur_rg_index < row_groups_info.size(); cur_rg_index++) { - auto const& rgi = row_groups_info[cur_rg_index]; - auto const& row_group = _metadata->get_row_group(rgi.index, rgi.source_index); - - // can we add this row group - if (cur_pass_byte_size + row_group.total_byte_size >= read_limit) { - // A single row group (the current one) is larger than the read limit: - // We always need to include at least one row group, so end the pass at the end of the current - // row group - if (cur_rg_start == cur_rg_index) { - _input_pass_row_group_offsets.push_back(cur_rg_index + 1); - _input_pass_row_count.push_back(cur_row_count + row_group.num_rows); - cur_rg_start = cur_rg_index + 1; - cur_pass_byte_size = 0; - } - // End the pass at the end of the previous row group - else { - _input_pass_row_group_offsets.push_back(cur_rg_index); - _input_pass_row_count.push_back(cur_row_count); - cur_rg_start = cur_rg_index; - cur_pass_byte_size = row_group.total_byte_size; - } - } else { - cur_pass_byte_size += row_group.total_byte_size; - } - cur_row_count += row_group.num_rows; - } - // add the last pass if necessary - if (_input_pass_row_group_offsets.back() != row_groups_info.size()) { - _input_pass_row_group_offsets.push_back(row_groups_info.size()); - _input_pass_row_count.push_back(cur_row_count); - } -} - -void reader::impl::setup_pass() -{ - // this will also cause the previous pass information to be deleted - _pass_itm_data = std::make_unique(); - - // setup row groups to be loaded for this pass - auto const row_group_start = _input_pass_row_group_offsets[_current_input_pass]; - auto const row_group_end = _input_pass_row_group_offsets[_current_input_pass + 1]; - auto const num_row_groups = row_group_end - 
row_group_start; - _pass_itm_data->row_groups.resize(num_row_groups); - std::copy(_file_itm_data.row_groups.begin() + row_group_start, - _file_itm_data.row_groups.begin() + row_group_end, - _pass_itm_data->row_groups.begin()); - - auto const num_passes = _input_pass_row_group_offsets.size() - 1; - CUDF_EXPECTS(_current_input_pass < num_passes, "Encountered an invalid read pass index"); - - auto const chunks_per_rowgroup = _input_columns.size(); - auto const num_chunks = chunks_per_rowgroup * num_row_groups; - - auto chunk_start = _file_itm_data.chunks.begin() + (row_group_start * chunks_per_rowgroup); - auto chunk_end = _file_itm_data.chunks.begin() + (row_group_end * chunks_per_rowgroup); - - _pass_itm_data->chunks = cudf::detail::hostdevice_vector(num_chunks, _stream); - std::copy(chunk_start, chunk_end, _pass_itm_data->chunks.begin()); - - // adjust skip_rows and num_rows by what's available in the row groups we are processing - if (num_passes == 1) { - _pass_itm_data->skip_rows = _file_itm_data.global_skip_rows; - _pass_itm_data->num_rows = _file_itm_data.global_num_rows; - } else { - auto const global_start_row = _file_itm_data.global_skip_rows; - auto const global_end_row = global_start_row + _file_itm_data.global_num_rows; - auto const start_row = std::max(_input_pass_row_count[_current_input_pass], global_start_row); - auto const end_row = std::min(_input_pass_row_count[_current_input_pass + 1], global_end_row); - - // skip_rows is always global in the sense that it is relative to the first row of - // everything we will be reading, regardless of what pass we are on. - // num_rows is how many rows we are reading this pass. - _pass_itm_data->skip_rows = global_start_row + _input_pass_row_count[_current_input_pass]; - _pass_itm_data->num_rows = end_row - start_row; - } -} - void reader::impl::load_and_decompress_data() { // This function should never be called if `num_rows == 0`. 
@@ -1034,359 +835,8 @@ void print_pages(cudf::detail::hostdevice_vector& pages, rmm::cuda_str p.str_bytes); } } - -void print_cumulative_page_info(cudf::detail::hostdevice_vector& pages, - rmm::device_uvector const& page_index, - rmm::device_uvector const& c_info, - rmm::cuda_stream_view stream) -{ - pages.device_to_host_sync(stream); - - printf("------------\nCumulative sizes by page\n"); - - std::vector schemas(pages.size()); - std::vector h_page_index(pages.size()); - CUDF_CUDA_TRY(cudaMemcpy( - h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDefault)); - std::vector h_cinfo(pages.size()); - CUDF_CUDA_TRY(cudaMemcpy( - h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDefault)); - auto schema_iter = cudf::detail::make_counting_transform_iterator( - 0, [&](size_type i) { return pages[h_page_index[i]].src_col_schema; }); - thrust::copy(thrust::seq, schema_iter, schema_iter + pages.size(), schemas.begin()); - auto last = thrust::unique(thrust::seq, schemas.begin(), schemas.end()); - schemas.resize(last - schemas.begin()); - printf("Num schemas: %lu\n", schemas.size()); - - for (size_t idx = 0; idx < schemas.size(); idx++) { - printf("Schema %d\n", schemas[idx]); - for (size_t pidx = 0; pidx < pages.size(); pidx++) { - auto const& page = pages[h_page_index[pidx]]; - if (page.flags & PAGEINFO_FLAGS_DICTIONARY || page.src_col_schema != schemas[idx]) { - continue; - } - printf("\tP: {%lu, %lu}\n", h_cinfo[pidx].row_count, h_cinfo[pidx].size_bytes); - } - } -} - -void print_cumulative_row_info(host_span sizes, - std::string const& label, - std::optional> splits = std::nullopt) -{ - if (splits.has_value()) { - printf("------------\nSplits\n"); - for (size_t idx = 0; idx < splits->size(); idx++) { - printf("{%lu, %lu}\n", splits.value()[idx].skip_rows, splits.value()[idx].num_rows); - } - } - - printf("------------\nCumulative sizes %s\n", label.c_str()); - for (size_t idx = 0; idx < sizes.size(); idx++) { - 
printf("{%lu, %lu, %d}", sizes[idx].row_count, sizes[idx].size_bytes, sizes[idx].key); - if (splits.has_value()) { - // if we have a split at this row count and this is the last instance of this row count - auto start = thrust::make_transform_iterator( - splits->begin(), [](chunk_read_info const& i) { return i.skip_rows; }); - auto end = start + splits->size(); - auto split = std::find(start, end, sizes[idx].row_count); - auto const split_index = [&]() -> int { - if (split != end && - ((idx == sizes.size() - 1) || (sizes[idx + 1].row_count > sizes[idx].row_count))) { - return static_cast(std::distance(start, split)); - } - return idx == 0 ? 0 : -1; - }(); - if (split_index >= 0) { - printf(" <-- split {%lu, %lu}", - splits.value()[split_index].skip_rows, - splits.value()[split_index].num_rows); - } - } - printf("\n"); - } -} #endif // PREPROCESS_DEBUG -/** - * @brief Functor which reduces two cumulative_row_info structs of the same key. - */ -struct cumulative_row_sum { - cumulative_row_info operator() - __device__(cumulative_row_info const& a, cumulative_row_info const& b) const - { - return cumulative_row_info{a.row_count + b.row_count, a.size_bytes + b.size_bytes, a.key}; - } -}; - -/** - * @brief Functor which computes the total data size for a given type of cudf column. - * - * In the case of strings, the return size does not include the chars themselves. That - * information is tracked separately (see PageInfo::str_bytes). - */ -struct row_size_functor { - __device__ size_t validity_size(size_t num_rows, bool nullable) - { - return nullable ? 
(cudf::util::div_rounding_up_safe(num_rows, size_t{32}) * 4) : 0; - } - - template - __device__ size_t operator()(size_t num_rows, bool nullable) - { - auto const element_size = sizeof(device_storage_type_t); - return (element_size * num_rows) + validity_size(num_rows, nullable); - } -}; - -template <> -__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) -{ - auto const offset_size = sizeof(size_type); - // NOTE: Adding the + 1 offset here isn't strictly correct. There will only be 1 extra offset - // for the entire column, whereas this is adding an extra offset per page. So we will get a - // small over-estimate of the real size of the order : # of pages * 4 bytes. It seems better - // to overestimate size somewhat than to underestimate it and potentially generate chunks - // that are too large. - return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); -} - -template <> -__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) -{ - return validity_size(num_rows, nullable); -} - -template <> -__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) -{ - // only returns the size of offsets and validity. the size of the actual string chars - // is tracked separately. - auto const offset_size = sizeof(size_type); - // see note about offsets in the list_view template. - return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); -} - -/** - * @brief Functor which computes the total output cudf data size for all of - * the data in this page. - * - * Sums across all nesting levels. 
- */ -struct get_cumulative_row_info { - PageInfo const* const pages; - - __device__ cumulative_row_info operator()(size_type index) - { - auto const& page = pages[index]; - if (page.flags & PAGEINFO_FLAGS_DICTIONARY) { - return cumulative_row_info{0, 0, page.src_col_schema}; - } - - // total nested size, not counting string data - auto iter = - cudf::detail::make_counting_transform_iterator(0, [page, index] __device__(size_type i) { - auto const& pni = page.nesting[i]; - return cudf::type_dispatcher( - data_type{pni.type}, row_size_functor{}, pni.size, pni.nullable); - }); - - size_t const row_count = static_cast(page.nesting[0].size); - return { - row_count, - thrust::reduce(thrust::seq, iter, iter + page.num_output_nesting_levels) + page.str_bytes, - page.src_col_schema}; - } -}; - -/** - * @brief Functor which computes the effective size of all input columns by page. - * - * For a given row, we want to find the cost of all pages for all columns involved - * in loading up to that row. The complication here is that not all pages are the - * same size between columns. Example: - * - * page row counts - * Column A: 0 <----> 100 <----> 200 - * Column B: 0 <---------------> 200 <--------> 400 - | - * if we decide to split at row 100, we don't really know the actual amount of bytes in column B - * at that point. So we have to proceed as if we are taking the bytes from all 200 rows of that - * page. Essentially, a conservative over-estimate of the real size. 
- */ -struct row_total_size { - cumulative_row_info const* c_info; - size_type const* key_offsets; - size_t num_keys; - - __device__ cumulative_row_info operator()(cumulative_row_info const& i) - { - // sum sizes for each input column at this row - size_t sum = 0; - for (int idx = 0; idx < num_keys; idx++) { - auto const start = key_offsets[idx]; - auto const end = key_offsets[idx + 1]; - auto iter = cudf::detail::make_counting_transform_iterator( - 0, [&] __device__(size_type i) { return c_info[i].row_count; }); - auto const page_index = - thrust::lower_bound(thrust::seq, iter + start, iter + end, i.row_count) - iter; - sum += c_info[page_index].size_bytes; - } - return {i.row_count, sum, i.key}; - } -}; - -/** - * @brief Given a vector of cumulative {row_count, byte_size} pairs and a chunk read - * limit, determine the set of splits. - * - * @param sizes Vector of cumulative {row_count, byte_size} pairs - * @param num_rows Total number of rows to read - * @param chunk_read_limit Limit on total number of bytes to be returned per read, for all columns - */ -std::vector find_splits(std::vector const& sizes, - size_t num_rows, - size_t chunk_read_limit) -{ - // now we have an array of {row_count, real output bytes}. just walk through it and generate - // splits. - // TODO: come up with a clever way to do this entirely in parallel. For now, as long as batch - // sizes are reasonably large, this shouldn't iterate too many times - std::vector splits; - { - size_t cur_pos = 0; - size_t cur_cumulative_size = 0; - size_t cur_row_count = 0; - auto start = thrust::make_transform_iterator(sizes.begin(), [&](cumulative_row_info const& i) { - return i.size_bytes - cur_cumulative_size; - }); - auto end = start + sizes.size(); - while (cur_row_count < num_rows) { - int64_t split_pos = - thrust::lower_bound(thrust::seq, start + cur_pos, end, chunk_read_limit) - start; - - // if we're past the end, or if the returned bucket is > than the chunk_read_limit, move back - // one. 
- if (static_cast(split_pos) >= sizes.size() || - (sizes[split_pos].size_bytes - cur_cumulative_size > chunk_read_limit)) { - split_pos--; - } - - // best-try. if we can't find something that'll fit, we have to go bigger. we're doing this in - // a loop because all of the cumulative sizes for all the pages are sorted into one big list. - // so if we had two columns, both of which had an entry {1000, 10000}, that entry would be in - // the list twice. so we have to iterate until we skip past all of them. The idea is that we - // either do this, or we have to call unique() on the input first. - while (split_pos < (static_cast(sizes.size()) - 1) && - (split_pos < 0 || sizes[split_pos].row_count == cur_row_count)) { - split_pos++; - } - - auto const start_row = cur_row_count; - cur_row_count = sizes[split_pos].row_count; - splits.push_back(chunk_read_info{start_row, cur_row_count - start_row}); - cur_pos = split_pos; - cur_cumulative_size = sizes[split_pos].size_bytes; - } - } - // print_cumulative_row_info(sizes, "adjusted", splits); - - return splits; -} - -/** - * @brief Given a set of pages that have had their sizes computed by nesting level and - * a limit on total read size, generate a set of {skip_rows, num_rows} pairs representing - * a set of reads that will generate output columns of total size <= `chunk_read_limit` bytes. 
- * - * @param pages All pages in the file - * @param id Additional intermediate information required to process the pages - * @param num_rows Total number of rows to read - * @param chunk_read_limit Limit on total number of bytes to be returned per read, for all columns - * @param stream CUDA stream to use - */ -std::vector compute_splits(cudf::detail::hostdevice_vector& pages, - pass_intermediate_data const& id, - size_t num_rows, - size_t chunk_read_limit, - rmm::cuda_stream_view stream) -{ - auto const& page_keys = id.page_keys; - auto const& page_index = id.page_index; - - // generate cumulative row counts and sizes - rmm::device_uvector c_info(page_keys.size(), stream); - // convert PageInfo to cumulative_row_info - auto page_input = thrust::make_transform_iterator(page_index.begin(), - get_cumulative_row_info{pages.device_ptr()}); - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - page_keys.begin(), - page_keys.end(), - page_input, - c_info.begin(), - thrust::equal_to{}, - cumulative_row_sum{}); - // print_cumulative_page_info(pages, page_index, c_info, stream); - - // sort by row count - rmm::device_uvector c_info_sorted{c_info, stream}; - thrust::sort(rmm::exec_policy(stream), - c_info_sorted.begin(), - c_info_sorted.end(), - [] __device__(cumulative_row_info const& a, cumulative_row_info const& b) { - return a.row_count < b.row_count; - }); - - // std::vector h_c_info_sorted(c_info_sorted.size()); - // CUDF_CUDA_TRY(cudaMemcpy(h_c_info_sorted.data(), - // c_info_sorted.data(), - // sizeof(cumulative_row_info) * c_info_sorted.size(), - // cudaMemcpyDefault)); - // print_cumulative_row_info(h_c_info_sorted, "raw"); - - // generate key offsets (offsets to the start of each partition of keys). 
worst case is 1 page per - // key - rmm::device_uvector key_offsets(page_keys.size() + 1, stream); - auto const key_offsets_end = thrust::reduce_by_key(rmm::exec_policy(stream), - page_keys.begin(), - page_keys.end(), - thrust::make_constant_iterator(1), - thrust::make_discard_iterator(), - key_offsets.begin()) - .second; - size_t const num_unique_keys = key_offsets_end - key_offsets.begin(); - thrust::exclusive_scan( - rmm::exec_policy(stream), key_offsets.begin(), key_offsets.end(), key_offsets.begin()); - - // adjust the cumulative info such that for each row count, the size includes any pages that span - // that row count. this is so that if we have this case: - // page row counts - // Column A: 0 <----> 100 <----> 200 - // Column B: 0 <---------------> 200 <--------> 400 - // | - // if we decide to split at row 100, we don't really know the actual amount of bytes in column B - // at that point. So we have to proceed as if we are taking the bytes from all 200 rows of that - // page. - // - rmm::device_uvector aggregated_info(c_info.size(), stream); - thrust::transform(rmm::exec_policy(stream), - c_info_sorted.begin(), - c_info_sorted.end(), - aggregated_info.begin(), - row_total_size{c_info.data(), key_offsets.data(), num_unique_keys}); - - // bring back to the cpu - std::vector h_aggregated_info(aggregated_info.size()); - CUDF_CUDA_TRY(cudaMemcpyAsync(h_aggregated_info.data(), - aggregated_info.data(), - sizeof(cumulative_row_info) * c_info.size(), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); - - return find_splits(h_aggregated_info, num_rows, chunk_read_limit); -} - struct get_page_chunk_idx { __device__ size_type operator()(PageInfo const& page) { return page.chunk_idx; } }; @@ -1822,12 +1272,8 @@ void reader::impl::preprocess_pages(bool uses_custom_row_bounds, size_t chunk_re _pass_itm_data->page_keys = std::move(page_keys); _pass_itm_data->page_index = std::move(page_index); - // compute splits if necessary. 
otherwise return a single split representing - // the whole file. - _pass_itm_data->output_chunk_read_info = - _output_chunk_read_limit > 0 - ? compute_splits(pages, *_pass_itm_data, num_rows, chunk_read_limit, _stream) - : std::vector{{skip_rows, num_rows}}; + // compute splits for the pass + compute_splits_for_pass(); } void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds) diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index f3a43cbc63c..dd049d401cf 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -51,19 +51,21 @@ std::unique_ptr gather_column_buffer::make_string_column_impl(rmm::cuda_ return make_strings_column(*_strings, stream, _mr); } -void inline_column_buffer::allocate_strings_data(rmm::cuda_stream_view stream) +void cudf::io::detail::inline_column_buffer::allocate_strings_data(rmm::cuda_stream_view stream) { CUDF_EXPECTS(type.id() == type_id::STRING, "allocate_strings_data called for non-string column"); // size + 1 for final offset. _string_data will be initialized later. 
_data = create_data(data_type{type_id::INT32}, size + 1, stream, _mr); } -void inline_column_buffer::create_string_data(size_t num_bytes, rmm::cuda_stream_view stream) +void cudf::io::detail::inline_column_buffer::create_string_data(size_t num_bytes, + rmm::cuda_stream_view stream) { _string_data = rmm::device_buffer(num_bytes, stream, _mr); } -std::unique_ptr inline_column_buffer::make_string_column_impl(rmm::cuda_stream_view stream) +std::unique_ptr cudf::io::detail::inline_column_buffer::make_string_column_impl( + rmm::cuda_stream_view stream) { // no need for copies, just transfer ownership of the data_buffers to the columns auto const state = mask_state::UNALLOCATED; @@ -324,7 +326,7 @@ std::unique_ptr empty_like(column_buffer_base& buffer, } using pointer_type = gather_column_buffer; -using string_type = inline_column_buffer; +using string_type = cudf::io::detail::inline_column_buffer; using pointer_column_buffer = column_buffer_base; using string_column_buffer = column_buffer_base; From 053da82810ad78286602cfd09e37f8a22cb0a15b Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Tue, 10 Oct 2023 13:28:01 -0400 Subject: [PATCH 48/76] Make parquet schema index type consistent (#14256) While working on parquet schema issue I noticed that the parent and child index didn't match. Discussion ensued and `size_type` was decided. 
Authors: - Mike Wilson (https://github.com/hyperbolic2346) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - MithunR (https://github.com/mythrocks) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14256 --- cpp/src/io/parquet/parquet.hpp | 6 ++++-- cpp/src/io/parquet/reader_impl_helpers.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index c5993d73dec..dbec59670c7 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -18,6 +18,8 @@ #include "parquet_common.hpp" +#include + #include #include @@ -152,8 +154,8 @@ struct SchemaElement { // The following fields are filled in later during schema initialization int max_definition_level = 0; int max_repetition_level = 0; - int parent_idx = 0; - std::vector children_idx; + size_type parent_idx = 0; + std::vector children_idx; bool operator==(SchemaElement const& other) const { diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 171cf07da3e..040c6403f57 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -213,7 +213,7 @@ void metadata::sanitize_schema() // add a struct child and move this element's children to the struct schema_elem.converted_type = LIST; schema_elem.repetition_type = OPTIONAL; - auto const struct_node_idx = schema.size(); + auto const struct_node_idx = static_cast(schema.size()); SchemaElement struct_elem; struct_elem.name = "struct_node"; From 5039d043a08e7ea7e5656bab60a6fced4dfa2f1d Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 10 Oct 2023 15:06:24 -0400 Subject: [PATCH 49/76] Expose stream parameter in public strings APIs (#14260) Add stream parameter to public APIs: - `cudf::strings::strip()` - `cudf::strings::slice_strings()` - `cudf::strings::pad()` - 
`cudf::strings::zfill()` - `cudf::strings::wrap()` Also cleaned up some of the doxygen comments and added stream-tests. Reference #13744 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14260 --- cpp/include/cudf/strings/padding.hpp | 14 ++-- cpp/include/cudf/strings/slice.hpp | 30 +++++---- cpp/include/cudf/strings/strip.hpp | 4 +- cpp/include/cudf/strings/wrap.hpp | 14 ++-- cpp/src/strings/padding.cu | 6 +- cpp/src/strings/slice.cu | 7 +- cpp/src/strings/strip.cu | 5 +- cpp/src/strings/wrap.cu | 8 +-- cpp/tests/CMakeLists.txt | 4 +- cpp/tests/streams/strings/strings_tests.cpp | 71 +++++++++++++++++++++ 10 files changed, 125 insertions(+), 38 deletions(-) create mode 100644 cpp/tests/streams/strings/strings_tests.cpp diff --git a/cpp/include/cudf/strings/padding.hpp b/cpp/include/cudf/strings/padding.hpp index 7699159fbea..f0cb351eeda 100644 --- a/cpp/include/cudf/strings/padding.hpp +++ b/cpp/include/cudf/strings/padding.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,6 +51,7 @@ namespace strings { * Default is pad right (left justify) * @param fill_char Single UTF-8 character to use for padding; * Default is the space character + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New column with padded strings */ @@ -59,6 +60,7 @@ std::unique_ptr pad( size_type width, side_type side = side_type::RIGHT, std::string_view fill_char = " ", + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -79,14 +81,16 @@ std::unique_ptr pad( * r is now ['001234','-09876','+00.34','-342567', '0002+2'] * @endcode * - * @param input Strings instance for this operation. - * @param width The minimum number of characters for each string. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of strings. + * @param input Strings instance for this operation + * @param width The minimum number of characters for each string + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of strings */ std::unique_ptr zfill( strings_column_view const& input, size_type width, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/slice.hpp b/cpp/include/cudf/strings/slice.hpp index 5f2c71725eb..f106663be9b 100644 --- a/cpp/include/cudf/strings/slice.hpp +++ b/cpp/include/cudf/strings/slice.hpp @@ -50,18 +50,20 @@ namespace strings { * r2 is now ["lo","ob"] * @endcode * - * @param strings Strings column for this operation. - * @param start First character position to begin the substring. 
- * @param stop Last character position (exclusive) to end the substring. - * @param step Distance between input characters retrieved. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with sorted elements of this instance. + * @param input Strings column for this operation + * @param start First character position to begin the substring + * @param stop Last character position (exclusive) to end the substring + * @param step Distance between input characters retrieved + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with sorted elements of this instance */ std::unique_ptr slice_strings( - strings_column_view const& strings, + strings_column_view const& input, numeric_scalar const& start = numeric_scalar(0, false), numeric_scalar const& stop = numeric_scalar(0, false), numeric_scalar const& step = numeric_scalar(1), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -95,16 +97,18 @@ std::unique_ptr slice_strings( * @throw cudf::logic_error if starts and stops are not same integer type. * @throw cudf::logic_error if starts or stops contains nulls. * - * @param strings Strings column for this operation. - * @param starts First character positions to begin the substring. - * @param stops Last character (exclusive) positions to end the substring. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with sorted elements of this instance. 
+ * @param input Strings column for this operation + * @param starts First character positions to begin the substring + * @param stops Last character (exclusive) positions to end the substring + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with sorted elements of this instance */ std::unique_ptr slice_strings( - strings_column_view const& strings, + strings_column_view const& input, column_view const& starts, column_view const& stops, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/strip.hpp b/cpp/include/cudf/strings/strip.hpp index adf3b291144..556d6805ac3 100644 --- a/cpp/include/cudf/strings/strip.hpp +++ b/cpp/include/cudf/strings/strip.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,6 +57,7 @@ namespace strings { * string; Default is both * @param to_strip UTF-8 encoded characters to strip from each string; * Default is empty string which indicates strip whitespace characters + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings column. 
*/ @@ -64,6 +65,7 @@ std::unique_ptr strip( strings_column_view const& input, side_type side = side_type::BOTH, string_scalar const& to_strip = string_scalar(""), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/wrap.hpp b/cpp/include/cudf/strings/wrap.hpp index 8d2d43c7f0f..efdc3e62aff 100644 --- a/cpp/include/cudf/strings/wrap.hpp +++ b/cpp/include/cudf/strings/wrap.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,14 +57,16 @@ namespace strings { * wrapped_string_tbl = ["the quick\nbrown fox\njumped over\nthe lazy\nbrown dog", "hello, world"] * ``` * - * @param[in] strings String column. - * @param[in] width Maximum character width of a line within each string. - * @param[in] mr Device memory resource used to allocate the returned column's device memory - * @return Column of wrapped strings. 
+ * @param input String column + * @param width Maximum character width of a line within each string + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Column of wrapped strings */ std::unique_ptr wrap( - strings_column_view const& strings, + strings_column_view const& input, size_type width, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu index c501a8bf7b4..850ccaa4535 100644 --- a/cpp/src/strings/padding.cu +++ b/cpp/src/strings/padding.cu @@ -168,18 +168,20 @@ std::unique_ptr pad(strings_column_view const& input, size_type width, side_type side, std::string_view fill_char, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::pad(input, width, side, fill_char, cudf::get_default_stream(), mr); + return detail::pad(input, width, side, fill_char, stream, mr); } std::unique_ptr zfill(strings_column_view const& input, size_type width, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::zfill(input, width, cudf::get_default_stream(), mr); + return detail::zfill(input, width, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/slice.cu b/cpp/src/strings/slice.cu index cce6a19a5a6..5a1fee92c7d 100644 --- a/cpp/src/strings/slice.cu +++ b/cpp/src/strings/slice.cu @@ -248,20 +248,21 @@ std::unique_ptr slice_strings(strings_column_view const& strings, numeric_scalar const& start, numeric_scalar const& stop, numeric_scalar const& step, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::slice_strings(strings, start, stop, step, cudf::get_default_stream(), mr); + return 
detail::slice_strings(strings, start, stop, step, stream, mr); } std::unique_ptr slice_strings(strings_column_view const& strings, column_view const& starts_column, column_view const& stops_column, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::slice_strings( - strings, starts_column, stops_column, cudf::get_default_stream(), mr); + return detail::slice_strings(strings, starts_column, stops_column, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/strip.cu b/cpp/src/strings/strip.cu index 6fb7c671a87..26df76850f7 100644 --- a/cpp/src/strings/strip.cu +++ b/cpp/src/strings/strip.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -86,10 +86,11 @@ std::unique_ptr strip(strings_column_view const& input, std::unique_ptr strip(strings_column_view const& input, side_type side, string_scalar const& to_strip, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::strip(input, side, to_strip, cudf::get_default_stream(), mr); + return detail::strip(input, side, to_strip, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/wrap.cu b/cpp/src/strings/wrap.cu index 335908d65d1..aa87a663964 100644 --- a/cpp/src/strings/wrap.cu +++ b/cpp/src/strings/wrap.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,10 +19,9 @@ #include #include #include -#include -#include #include #include +#include #include #include @@ -133,10 +132,11 @@ std::unique_ptr wrap(strings_column_view const& strings, std::unique_ptr wrap(strings_column_view const& strings, size_type width, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::wrap(strings, width, cudf::get_default_stream(), mr); + return detail::wrap(strings, width, stream, mr); } } // namespace strings diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ffaba7d6fa7..b15a6c41d39 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -633,8 +633,8 @@ ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing) ConfigureTest( - STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE - testing + STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp + streams/strings/strings_tests.cpp STREAM_MODE testing ) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/strings/strings_tests.cpp b/cpp/tests/streams/strings/strings_tests.cpp new file mode 100644 index 00000000000..0db467a6895 --- /dev/null +++ b/cpp/tests/streams/strings/strings_tests.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +class StringsTest : public cudf::test::BaseFixture {}; + +TEST_F(StringsTest, Strip) +{ + auto input = cudf::test::strings_column_wrapper({" aBc ", " ", "aaaa ", "\tb"}); + auto view = cudf::strings_column_view(input); + + auto const strip = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + auto const side = cudf::strings::side_type::BOTH; + cudf::strings::strip(view, side, strip, cudf::test::get_default_stream()); +} + +TEST_F(StringsTest, Pad) +{ + auto input = cudf::test::strings_column_wrapper({"333", "", "4444", "1"}); + auto view = cudf::strings_column_view(input); + + auto const side = cudf::strings::side_type::BOTH; + cudf::strings::pad(view, 6, side, " ", cudf::test::get_default_stream()); + cudf::strings::zfill(view, 6, cudf::test::get_default_stream()); +} + +TEST_F(StringsTest, Wrap) +{ + auto input = cudf::test::strings_column_wrapper({"the quick brown fox jumped"}); + auto view = cudf::strings_column_view(input); + + cudf::strings::wrap(view, 6, cudf::test::get_default_stream()); +} + +TEST_F(StringsTest, Slice) +{ + auto input = cudf::test::strings_column_wrapper({"hello", "these", "are test strings"}); + auto view = cudf::strings_column_view(input); + + auto start = cudf::numeric_scalar(2, true, cudf::test::get_default_stream()); + auto stop = cudf::numeric_scalar(5, true, cudf::test::get_default_stream()); + auto step = cudf::numeric_scalar(1, true, cudf::test::get_default_stream()); + cudf::strings::slice_strings(view, 
start, stop, step, cudf::test::get_default_stream()); + + auto starts = cudf::test::fixed_width_column_wrapper({1, 2, 3}); + auto stops = cudf::test::fixed_width_column_wrapper({4, 5, 6}); + cudf::strings::slice_strings(view, starts, stops, cudf::test::get_default_stream()); +} From c0c7ed8405c679752439081ee1b42b22658264c9 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Wed, 11 Oct 2023 00:04:58 +0200 Subject: [PATCH 50/76] Add `bytes_per_second` to transpose benchmark (#14170) This patch relates to #13735. Benchmark: [transpose_benchmark.txt](https://github.com/rapidsai/cudf/files/12699834/transpose_benchmark.txt) Authors: - Martin Marenz (https://github.com/Blonck) - Mark Harris (https://github.com/harrism) Approvers: - Mark Harris (https://github.com/harrism) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/14170 --- cpp/benchmarks/transpose/transpose.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index 2f41bda4b88..c2737325462 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -20,17 +20,19 @@ #include #include #include +#include #include #include static void BM_transpose(benchmark::State& state) { - auto count = state.range(0); + auto count = state.range(0); + constexpr auto column_type_id = cudf::type_id::INT32; auto int_column_generator = thrust::make_transform_iterator(thrust::counting_iterator(0), [count](int i) { return cudf::make_numeric_column( - cudf::data_type{cudf::type_id::INT32}, count, cudf::mask_state::ALL_VALID); + cudf::data_type{column_type_id}, count, cudf::mask_state::ALL_VALID); }); auto input_table = cudf::table(std::vector(int_column_generator, int_column_generator + count)); @@ -40,6 +42,17 @@ static void BM_transpose(benchmark::State& state) cuda_event_timer raii(state, true); auto output = cudf::transpose(input); } + + // 
Collect memory statistics. + auto const bytes_read = static_cast(input.num_columns()) * input.num_rows() * + sizeof(cudf::id_to_type); + auto const bytes_written = bytes_read; + // Account for nullability in input and output. + auto const null_bytes = 2 * static_cast(input.num_columns()) * + cudf::bitmask_allocation_size_bytes(input.num_rows()); + + state.SetBytesProcessed(static_cast(state.iterations()) * + (bytes_read + bytes_written + null_bytes)); } class Transpose : public cudf::benchmark {}; From 0ed7725416879a824ee5b96292eda2d1048a9ada Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Wed, 11 Oct 2023 00:06:23 +0200 Subject: [PATCH 51/76] Add `bytes_per_second` to shift benchmark (#13950) Adds `bytes_per_second` to `SHIFT_BENCH`. This patch relates to #13735. Authors: - Martin Marenz (https://github.com/Blonck) Approvers: - Karthikeyan (https://github.com/karthikeyann) - Nghia Truong (https://github.com/ttnghia) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/13950 --- cpp/benchmarks/copying/shift.cu | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index 460100a8fe9..e1169e3bcd6 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -56,18 +56,32 @@ static void BM_shift(benchmark::State& state) cudf::size_type size = state.range(0); cudf::size_type offset = size * (static_cast(shift_factor) / 100.0); - auto const input_table = - create_sequence_table({cudf::type_to_id()}, - row_count{size}, - use_validity ? std::optional{1.0} : std::nullopt); + auto constexpr column_type_id = cudf::type_id::INT32; + using column_type = cudf::id_to_type; + + auto const input_table = create_sequence_table( + {column_type_id}, row_count{size}, use_validity ? std::optional{1.0} : std::nullopt); cudf::column_view input{input_table->get_column(0)}; - auto fill = use_validity ? 
make_scalar() : make_scalar(777); + auto fill = use_validity ? make_scalar() : make_scalar(777); for (auto _ : state) { cuda_event_timer raii(state, true); auto output = cudf::shift(input, offset, *fill); } + + auto const elems_read = (size - offset); + auto const bytes_read = elems_read * sizeof(column_type); + + // If 'use_validity' is false, the fill value is a number, and the entire column + // (excluding the null bitmask) needs to be written. On the other hand, if 'use_validity' + // is true, only the elements that can be shifted are written, along with the full null bitmask. + auto const elems_written = use_validity ? (size - offset) : size; + auto const bytes_written = elems_written * sizeof(column_type); + auto const null_bytes = use_validity ? 2 * cudf::bitmask_allocation_size_bytes(size) : 0; + + state.SetBytesProcessed(static_cast(state.iterations()) * + (bytes_written + bytes_read + null_bytes)); } class Shift : public cudf::benchmark {}; From aa8b0f8e4e71a8e2b076656e0a8bf00bfc15ecb8 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Tue, 10 Oct 2023 16:14:51 -0700 Subject: [PATCH 52/76] Handle empty string correctly in Parquet statistics (#14257) An empty string should be a valid minimum value for a string column, but the current parquet writer considers an empty string to have no value when writing the column chunk statistics. This PR changes all fields in the Statistics struct to be `thrust::optional` to help distinguish between a valid empty string and no value. 
Authors: - Ed Seidl (https://github.com/etseidl) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14257 --- .../io/parquet/compact_protocol_reader.cpp | 15 ++-- .../io/parquet/compact_protocol_writer.cpp | 12 +-- cpp/src/io/parquet/parquet.hpp | 18 ++-- cpp/src/io/parquet/predicate_pushdown.cpp | 14 +-- cpp/tests/io/parquet_test.cpp | 85 +++++++++++++++---- 5 files changed, 104 insertions(+), 40 deletions(-) diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 81d1be64a45..1a345ee0750 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -767,12 +767,15 @@ bool CompactProtocolReader::read(ColumnIndex* c) bool CompactProtocolReader::read(Statistics* s) { - auto op = std::make_tuple(parquet_field_binary(1, s->max), - parquet_field_binary(2, s->min), - parquet_field_int64(3, s->null_count), - parquet_field_int64(4, s->distinct_count), - parquet_field_binary(5, s->max_value), - parquet_field_binary(6, s->min_value)); + using optional_binary = parquet_field_optional, parquet_field_binary>; + using optional_int64 = parquet_field_optional; + + auto op = std::make_tuple(optional_binary(1, s->max), + optional_binary(2, s->min), + optional_int64(3, s->null_count), + optional_int64(4, s->distinct_count), + optional_binary(5, s->max_value), + optional_binary(6, s->min_value)); return function_builder(this, op); } diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index 9adc8767880..00810269d3c 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -195,12 +195,12 @@ size_t CompactProtocolWriter::write(ColumnChunkMetaData const& s) size_t CompactProtocolWriter::write(Statistics const& s) { CompactProtocolFieldWriter c(*this); - if (not 
s.max.empty()) { c.field_binary(1, s.max); } - if (not s.min.empty()) { c.field_binary(2, s.min); } - if (s.null_count != -1) { c.field_int(3, s.null_count); } - if (s.distinct_count != -1) { c.field_int(4, s.distinct_count); } - if (not s.max_value.empty()) { c.field_binary(5, s.max_value); } - if (not s.min_value.empty()) { c.field_binary(6, s.min_value); } + if (s.max.has_value()) { c.field_binary(1, s.max.value()); } + if (s.min.has_value()) { c.field_binary(2, s.min.value()); } + if (s.null_count.has_value()) { c.field_int(3, s.null_count.value()); } + if (s.distinct_count.has_value()) { c.field_int(4, s.distinct_count.value()); } + if (s.max_value.has_value()) { c.field_binary(5, s.max_value.value()); } + if (s.min_value.has_value()) { c.field_binary(6, s.min_value.value()); } return c.value(); } diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index dbec59670c7..1cd16ac6102 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -215,12 +215,18 @@ struct SchemaElement { * @brief Thrift-derived struct describing column chunk statistics */ struct Statistics { - std::vector max; // deprecated max value in signed comparison order - std::vector min; // deprecated min value in signed comparison order - int64_t null_count = -1; // count of null values in the column - int64_t distinct_count = -1; // count of distinct values occurring - std::vector max_value; // max value for column determined by ColumnOrder - std::vector min_value; // min value for column determined by ColumnOrder + // deprecated max value in signed comparison order + thrust::optional> max; + // deprecated min value in signed comparison order + thrust::optional> min; + // count of null values in the column + thrust::optional null_count; + // count of distinct values occurring + thrust::optional distinct_count; + // max value for column determined by ColumnOrder + thrust::optional> max_value; + // min value for column determined by ColumnOrder + 
thrust::optional> min_value; }; /** diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index 9083be1c2dd..a5851de3c20 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -150,12 +150,14 @@ struct stats_caster { { } - void set_index(size_type index, std::vector const& binary_value, Type const type) + void set_index(size_type index, + thrust::optional> const& binary_value, + Type const type) { - if (!binary_value.empty()) { - val[index] = convert(binary_value.data(), binary_value.size(), type); + if (binary_value.has_value()) { + val[index] = convert(binary_value.value().data(), binary_value.value().size(), type); } - if (binary_value.empty()) { + if (not binary_value.has_value()) { clear_bit_unsafe(null_mask.data(), index); null_count++; } @@ -210,10 +212,10 @@ struct stats_caster { auto const& row_group = per_file_metadata[src_idx].row_groups[rg_idx]; auto const& colchunk = row_group.columns[col_idx]; // To support deprecated min, max fields. - auto const& min_value = colchunk.meta_data.statistics.min_value.size() > 0 + auto const& min_value = colchunk.meta_data.statistics.min_value.has_value() ? colchunk.meta_data.statistics.min_value : colchunk.meta_data.statistics.min; - auto const& max_value = colchunk.meta_data.statistics.max_value.size() > 0 + auto const& max_value = colchunk.meta_data.statistics.max_value.has_value() ? 
colchunk.meta_data.statistics.max_value : colchunk.meta_data.statistics.max; // translate binary data to Type then to diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 3e5d7033e60..fa85e3a4a1d 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -4161,8 +4161,10 @@ TEST_P(ParquetV2Test, LargeColumnIndex) // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max auto const ptype = fmd.schema[c + 1].type; auto const ctype = fmd.schema[c + 1].converted_type; - EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value, ptype, ctype) <= 0); - EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value, ptype, ctype) >= 0); + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value.value(), ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value.value(), ptype, ctype) >= 0); } } } @@ -4242,6 +4244,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndex) auto const ci = read_column_index(source, chunk); auto const stats = get_statistics(chunk); + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + // schema indexing starts at 1 auto const ptype = fmd.schema[c + 1].type; auto const ctype = fmd.schema[c + 1].converted_type; @@ -4250,10 +4255,10 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndex) EXPECT_FALSE(ci.null_pages[p]); // null_counts should always be 0 EXPECT_EQ(ci.null_counts[p], 0); - EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); } for (size_t p = 0; p < ci.max_values.size(); p++) - EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); + EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); } } } @@ -4344,7 +4349,10 @@ TEST_P(ParquetV2Test, 
CheckColumnOffsetIndexNulls) auto const stats = get_statistics(chunk); // should be half nulls, except no nulls in column 0 - EXPECT_EQ(stats.null_count, c == 0 ? 0 : num_rows / 2); + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + ASSERT_TRUE(stats.null_count.has_value()); + EXPECT_EQ(stats.null_count.value(), c == 0 ? 0 : num_rows / 2); // schema indexing starts at 1 auto const ptype = fmd.schema[c + 1].type; @@ -4356,10 +4364,10 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) } else { EXPECT_EQ(ci.null_counts[p], 0); } - EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); } for (size_t p = 0; p < ci.max_values.size(); p++) { - EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); + EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); } } } @@ -4436,7 +4444,12 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) auto const stats = get_statistics(chunk); // there should be no nulls except column 1 which is all nulls - EXPECT_EQ(stats.null_count, c == 1 ? num_rows : 0); + if (c != 1) { + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + } + ASSERT_TRUE(stats.null_count.has_value()); + EXPECT_EQ(stats.null_count.value(), c == 1 ? 
num_rows : 0); // schema indexing starts at 1 auto const ptype = fmd.schema[c + 1].type; @@ -4449,12 +4462,12 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) } if (not ci.null_pages[p]) { EXPECT_EQ(ci.null_counts[p], 0); - EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); } } for (size_t p = 0; p < ci.max_values.size(); p++) { if (not ci.null_pages[p]) { - EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); + EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); } } } @@ -4533,13 +4546,16 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexStruct) auto const ci = read_column_index(source, chunk); auto const stats = get_statistics(chunk); + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + auto const ptype = fmd.schema[colidx].type; auto const ctype = fmd.schema[colidx].converted_type; for (size_t p = 0; p < ci.min_values.size(); p++) { - EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(stats.min_value.value(), ci.min_values[p], ptype, ctype) <= 0); } for (size_t p = 0; p < ci.max_values.size(); p++) { - EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); + EXPECT_TRUE(compare_binary(stats.max_value.value(), ci.max_values[p], ptype, ctype) >= 0); } } } @@ -4829,11 +4845,14 @@ TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) auto const ci = read_column_index(source, chunk); auto const stats = get_statistics(chunk); + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max auto const ptype = fmd.schema[c + 1].type; auto const ctype = fmd.schema[c + 1].converted_type; - EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value, ptype, 
ctype) <= 0); - EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value, ptype, ctype) >= 0); + EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value.value(), ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value.value(), ptype, ctype) >= 0); // check that truncated values == expected EXPECT_EQ(memcmp(ci.min_values[0].data(), truncated_min[c], ci.min_values[0].size()), 0); @@ -4890,8 +4909,10 @@ TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation) // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max auto const ptype = fmd.schema[c + 1].type; auto const ctype = fmd.schema[c + 1].converted_type; - EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value, ptype, ctype) <= 0); - EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value, ptype, ctype) >= 0); + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value.value(), ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value.value(), ptype, ctype) >= 0); // check that truncated values == expected EXPECT_EQ(ci.min_values[0], truncated_min[c]); @@ -6737,6 +6758,38 @@ TEST_P(ParquetV2Test, CheckEncodings) } } +TEST_F(ParquetWriterTest, EmptyMinStringStatistics) +{ + char const* const min_val = ""; + char const* const max_val = "zzz"; + std::vector strings{min_val, max_val, "pining", "for", "the", "fjords"}; + + column_wrapper string_col{strings.begin(), strings.end()}; + auto const output = table_view{{string_col}}; + auto const filepath = temp_env->get_temp_filepath("EmptyMinStringStatistics.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::detail::FileMetaData fmd; + read_footer(source, &fmd); + + 
ASSERT_TRUE(fmd.row_groups.size() > 0); + ASSERT_TRUE(fmd.row_groups[0].columns.size() > 0); + auto const& chunk = fmd.row_groups[0].columns[0]; + auto const stats = get_statistics(chunk); + + ASSERT_TRUE(stats.min_value.has_value()); + ASSERT_TRUE(stats.max_value.has_value()); + auto const min_value = std::string{reinterpret_cast(stats.min_value.value().data()), + stats.min_value.value().size()}; + auto const max_value = std::string{reinterpret_cast(stats.max_value.value().data()), + stats.max_value.value().size()}; + EXPECT_EQ(min_value, std::string(min_val)); + EXPECT_EQ(max_value, std::string(max_val)); +} + TEST_F(ParquetReaderTest, RepeatedNoAnnotations) { constexpr unsigned char repeated_bytes[] = { From b17904dbaa4de1a162fcb4a0f64862f9f83b976f Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 10 Oct 2023 19:51:02 -0500 Subject: [PATCH 53/76] Add in java bindings for DataSource (#14254) This PR adds DataSource Java bindings. It also fixes a small bug in CUDF that made it so the bindings would not work for anything but CSV. 
Authors: - Robert (Bobby) Evans (https://github.com/revans2) Approvers: - Jason Lowe (https://github.com/jlowe) - Vukasin Milovanovic (https://github.com/vuule) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/14254 --- cpp/src/io/utilities/datasource.cpp | 8 + java/src/main/java/ai/rapids/cudf/Cuda.java | 24 +- .../main/java/ai/rapids/cudf/DataSource.java | 189 ++++++++++++++ .../java/ai/rapids/cudf/DataSourceHelper.java | 44 ++++ .../ai/rapids/cudf/DeviceMemoryBuffer.java | 6 +- .../ai/rapids/cudf/MultiBufferDataSource.java | 230 +++++++++++++++++ .../ai/rapids/cudf/ParquetChunkedReader.java | 45 +++- java/src/main/java/ai/rapids/cudf/Table.java | 99 +++++++- java/src/main/native/CMakeLists.txt | 1 + java/src/main/native/src/ChunkedReaderJni.cpp | 36 ++- java/src/main/native/src/CudfJni.cpp | 8 + .../main/native/src/DataSourceHelperJni.cpp | 237 ++++++++++++++++++ java/src/main/native/src/TableJni.cpp | 212 +++++++++++++++- java/src/main/native/src/cudf_jni_apis.hpp | 8 + .../test/java/ai/rapids/cudf/TableTest.java | 225 +++++++++++++++++ 15 files changed, 1358 insertions(+), 14 deletions(-) create mode 100644 java/src/main/java/ai/rapids/cudf/DataSource.java create mode 100644 java/src/main/java/ai/rapids/cudf/DataSourceHelper.java create mode 100644 java/src/main/java/ai/rapids/cudf/MultiBufferDataSource.java create mode 100644 java/src/main/native/src/DataSourceHelperJni.cpp diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 7a7121aa91d..5cdd92ce3b7 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -375,6 +375,14 @@ class user_datasource_wrapper : public datasource { return source->device_read(offset, size, stream); } + std::future device_read_async(size_t offset, + size_t size, + uint8_t* dst, + rmm::cuda_stream_view stream) override + { + return source->device_read_async(offset, size, dst, stream); + } + [[nodiscard]] size_t size() 
const override { return source->size(); } private: diff --git a/java/src/main/java/ai/rapids/cudf/Cuda.java b/java/src/main/java/ai/rapids/cudf/Cuda.java index e1298e29925..7cc3d30a9cf 100755 --- a/java/src/main/java/ai/rapids/cudf/Cuda.java +++ b/java/src/main/java/ai/rapids/cudf/Cuda.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,6 @@ */ package ai.rapids.cudf; -import ai.rapids.cudf.NvtxColor; -import ai.rapids.cudf.NvtxRange; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -90,6 +87,21 @@ private Stream() { this.id = -1; } + private Stream(long id) { + this.cleaner = null; + this.id = id; + } + + /** + * Wrap a given stream ID to make it accessible. + */ + static Stream wrap(long id) { + if (id == -1) { + return DEFAULT_STREAM; + } + return new Stream(id); + } + /** * Have this stream not execute new work until the work recorded in event completes. * @param event the event to wait on. @@ -122,7 +134,9 @@ public synchronized void close() { cleaner.delRef(); } if (closed) { - cleaner.logRefCountDebug("double free " + this); + if (cleaner != null) { + cleaner.logRefCountDebug("double free " + this); + } throw new IllegalStateException("Close called too many times " + this); } if (cleaner != null) { diff --git a/java/src/main/java/ai/rapids/cudf/DataSource.java b/java/src/main/java/ai/rapids/cudf/DataSource.java new file mode 100644 index 00000000000..1e5893235df --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/DataSource.java @@ -0,0 +1,189 @@ +/* + * + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package ai.rapids.cudf; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashMap; + +/** + * Base class that can be used to provide data dynamically to CUDF. This follows somewhat + * closely with cudf::io::datasource. There are a few main differences. + *
+ * First this does not expose async device reads. It will call the non-async device read API + * instead. This might be added in the future, but there was no direct use case for it in java + * right now to warrant the added complexity. + *
+ * Second there is no implementation of the device read API that returns a buffer instead of + * writing into one. This is not used by CUDF yet so testing an implementation that isn't used + * didn't feel ideal. If it is needed we will add one in the future. + */ +public abstract class DataSource implements AutoCloseable { + private static final Logger log = LoggerFactory.getLogger(DataSource.class); + + /** + * This is used to keep track of the HostMemoryBuffers in java land so the C++ layer + * does not have to do it. + */ + private final HashMap cachedBuffers = new HashMap<>(); + + @Override + public void close() { + if (!cachedBuffers.isEmpty()) { + throw new IllegalStateException("DataSource closed before all returned host buffers were closed"); + } + } + + /** + * Get the size of the source in bytes. + */ + public abstract long size(); + + /** + * Read data from the source at the given offset. Return a HostMemoryBuffer for the data + * that was read. + * @param offset where to start reading from. + * @param amount the maximum number of bytes to read. + * @return a buffer that points to the data. + * @throws IOException on any error. + */ + public abstract HostMemoryBuffer hostRead(long offset, long amount) throws IOException; + + + /** + * Called when the buffer returned from hostRead is done. The default is to close the buffer. + */ + protected void onHostBufferDone(HostMemoryBuffer buffer) { + if (buffer != null) { + buffer.close(); + } + } + + /** + * Read data from the source at the given offset into dest. Note that dest should not be closed, + * and no reference to it can outlive the call to hostRead. The target amount to read is + * dest's length. + * @param offset the offset to start reading from in the source. + * @param dest where to write the data. + * @return the actual number of bytes written to dest. 
+ */ + public abstract long hostRead(long offset, HostMemoryBuffer dest) throws IOException; + + /** + * Return true if this supports reading directly to the device else false. The default is + * no device support. This cannot change dynamically. It is typically read just once. + */ + public boolean supportsDeviceRead() { + return false; + } + + /** + * Get the size cutoff between device reads and host reads when device reads are supported. + * Anything larger than the cutoff will be a device read and anything smaller will be a + * host read. By default, the cutoff is 0 so all reads will be device reads if device reads + * are supported. + */ + public long getDeviceReadCutoff() { + return 0; + } + + /** + * Read data from the source at the given offset into dest. Note that dest should not be closed, + * and no reference to it can outlive the call to hostRead. The target amount to read is + * dest's length. + * @param offset the offset to start reading from + * @param dest where to write the data. + * @param stream the stream to do the copy on. + * @return the actual number of bytes written to dest. + */ + public long deviceRead(long offset, DeviceMemoryBuffer dest, + Cuda.Stream stream) throws IOException { + throw new IllegalStateException("Device read is not implemented"); + } + + ///////////////////////////////////////////////// + // Internal methods called from JNI + ///////////////////////////////////////////////// + + private static class NoopCleaner extends MemoryBuffer.MemoryBufferCleaner { + @Override + protected boolean cleanImpl(boolean logErrorIfNotClean) { + return true; + } + + @Override + public boolean isClean() { + return true; + } + } + private static final NoopCleaner cleaner = new NoopCleaner(); + + // Called from JNI + private void onHostBufferDone(long bufferId) { + HostMemoryBuffer hmb = cachedBuffers.remove(bufferId); + if (hmb != null) { + onHostBufferDone(hmb); + } else { + // Called from C++ destructor so avoid throwing... 
+ log.warn("Got a close callback for a buffer we could not find " + bufferId); + } + } + + // Called from JNI + private long hostRead(long offset, long amount, long dst) throws IOException { + if (amount < 0) { + throw new IllegalArgumentException("Cannot allocate more than " + Long.MAX_VALUE + " bytes"); + } + try (HostMemoryBuffer dstBuffer = new HostMemoryBuffer(dst, amount, cleaner)) { + return hostRead(offset, dstBuffer); + } + } + + // Called from JNI + private long[] hostReadBuff(long offset, long amount) throws IOException { + if (amount < 0) { + throw new IllegalArgumentException("Cannot read more than " + Long.MAX_VALUE + " bytes"); + } + HostMemoryBuffer buff = hostRead(offset, amount); + long[] ret = new long[3]; + if (buff != null) { + long id = buff.id; + if (cachedBuffers.put(id, buff) != null) { + throw new IllegalStateException("Already had a buffer cached for " + buff); + } + ret[0] = buff.address; + ret[1] = buff.length; + ret[2] = id; + } // else they are all 0 because java does that already + return ret; + } + + // Called from JNI + private long deviceRead(long offset, long amount, long dst, long stream) throws IOException { + if (amount < 0) { + throw new IllegalArgumentException("Cannot read more than " + Long.MAX_VALUE + " bytes"); + } + Cuda.Stream strm = Cuda.Stream.wrap(stream); + try (DeviceMemoryBuffer dstBuffer = new DeviceMemoryBuffer(dst, amount, cleaner)) { + return deviceRead(offset, dstBuffer, strm); + } + } +} diff --git a/java/src/main/java/ai/rapids/cudf/DataSourceHelper.java b/java/src/main/java/ai/rapids/cudf/DataSourceHelper.java new file mode 100644 index 00000000000..5d4dcb8e4e7 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/DataSourceHelper.java @@ -0,0 +1,44 @@ +/* + * + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package ai.rapids.cudf; + +/** + * This is here because we need some JNI methods to work with a DataSource, but + * we also want to cache callback methods at startup for performance reasons. If + * we put both in the same class we will get a deadlock because of how we load + * the JNI. We have a static block that blocks loading the class until the JNI + * library is loaded and the JNI library cannot load until the class is loaded + * and cached. This breaks the loop. + */ +class DataSourceHelper { + static { + NativeDepsLoader.loadNativeDeps(); + } + + static long createWrapperDataSource(DataSource ds) { + return createWrapperDataSource(ds, ds.size(), ds.supportsDeviceRead(), + ds.getDeviceReadCutoff()); + } + + private static native long createWrapperDataSource(DataSource ds, long size, + boolean deviceReadSupport, + long deviceReadCutoff); + + static native void destroyWrapperDataSource(long handle); +} diff --git a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java index c4d9bdb8f91..9eab607ed0b 100644 --- a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -112,6 +112,10 @@ public static DeviceMemoryBuffer fromRmm(long address, long lengthInBytes, long return new DeviceMemoryBuffer(address, lengthInBytes, rmmBufferAddress); } + DeviceMemoryBuffer(long address, long lengthInBytes, MemoryBufferCleaner cleaner) { + super(address, lengthInBytes, cleaner); + } + DeviceMemoryBuffer(long address, long lengthInBytes, long rmmBufferAddress) { super(address, lengthInBytes, new RmmDeviceBufferCleaner(rmmBufferAddress)); } diff --git a/java/src/main/java/ai/rapids/cudf/MultiBufferDataSource.java b/java/src/main/java/ai/rapids/cudf/MultiBufferDataSource.java new file mode 100644 index 00000000000..6986b6a7fec --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/MultiBufferDataSource.java @@ -0,0 +1,230 @@ +/* + * + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package ai.rapids.cudf; + +/** + * This is a DataSource that can take multiple HostMemoryBuffers. They + * are treated as if they are all part of a single file connected end to end. 
+ */ +public class MultiBufferDataSource extends DataSource { + private final long sizeInBytes; + private final HostMemoryBuffer[] hostBuffers; + private final long[] startOffsets; + private final HostMemoryAllocator allocator; + + // Metrics + private long hostReads = 0; + private long hostReadBytes = 0; + private long devReads = 0; + private long devReadBytes = 0; + + /** + * Create a new data source backed by multiple buffers. + * @param buffers the buffers that will back the data source. + */ + public MultiBufferDataSource(HostMemoryBuffer ... buffers) { + this(DefaultHostMemoryAllocator.get(), buffers); + } + + /** + * Create a new data source backed by multiple buffers. + * @param allocator the allocator to use for host buffers, if needed. + * @param buffers the buffers that will back the data source. + */ + public MultiBufferDataSource(HostMemoryAllocator allocator, HostMemoryBuffer ... buffers) { + int numBuffers = buffers.length; + hostBuffers = new HostMemoryBuffer[numBuffers]; + startOffsets = new long[numBuffers]; + + long currentOffset = 0; + for (int i = 0; i < numBuffers; i++) { + HostMemoryBuffer hmb = buffers[i]; + hmb.incRefCount(); + hostBuffers[i] = hmb; + startOffsets[i] = currentOffset; + currentOffset += hmb.getLength(); + } + sizeInBytes = currentOffset; + this.allocator = allocator; + } + + @Override + public long size() { + return sizeInBytes; + } + + private int getStartBufferIndexForOffset(long offset) { + assert (offset >= 0); + + // It is super common to read from the start or end of a file (the header or footer) + // so special case them + if (offset == 0) { + return 0; + } + int startIndex = 0; + int endIndex = startOffsets.length - 1; + if (offset >= startOffsets[endIndex]) { + return endIndex; + } + while (startIndex != endIndex) { + int midIndex = (int)(((long)startIndex + endIndex) / 2); + long midStartOffset = startOffsets[midIndex]; + if (offset >= midStartOffset) { + // It is either in mid or after mid. 
+ if (midIndex == endIndex || offset <= startOffsets[midIndex + 1]) { + // We found it in mid + return midIndex; + } else { + // It is after mid + startIndex = midIndex + 1; + } + } else { + // It is before mid + endIndex = midIndex - 1; + } + } + return startIndex; + } + + + interface DoCopy { + void copyFromHostBuffer(T dest, long destOffset, HostMemoryBuffer src, + long srcOffset, long srcAmount); + } + + private long read(long offset, T dest, DoCopy doCopy) { + assert (offset >= 0); + long realOffset = Math.min(offset, sizeInBytes); + long realAmount = Math.min(sizeInBytes - realOffset, dest.getLength()); + + int index = getStartBufferIndexForOffset(realOffset); + + HostMemoryBuffer buffer = hostBuffers[index]; + long bufferOffset = realOffset - startOffsets[index]; + long bufferAmount = Math.min(buffer.length - bufferOffset, realAmount); + long remainingAmount = realAmount; + long currentOffset = realOffset; + long outputOffset = 0; + + while (remainingAmount > 0) { + doCopy.copyFromHostBuffer(dest, outputOffset, buffer, + bufferOffset, bufferAmount); + remainingAmount -= bufferAmount; + outputOffset += bufferAmount; + currentOffset += bufferAmount; + index++; + if (index < hostBuffers.length) { + buffer = hostBuffers[index]; + bufferOffset = currentOffset - startOffsets[index]; + bufferAmount = Math.min(buffer.length - bufferOffset, remainingAmount); + } + } + + return realAmount; + } + + @Override + public HostMemoryBuffer hostRead(long offset, long amount) { + assert (offset >= 0); + assert (amount >= 0); + long realOffset = Math.min(offset, sizeInBytes); + long realAmount = Math.min(sizeInBytes - realOffset, amount); + + int index = getStartBufferIndexForOffset(realOffset); + + HostMemoryBuffer buffer = hostBuffers[index]; + long bufferOffset = realOffset - startOffsets[index]; + long bufferAmount = Math.min(buffer.length - bufferOffset, realAmount); + if (bufferAmount == realAmount) { + hostReads += 1; + hostReadBytes += realAmount; + // It all fits in a 
single buffer, so do a zero copy operation + return buffer.slice(bufferOffset, bufferAmount); + } else { + // We will have to allocate a new buffer and copy data into it. + boolean success = false; + HostMemoryBuffer ret = allocator.allocate(realAmount, true); + try { + long amountRead = read(offset, ret, HostMemoryBuffer::copyFromHostBuffer); + assert(amountRead == realAmount); + hostReads += 1; + hostReadBytes += amountRead; + success = true; + return ret; + } finally { + if (!success) { + ret.close(); + } + } + } + } + + @Override + public long hostRead(long offset, HostMemoryBuffer dest) { + long ret = read(offset, dest, HostMemoryBuffer::copyFromHostBuffer); + hostReads += 1; + hostReadBytes += ret; + return ret; + } + + @Override + public boolean supportsDeviceRead() { + return true; + } + + @Override + public long deviceRead(long offset, DeviceMemoryBuffer dest, + Cuda.Stream stream) { + long ret = read(offset, dest, (destParam, destOffset, src, srcOffset, srcAmount) -> + destParam.copyFromHostBufferAsync(destOffset, src, srcOffset, srcAmount, stream)); + devReads += 1; + devReadBytes += ret; + return ret; + } + + + @Override + public void close() { + try { + super.close(); + } finally { + for (HostMemoryBuffer hmb: hostBuffers) { + if (hmb != null) { + hmb.close(); + } + } + } + } + + public long getHostReads() { + return hostReads; + } + + public long getHostReadBytes() { + return hostReadBytes; + } + + public long getDevReads() { + return devReads; + } + + public long getDevReadBytes() { + return devReadBytes; + } +} diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java index c34336ac73f..17d59b757c3 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java +++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,7 +51,7 @@ public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, File f handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), filePath.getAbsolutePath(), 0, 0, opts.timeUnit().typeId.getNativeId()); - if(handle == 0) { + if (handle == 0) { throw new IllegalStateException("Cannot create native chunked Parquet reader object."); } } @@ -71,18 +71,45 @@ public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMe handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), null, buffer.getAddress() + offset, len, opts.timeUnit().typeId.getNativeId()); - if(handle == 0) { + if (handle == 0) { throw new IllegalStateException("Cannot create native chunked Parquet reader object."); } } + /** + * Construct a reader instance from a DataSource + * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read, + * or 0 if there is no limit. + * @param opts The options for Parquet reading. + * @param ds the data source to read from + */ + public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, DataSource ds) { + dataSourceHandle = DataSourceHelper.createWrapperDataSource(ds); + if (dataSourceHandle == 0) { + throw new IllegalStateException("Cannot create native datasource object"); + } + + boolean passed = false; + try { + handle = createWithDataSource(chunkSizeByteLimit, opts.getIncludeColumnNames(), + opts.getReadBinaryAsString(), opts.timeUnit().typeId.getNativeId(), + dataSourceHandle); + passed = true; + } finally { + if (!passed) { + DataSourceHelper.destroyWrapperDataSource(dataSourceHandle); + dataSourceHandle = 0; + } + } + } + /** * Check if the given file has anything left to read. * * @return A boolean value indicating if there is more data to read from file. 
*/ public boolean hasNext() { - if(handle == 0) { + if (handle == 0) { throw new IllegalStateException("Native chunked Parquet reader object may have been closed."); } @@ -104,7 +131,7 @@ public boolean hasNext() { * @return A table of new rows reading from the given file. */ public Table readChunk() { - if(handle == 0) { + if (handle == 0) { throw new IllegalStateException("Native chunked Parquet reader object may have been closed."); } @@ -118,6 +145,10 @@ public void close() { close(handle); handle = 0; } + if (dataSourceHandle != 0) { + DataSourceHelper.destroyWrapperDataSource(dataSourceHandle); + dataSourceHandle = 0; + } } @@ -131,6 +162,7 @@ public void close() { */ private long handle; + private long dataSourceHandle = 0; /** * Create a native chunked Parquet reader object on heap and return its memory address. @@ -147,6 +179,9 @@ public void close() { private static native long create(long chunkSizeByteLimit, String[] filterColumnNames, boolean[] binaryToString, String filePath, long bufferAddrs, long length, int timeUnit); + private static native long createWithDataSource(long chunkedSizeByteLimit, + String[] filterColumnNames, boolean[] binaryToString, int timeUnit, long dataSourceHandle); + private static native boolean hasNext(long handle); private static native long[] readChunk(long handle); diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 51a33ebb72f..3bd1e3f25a7 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -235,6 +235,14 @@ private static native long[] readCSV(String[] columnNames, byte comment, String[] nullValues, String[] trueValues, String[] falseValues) throws CudfException; + private static native long[] readCSVFromDataSource(String[] columnNames, + int[] dTypeIds, int[] dTypeScales, + String[] filterColumnNames, + int headerRow, byte delim, int quoteStyle, byte quote, + byte comment, String[] nullValues, + String[] 
trueValues, String[] falseValues, + long dataSourceHandle) throws CudfException; + /** * read JSON data and return a pointer to a TableWithMeta object. */ @@ -244,6 +252,12 @@ private static native long readJSON(String[] columnNames, boolean dayFirst, boolean lines, boolean recoverWithNulls) throws CudfException; + private static native long readJSONFromDataSource(String[] columnNames, + int[] dTypeIds, int[] dTypeScales, + boolean dayFirst, boolean lines, + boolean recoverWithNulls, + long dsHandle) throws CudfException; + private static native long readAndInferJSON(long address, long length, boolean dayFirst, boolean lines, boolean recoverWithNulls) throws CudfException; @@ -260,6 +274,10 @@ private static native long readAndInferJSON(long address, long length, private static native long[] readParquet(String[] filterColumnNames, boolean[] binaryToString, String filePath, long address, long length, int timeUnit) throws CudfException; + private static native long[] readParquetFromDataSource(String[] filterColumnNames, + boolean[] binaryToString, int timeUnit, + long dataSourceHandle) throws CudfException; + /** * Read in Avro formatted data. * @param filterColumnNames name of the columns to read, or an empty array if we want to read @@ -271,6 +289,9 @@ private static native long[] readParquet(String[] filterColumnNames, boolean[] b private static native long[] readAvro(String[] filterColumnNames, String filePath, long address, long length) throws CudfException; + private static native long[] readAvroFromDataSource(String[] filterColumnNames, + long dataSourceHandle) throws CudfException; + /** * Setup everything to write parquet formatted data to a file. 
* @param columnNames names that correspond to the table columns @@ -372,6 +393,11 @@ private static native long[] readORC(String[] filterColumnNames, boolean usingNumPyTypes, int timeUnit, String[] decimal128Columns) throws CudfException; + private static native long[] readORCFromDataSource(String[] filterColumnNames, + boolean usingNumPyTypes, int timeUnit, + String[] decimal128Columns, + long dataSourceHandle) throws CudfException; + /** * Setup everything to write ORC formatted data to a file. * @param columnNames names that correspond to the table columns @@ -881,6 +907,27 @@ public static Table readCSV(Schema schema, CSVOptions opts, HostMemoryBuffer buf opts.getFalseValues())); } + public static Table readCSV(Schema schema, CSVOptions opts, DataSource ds) { + long dsHandle = DataSourceHelper.createWrapperDataSource(ds); + try { + return new Table(readCSVFromDataSource(schema.getColumnNames(), + schema.getTypeIds(), + schema.getTypeScales(), + opts.getIncludeColumnNames(), + opts.getHeaderRow(), + opts.getDelim(), + opts.getQuoteStyle().nativeId, + opts.getQuote(), + opts.getComment(), + opts.getNullValues(), + opts.getTrueValues(), + opts.getFalseValues(), + dsHandle)); + } finally { + DataSourceHelper.destroyWrapperDataSource(dsHandle); + } + } + private static native void writeCSVToFile(long table, String[] columnNames, boolean includeHeader, @@ -1128,6 +1175,24 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b } } + /** + * Read JSON formatted data. + * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema. + * @param opts various JSON parsing options. + * @param ds the DataSource to read from. + * @return the data parsed as a table on the GPU. 
+ */ + public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds) { + long dsHandle = DataSourceHelper.createWrapperDataSource(ds); + try (TableWithMeta twm = new TableWithMeta(readJSONFromDataSource(schema.getColumnNames(), + schema.getTypeIds(), schema.getTypeScales(), opts.isDayFirst(), opts.isLines(), + opts.isRecoverWithNull(), dsHandle))) { + return gatherJSONColumns(schema, twm); + } finally { + DataSourceHelper.destroyWrapperDataSource(dsHandle); + } + } + /** * Read a Parquet file using the default ParquetOptions. * @param path the local file to read. @@ -1214,6 +1279,17 @@ public static Table readParquet(ParquetOptions opts, HostMemoryBuffer buffer, null, buffer.getAddress() + offset, len, opts.timeUnit().typeId.getNativeId())); } + public static Table readParquet(ParquetOptions opts, DataSource ds) { + long dataSourceHandle = DataSourceHelper.createWrapperDataSource(ds); + try { + return new Table(readParquetFromDataSource(opts.getIncludeColumnNames(), + opts.getReadBinaryAsString(), opts.timeUnit().typeId.getNativeId(), + dataSourceHandle)); + } finally { + DataSourceHelper.destroyWrapperDataSource(dataSourceHandle); + } + } + /** * Read an Avro file using the default AvroOptions. * @param path the local file to read. @@ -1297,6 +1373,16 @@ public static Table readAvro(AvroOptions opts, HostMemoryBuffer buffer, null, buffer.getAddress() + offset, len)); } + public static Table readAvro(AvroOptions opts, DataSource ds) { + long dataSourceHandle = DataSourceHelper.createWrapperDataSource(ds); + try { + return new Table(readAvroFromDataSource(opts.getIncludeColumnNames(), + dataSourceHandle)); + } finally { + DataSourceHelper.destroyWrapperDataSource(dataSourceHandle); + } + } + /** * Read a ORC file using the default ORCOptions. * @param path the local file to read. 
@@ -1388,6 +1474,17 @@ public static Table readORC(ORCOptions opts, HostMemoryBuffer buffer, opts.getDecimal128Columns())); } + public static Table readORC(ORCOptions opts, DataSource ds) { + long dataSourceHandle = DataSourceHelper.createWrapperDataSource(ds); + try { + return new Table(readORCFromDataSource(opts.getIncludeColumnNames(), + opts.usingNumPyTypes(), opts.timeUnit().typeId.getNativeId(), + opts.getDecimal128Columns(), dataSourceHandle)); + } finally { + DataSourceHelper.destroyWrapperDataSource(dataSourceHandle); + } + } + private static class ParquetTableWriter extends TableWriter { HostBufferConsumer consumer; @@ -2262,7 +2359,7 @@ public Table dropDuplicates(int[] keyColumns, DuplicateKeepOption keep, boolean /** * Count how many rows in the table are distinct from one another. - * @param nullEqual if nulls should be considered equal to each other or not. + * @param nullsEqual if nulls should be considered equal to each other or not. */ public int distinctCount(NullEquality nullsEqual) { return distinctCount(nativeHandle, nullsEqual.nullsEqual); diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 0dcfee2cffe..01161a03dd4 100644 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -135,6 +135,7 @@ add_library( src/ColumnViewJni.cu src/CompiledExpression.cpp src/ContiguousTableJni.cpp + src/DataSourceHelperJni.cpp src/HashJoinJni.cpp src/HostMemoryBufferNativeUtilsJni.cpp src/NvcompJni.cpp diff --git a/java/src/main/native/src/ChunkedReaderJni.cpp b/java/src/main/native/src/ChunkedReaderJni.cpp index 8d0a8bdbfe7..0044385f267 100644 --- a/java/src/main/native/src/ChunkedReaderJni.cpp +++ b/java/src/main/native/src/ChunkedReaderJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,6 +85,40 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create( CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_createWithDataSource( + JNIEnv *env, jclass, jlong chunk_read_limit, jobjectArray filter_col_names, + jbooleanArray j_col_binary_read, jint unit, jlong ds_handle) { + JNI_NULL_CHECK(env, j_col_binary_read, "Null col_binary_read", 0); + JNI_NULL_CHECK(env, ds_handle, "Null DataSouurce", 0); + + try { + cudf::jni::auto_set_device(env); + + cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); + + // TODO: This variable is unused now, but we still don't know what to do with it yet. + // As such, it needs to stay here for a little more time before we decide to use it again, + // or remove it completely. + cudf::jni::native_jbooleanArray n_col_binary_read(env, j_col_binary_read); + (void)n_col_binary_read; + + auto ds = reinterpret_cast(ds_handle); + cudf::io::source_info source{ds}; + + auto opts_builder = cudf::io::parquet_reader_options::builder(source); + if (n_filter_col_names.size() > 0) { + opts_builder = opts_builder.columns(n_filter_col_names.as_cpp_vector()); + } + auto const read_opts = opts_builder.convert_strings_to_categories(false) + .timestamp_type(cudf::data_type(static_cast(unit))) + .build(); + + return reinterpret_cast(new cudf::io::chunked_parquet_reader( + static_cast(chunk_read_limit), read_opts)); + } + CATCH_STD(env, 0); +} + JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_hasNext(JNIEnv *env, jclass, jlong handle) { JNI_NULL_CHECK(env, handle, "handle is null", false); diff --git a/java/src/main/native/src/CudfJni.cpp b/java/src/main/native/src/CudfJni.cpp index 0f143086451..d0a25d449a6 100644 --- a/java/src/main/native/src/CudfJni.cpp +++ b/java/src/main/native/src/CudfJni.cpp @@ -175,6 +175,14 @@ JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *) { return JNI_ERR; } + if (!cudf::jni::cache_data_source_jni(env)) { + if 
(!env->ExceptionCheck()) { + env->ThrowNew(env->FindClass("java/lang/RuntimeException"), + "Unable to locate data source helper methods needed by JNI"); + } + return JNI_ERR; + } + return cudf::jni::MINIMUM_JNI_VERSION; } diff --git a/java/src/main/native/src/DataSourceHelperJni.cpp b/java/src/main/native/src/DataSourceHelperJni.cpp new file mode 100644 index 00000000000..8d0e4d36413 --- /dev/null +++ b/java/src/main/native/src/DataSourceHelperJni.cpp @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "cudf_jni_apis.hpp" +#include "jni_utils.hpp" + +namespace { + +#define DATA_SOURCE_CLASS "ai/rapids/cudf/DataSource" + +jclass DataSource_jclass; +jmethodID hostRead_method; +jmethodID hostReadBuff_method; +jmethodID onHostBufferDone_method; +jmethodID deviceRead_method; + +} // anonymous namespace + +namespace cudf { +namespace jni { +bool cache_data_source_jni(JNIEnv *env) { + jclass cls = env->FindClass(DATA_SOURCE_CLASS); + if (cls == nullptr) { + return false; + } + + hostRead_method = env->GetMethodID(cls, "hostRead", "(JJJ)J"); + if (hostRead_method == nullptr) { + return false; + } + + hostReadBuff_method = env->GetMethodID(cls, "hostReadBuff", "(JJ)[J"); + if (hostReadBuff_method == nullptr) { + return false; + } + + onHostBufferDone_method = env->GetMethodID(cls, "onHostBufferDone", "(J)V"); + if (onHostBufferDone_method == nullptr) { + return false; + } + + deviceRead_method = env->GetMethodID(cls, "deviceRead", "(JJJJ)J"); + if (deviceRead_method == nullptr) { + return false; + } + + // Convert local reference to global so it cannot be garbage collected. 
+ DataSource_jclass = static_cast(env->NewGlobalRef(cls)); + if (DataSource_jclass == nullptr) { + return false; + } + return true; +} + +void release_data_source_jni(JNIEnv *env) { + DataSource_jclass = cudf::jni::del_global_ref(env, DataSource_jclass); +} + +class host_buffer_done_callback { +public: + explicit host_buffer_done_callback(JavaVM *jvm, jobject ds, long id) : jvm(jvm), ds(ds), id(id) {} + + host_buffer_done_callback(host_buffer_done_callback const &other) = delete; + host_buffer_done_callback(host_buffer_done_callback &&other) + : jvm(other.jvm), ds(other.ds), id(other.id) { + other.jvm = nullptr; + other.ds = nullptr; + other.id = -1; + } + + host_buffer_done_callback &operator=(host_buffer_done_callback &&other) = delete; + host_buffer_done_callback &operator=(host_buffer_done_callback const &other) = delete; + + ~host_buffer_done_callback() { + // because we are in a destructor we cannot throw an exception, so for now we are + // just going to keep the java exceptions around and have them be thrown when this + // thread returns to the JVM. It might be kind of confusing, but we will not lose + // them. 
+ if (jvm != nullptr) { + // We cannot throw an exception in the destructor, so this is really best effort + JNIEnv *env = nullptr; + if (jvm->GetEnv(reinterpret_cast(&env), cudf::jni::MINIMUM_JNI_VERSION) == JNI_OK) { + env->CallVoidMethod(this->ds, onHostBufferDone_method, id); + } + } + } + +private: + JavaVM *jvm; + jobject ds; + long id; +}; + +class jni_datasource : public cudf::io::datasource { +public: + explicit jni_datasource(JNIEnv *env, jobject ds, size_t ds_size, bool device_read_supported, + size_t device_read_cutoff) + : ds_size(ds_size), device_read_supported(device_read_supported), + device_read_cutoff(device_read_cutoff) { + if (env->GetJavaVM(&jvm) < 0) { + throw std::runtime_error("GetJavaVM failed"); + } + this->ds = add_global_ref(env, ds); + } + + virtual ~jni_datasource() { + JNIEnv *env = nullptr; + if (jvm->GetEnv(reinterpret_cast(&env), cudf::jni::MINIMUM_JNI_VERSION) == JNI_OK) { + ds = del_global_ref(env, ds); + } + ds = nullptr; + } + + std::unique_ptr host_read(size_t offset, size_t size) override { + JNIEnv *env = nullptr; + if (jvm->GetEnv(reinterpret_cast(&env), cudf::jni::MINIMUM_JNI_VERSION) != JNI_OK) { + throw cudf::jni::jni_exception("Could not load JNIEnv"); + } + + jlongArray jbuffer_info = + static_cast(env->CallObjectMethod(this->ds, hostReadBuff_method, offset, size)); + if (env->ExceptionOccurred()) { + throw cudf::jni::jni_exception("Java exception in hostRead"); + } + + cudf::jni::native_jlongArray buffer_info(env, jbuffer_info); + auto ptr = reinterpret_cast(buffer_info[0]); + size_t length = buffer_info[1]; + long id = buffer_info[2]; + + cudf::jni::host_buffer_done_callback cb(this->jvm, this->ds, id); + return std::make_unique>(std::move(cb), ptr, + length); + } + + size_t host_read(size_t offset, size_t size, uint8_t *dst) override { + JNIEnv *env = nullptr; + if (jvm->GetEnv(reinterpret_cast(&env), cudf::jni::MINIMUM_JNI_VERSION) != JNI_OK) { + throw cudf::jni::jni_exception("Could not load JNIEnv"); + } + + 
jlong amount_read = + env->CallLongMethod(this->ds, hostRead_method, offset, size, reinterpret_cast(dst)); + if (env->ExceptionOccurred()) { + throw cudf::jni::jni_exception("Java exception in hostRead"); + } + return amount_read; + } + + size_t size() const override { return ds_size; } + + bool supports_device_read() const override { return device_read_supported; } + + bool is_device_read_preferred(size_t size) const override { + return device_read_supported && size >= device_read_cutoff; + } + + size_t device_read(size_t offset, size_t size, uint8_t *dst, + rmm::cuda_stream_view stream) override { + JNIEnv *env = nullptr; + if (jvm->GetEnv(reinterpret_cast(&env), cudf::jni::MINIMUM_JNI_VERSION) != JNI_OK) { + throw cudf::jni::jni_exception("Could not load JNIEnv"); + } + + jlong amount_read = + env->CallLongMethod(this->ds, deviceRead_method, offset, size, reinterpret_cast(dst), + reinterpret_cast(stream.value())); + if (env->ExceptionOccurred()) { + throw cudf::jni::jni_exception("Java exception in deviceRead"); + } + return amount_read; + } + + std::future device_read_async(size_t offset, size_t size, uint8_t *dst, + rmm::cuda_stream_view stream) override { + auto amount_read = device_read(offset, size, dst, stream); + // This is a bit ugly, but we don't have a good way or a need to return + // a future for the read + std::promise ret; + ret.set_value(amount_read); + return ret.get_future(); + } + +private: + size_t ds_size; + bool device_read_supported; + size_t device_read_cutoff; + JavaVM *jvm; + jobject ds; +}; +} // namespace jni +} // namespace cudf + +extern "C" { + +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_DataSourceHelper_createWrapperDataSource( + JNIEnv *env, jclass, jobject ds, jlong ds_size, jboolean device_read_supported, + jlong device_read_cutoff) { + JNI_NULL_CHECK(env, ds, "Null data source", 0); + try { + cudf::jni::auto_set_device(env); + auto source = + new cudf::jni::jni_datasource(env, ds, ds_size, device_read_supported, 
device_read_cutoff); + return reinterpret_cast(source); + } + CATCH_STD(env, 0); +} + +JNIEXPORT void JNICALL Java_ai_rapids_cudf_DataSourceHelper_destroyWrapperDataSource(JNIEnv *env, + jclass, + jlong handle) { + try { + cudf::jni::auto_set_device(env); + if (handle != 0) { + auto source = reinterpret_cast(handle); + delete (source); + } + } + CATCH_STD(env, ); +} + +} // extern "C" diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index b208ef8f381..fad19bdf895 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1135,6 +1135,67 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_merge(JNIEnv *env, jclass CATCH_STD(env, NULL); } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readCSVFromDataSource( + JNIEnv *env, jclass, jobjectArray col_names, jintArray j_types, jintArray j_scales, + jobjectArray filter_col_names, jint header_row, jbyte delim, jint j_quote_style, jbyte quote, + jbyte comment, jobjectArray null_values, jobjectArray true_values, jobjectArray false_values, + jlong ds_handle) { + JNI_NULL_CHECK(env, null_values, "null_values must be supplied, even if it is empty", NULL); + JNI_NULL_CHECK(env, ds_handle, "no data source handle given", NULL); + + try { + cudf::jni::auto_set_device(env); + cudf::jni::native_jstringArray n_col_names(env, col_names); + cudf::jni::native_jintArray n_types(env, j_types); + cudf::jni::native_jintArray n_scales(env, j_scales); + if (n_types.is_null() != n_scales.is_null()) { + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "types and scales must match null", + NULL); + } + std::vector data_types; + if (!n_types.is_null()) { + if (n_types.size() != n_scales.size()) { + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "types and scales must match size", + NULL); + } + data_types.reserve(n_types.size()); + std::transform(n_types.begin(), n_types.end(), n_scales.begin(), + std::back_inserter(data_types), 
[](auto type, auto scale) { + return cudf::data_type{static_cast(type), scale}; + }); + } + + cudf::jni::native_jstringArray n_null_values(env, null_values); + cudf::jni::native_jstringArray n_true_values(env, true_values); + cudf::jni::native_jstringArray n_false_values(env, false_values); + cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); + + auto ds = reinterpret_cast(ds_handle); + cudf::io::source_info source{ds}; + + auto const quote_style = static_cast(j_quote_style); + + cudf::io::csv_reader_options opts = cudf::io::csv_reader_options::builder(source) + .delimiter(delim) + .header(header_row) + .names(n_col_names.as_cpp_vector()) + .dtypes(data_types) + .use_cols_names(n_filter_col_names.as_cpp_vector()) + .true_values(n_true_values.as_cpp_vector()) + .false_values(n_false_values.as_cpp_vector()) + .na_values(n_null_values.as_cpp_vector()) + .keep_default_na(false) + .na_filter(n_null_values.size() > 0) + .quoting(quote_style) + .quotechar(quote) + .comment(comment) + .build(); + + return convert_table_for_return(env, cudf::io::read_csv(opts).tbl); + } + CATCH_STD(env, NULL); +} + JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readCSV( JNIEnv *env, jclass, jobjectArray col_names, jintArray j_types, jintArray j_scales, jobjectArray filter_col_names, jstring inputfilepath, jlong buffer, jlong buffer_length, @@ -1407,6 +1468,72 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_TableWithMeta_releaseTable(JNIE CATCH_STD(env, nullptr); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSONFromDataSource( + JNIEnv *env, jclass, jobjectArray col_names, jintArray j_types, jintArray j_scales, + jboolean day_first, jboolean lines, jboolean recover_with_null, jlong ds_handle) { + + JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0); + + try { + cudf::jni::auto_set_device(env); + cudf::jni::native_jstringArray n_col_names(env, col_names); + cudf::jni::native_jintArray n_types(env, j_types); + 
cudf::jni::native_jintArray n_scales(env, j_scales); + if (n_types.is_null() != n_scales.is_null()) { + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "types and scales must match null", + 0); + } + std::vector data_types; + if (!n_types.is_null()) { + if (n_types.size() != n_scales.size()) { + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "types and scales must match size", + 0); + } + data_types.reserve(n_types.size()); + std::transform(n_types.begin(), n_types.end(), n_scales.begin(), + std::back_inserter(data_types), [](auto const &type, auto const &scale) { + return cudf::data_type{static_cast(type), scale}; + }); + } + + auto ds = reinterpret_cast(ds_handle); + cudf::io::source_info source{ds}; + + cudf::io::json_recovery_mode_t recovery_mode = + recover_with_null ? cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL : + cudf::io::json_recovery_mode_t::FAIL; + cudf::io::json_reader_options_builder opts = cudf::io::json_reader_options::builder(source) + .dayfirst(static_cast(day_first)) + .lines(static_cast(lines)) + .recovery_mode(recovery_mode); + + if (!n_col_names.is_null() && data_types.size() > 0) { + if (n_col_names.size() != n_types.size()) { + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "types and column names must match size", 0); + } + + std::map map; + + auto col_names_vec = n_col_names.as_cpp_vector(); + std::transform(col_names_vec.begin(), col_names_vec.end(), data_types.begin(), + std::inserter(map, map.end()), + [](std::string a, cudf::data_type b) { return std::make_pair(a, b); }); + opts.dtypes(map); + } else if (data_types.size() > 0) { + opts.dtypes(data_types); + } else { + // should infer the types + } + + auto result = + std::make_unique(cudf::io::read_json(opts.build())); + + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON( JNIEnv *env, jclass, jobjectArray col_names, jintArray j_types, jintArray j_scales, jstring 
inputfilepath, jlong buffer, jlong buffer_length, jboolean day_first, jboolean lines, @@ -1489,6 +1616,36 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON( CATCH_STD(env, 0); } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readParquetFromDataSource( + JNIEnv *env, jclass, jobjectArray filter_col_names, jbooleanArray j_col_binary_read, jint unit, + jlong ds_handle) { + + JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0); + JNI_NULL_CHECK(env, j_col_binary_read, "null col_binary_read", 0); + + try { + cudf::jni::auto_set_device(env); + + cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); + cudf::jni::native_jbooleanArray n_col_binary_read(env, j_col_binary_read); + + auto ds = reinterpret_cast(ds_handle); + cudf::io::source_info source{ds}; + + auto builder = cudf::io::parquet_reader_options::builder(source); + if (n_filter_col_names.size() > 0) { + builder = builder.columns(n_filter_col_names.as_cpp_vector()); + } + + cudf::io::parquet_reader_options opts = + builder.convert_strings_to_categories(false) + .timestamp_type(cudf::data_type(static_cast(unit))) + .build(); + return convert_table_for_return(env, cudf::io::read_parquet(opts).tbl); + } + CATCH_STD(env, NULL); +} + JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readParquet( JNIEnv *env, jclass, jobjectArray filter_col_names, jbooleanArray j_col_binary_read, jstring inputfilepath, jlong buffer, jlong buffer_length, jint unit) { @@ -1535,10 +1692,31 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readParquet( CATCH_STD(env, NULL); } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readAvroFromDataSource( + JNIEnv *env, jclass, jobjectArray filter_col_names, jlong ds_handle) { + + JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0); + + try { + cudf::jni::auto_set_device(env); + + cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); + + auto ds = reinterpret_cast(ds_handle); + 
cudf::io::source_info source{ds}; + + cudf::io::avro_reader_options opts = cudf::io::avro_reader_options::builder(source) + .columns(n_filter_col_names.as_cpp_vector()) + .build(); + return convert_table_for_return(env, cudf::io::read_avro(opts).tbl); + } + CATCH_STD(env, NULL); +} + JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readAvro(JNIEnv *env, jclass, jobjectArray filter_col_names, jstring inputfilepath, jlong buffer, - jlong buffer_length, jint unit) { + jlong buffer_length) { const bool read_buffer = (buffer != 0); if (!read_buffer) { @@ -1715,6 +1893,38 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Table_writeParquetEnd(JNIEnv *env, jc CATCH_STD(env, ) } +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readORCFromDataSource( + JNIEnv *env, jclass, jobjectArray filter_col_names, jboolean usingNumPyTypes, jint unit, + jobjectArray dec128_col_names, jlong ds_handle) { + + JNI_NULL_CHECK(env, ds_handle, "no data source handle given", 0); + + try { + cudf::jni::auto_set_device(env); + + cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); + + cudf::jni::native_jstringArray n_dec128_col_names(env, dec128_col_names); + + auto ds = reinterpret_cast(ds_handle); + cudf::io::source_info source{ds}; + + auto builder = cudf::io::orc_reader_options::builder(source); + if (n_filter_col_names.size() > 0) { + builder = builder.columns(n_filter_col_names.as_cpp_vector()); + } + + cudf::io::orc_reader_options opts = + builder.use_index(false) + .use_np_dtypes(static_cast(usingNumPyTypes)) + .timestamp_type(cudf::data_type(static_cast(unit))) + .decimal128_columns(n_dec128_col_names.as_cpp_vector()) + .build(); + return convert_table_for_return(env, cudf::io::read_orc(opts).tbl); + } + CATCH_STD(env, NULL); +} + JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readORC( JNIEnv *env, jclass, jobjectArray filter_col_names, jstring inputfilepath, jlong buffer, jlong buffer_length, jboolean usingNumPyTypes, jint unit, jobjectArray 
dec128_col_names) { diff --git a/java/src/main/native/src/cudf_jni_apis.hpp b/java/src/main/native/src/cudf_jni_apis.hpp index 867df80b722..bd82bbd2899 100644 --- a/java/src/main/native/src/cudf_jni_apis.hpp +++ b/java/src/main/native/src/cudf_jni_apis.hpp @@ -134,5 +134,13 @@ void auto_set_device(JNIEnv *env); */ void device_memset_async(JNIEnv *env, rmm::device_buffer &buf, char value); +// +// DataSource APIs +// + +bool cache_data_source_jni(JNIEnv *env); + +void release_data_source_jni(JNIEnv *env); + } // namespace jni } // namespace cudf diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index faa73ac4322..b0dd4122b0e 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -327,6 +327,25 @@ void testReadJSONFile() { } } + @Test + void testReadJSONFromDataSource() throws IOException { + Schema schema = Schema.builder() + .column(DType.STRING, "name") + .column(DType.INT32, "age") + .build(); + JSONOptions opts = JSONOptions.builder() + .withLines(true) + .build(); + try (Table expected = new Table.TestBuilder() + .column("Michael", "Andy", "Justin") + .column(null, 30, 19) + .build(); + MultiBufferDataSource source = sourceFrom(TEST_SIMPLE_JSON_FILE); + Table table = Table.readJSON(schema, opts, source)) { + assertTablesAreEqual(expected, table); + } + } + @Test void testReadJSONFileWithInvalidLines() { Schema schema = Schema.builder() @@ -560,6 +579,126 @@ void testReadCSVBuffer() { } } + byte[][] sliceBytes(byte[] data, int slices) { + slices = Math.min(data.length, slices); + // We are not going to worry about making it super even here. + // The last one gets the extras. 
+ int bytesPerSlice = data.length / slices; + byte[][] ret = new byte[slices][]; + int startingAt = 0; + for (int i = 0; i < (slices - 1); i++) { + ret[i] = new byte[bytesPerSlice]; + System.arraycopy(data, startingAt, ret[i], 0, bytesPerSlice); + startingAt += bytesPerSlice; + } + // Now for the last one + ret[slices - 1] = new byte[data.length - startingAt]; + System.arraycopy(data, startingAt, ret[slices - 1], 0, data.length - startingAt); + return ret; + } + + @Test + void testReadCSVBufferMultiBuffer() { + CSVOptions opts = CSVOptions.builder() + .includeColumn("A") + .includeColumn("B") + .hasHeader() + .withDelim('|') + .withQuote('\'') + .withNullValue("NULL") + .build(); + byte[][] data = sliceBytes(CSV_DATA_BUFFER, 10); + try (Table expected = new Table.TestBuilder() + .column(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + .column(110.0, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0, null, 118.2, 119.8) + .build(); + MultiBufferDataSource source = sourceFrom(data); + Table table = Table.readCSV(TableTest.CSV_DATA_BUFFER_SCHEMA, opts, source)) { + assertTablesAreEqual(expected, table); + } + } + + public static byte[] arrayFrom(File f) throws IOException { + long len = f.length(); + if (len > Integer.MAX_VALUE) { + throw new IllegalArgumentException("Sorry cannot read " + f + + " into an array it does not fit"); + } + int remaining = (int)len; + byte[] ret = new byte[remaining]; + try (java.io.FileInputStream fin = new java.io.FileInputStream(f)) { + int at = 0; + while (remaining > 0) { + int amount = fin.read(ret, at, remaining); + at += amount; + remaining -= amount; + } + } + return ret; + } + + public static MultiBufferDataSource sourceFrom(File f) throws IOException { + long len = f.length(); + byte[] tmp = new byte[(int)Math.min(32 * 1024, len)]; + try (HostMemoryBuffer buffer = HostMemoryBuffer.allocate(len)) { + try (java.io.FileInputStream fin = new java.io.FileInputStream(f)) { + long at = 0; + while (at < len) { + int amount = fin.read(tmp); + buffer.setBytes(at, 
tmp, 0, amount); + at += amount; + } + } + return new MultiBufferDataSource(buffer); + } + } + + public static MultiBufferDataSource sourceFrom(byte[] data) { + long len = data.length; + try (HostMemoryBuffer buffer = HostMemoryBuffer.allocate(len)) { + buffer.setBytes(0, data, 0, len); + return new MultiBufferDataSource(buffer); + } + } + + public static MultiBufferDataSource sourceFrom(byte[][] data) { + HostMemoryBuffer[] buffers = new HostMemoryBuffer[data.length]; + try { + for (int i = 0; i < data.length; i++) { + byte[] subData = data[i]; + buffers[i] = HostMemoryBuffer.allocate(subData.length); + buffers[i].setBytes(0, subData, 0, subData.length); + } + return new MultiBufferDataSource(buffers); + } finally { + for (HostMemoryBuffer buffer: buffers) { + if (buffer != null) { + buffer.close(); + } + } + } + } + + @Test + void testReadCSVDataSource() { + CSVOptions opts = CSVOptions.builder() + .includeColumn("A") + .includeColumn("B") + .hasHeader() + .withDelim('|') + .withQuote('\'') + .withNullValue("NULL") + .build(); + try (Table expected = new Table.TestBuilder() + .column(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + .column(110.0, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0, null, 118.2, 119.8) + .build(); + MultiBufferDataSource source = sourceFrom(TableTest.CSV_DATA_BUFFER); + Table table = Table.readCSV(TableTest.CSV_DATA_BUFFER_SCHEMA, opts, source)) { + assertTablesAreEqual(expected, table); + } + } + @Test void testReadCSVWithOffset() { CSVOptions opts = CSVOptions.builder() @@ -864,6 +1003,37 @@ void testReadParquet() { } } + @Test + void testReadParquetFromDataSource() throws IOException { + ParquetOptions opts = ParquetOptions.builder() + .includeColumn("loan_id") + .includeColumn("zip") + .includeColumn("num_units") + .build(); + try (MultiBufferDataSource source = sourceFrom(TEST_PARQUET_FILE); + Table table = Table.readParquet(opts, source)) { + long rows = table.getRowCount(); + assertEquals(1000, rows); + assertTableTypes(new DType[]{DType.INT64, 
DType.INT32, DType.INT32}, table); + } + } + + @Test + void testReadParquetMultiBuffer() throws IOException { + ParquetOptions opts = ParquetOptions.builder() + .includeColumn("loan_id") + .includeColumn("zip") + .includeColumn("num_units") + .build(); + byte [][] data = sliceBytes(arrayFrom(TEST_PARQUET_FILE), 10); + try (MultiBufferDataSource source = sourceFrom(data); + Table table = Table.readParquet(opts, source)) { + long rows = table.getRowCount(); + assertEquals(1000, rows); + assertTableTypes(new DType[]{DType.INT64, DType.INT32, DType.INT32}, table); + } + } + @Test void testReadParquetBinary() { ParquetOptions opts = ParquetOptions.builder() @@ -1018,6 +1188,23 @@ void testChunkedReadParquet() { } } + @Test + void testChunkedReadParquetFromDataSource() throws IOException { + try (MultiBufferDataSource source = sourceFrom(TEST_PARQUET_FILE_CHUNKED_READ); + ParquetChunkedReader reader = new ParquetChunkedReader(240000, ParquetOptions.DEFAULT, source)) { + int numChunks = 0; + long totalRows = 0; + while(reader.hasNext()) { + ++numChunks; + try(Table chunk = reader.readChunk()) { + totalRows += chunk.getRowCount(); + } + } + assertEquals(2, numChunks); + assertEquals(40000, totalRows); + } + } + @Test void testReadAvro() { AvroOptions opts = AvroOptions.builder() @@ -1037,6 +1224,26 @@ void testReadAvro() { } } + @Test + void testReadAvroFromDataSource() throws IOException { + AvroOptions opts = AvroOptions.builder() + .includeColumn("bool_col") + .includeColumn("int_col") + .includeColumn("timestamp_col") + .build(); + + try (Table expected = new Table.TestBuilder() + .column(true, false, true, false, true, false, true, false) + .column(0, 1, 0, 1, 0, 1, 0, 1) + .column(1235865600000000L, 1235865660000000L, 1238544000000000L, 1238544060000000L, + 1233446400000000L, 1233446460000000L, 1230768000000000L, 1230768060000000L) + .build(); + MultiBufferDataSource source = sourceFrom(TEST_ALL_TYPES_PLAIN_AVRO_FILE); + Table table = Table.readAvro(opts, source)) { 
+ assertTablesAreEqual(expected, table); + } + } + @Test void testReadAvroBuffer() throws IOException{ AvroOptions opts = AvroOptions.builder() @@ -1094,6 +1301,24 @@ void testReadORC() { } } + @Test + void testReadORCFromDataSource() throws IOException { + ORCOptions opts = ORCOptions.builder() + .includeColumn("string1") + .includeColumn("float1") + .includeColumn("int1") + .build(); + try (Table expected = new Table.TestBuilder() + .column("hi","bye") + .column(1.0f,2.0f) + .column(65536,65536) + .build(); + MultiBufferDataSource source = sourceFrom(TEST_ORC_FILE); + Table table = Table.readORC(opts, source)) { + assertTablesAreEqual(expected, table); + } + } + @Test void testReadORCBuffer() throws IOException { ORCOptions opts = ORCOptions.builder() From 15baa00693ab4aa59f99ccb417c613880789d047 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:31:39 +0200 Subject: [PATCH 54/76] Fixes behaviour for incomplete lines when `recover_with_nulls` is enabled (#14252) Closes https://github.com/rapidsai/cudf/issues/14227. Adapts the behaviour of the JSON finite-state transducer (FST) when `recover_with_nulls` is `true` to be more strict and reject lines that contain incomplete JSON objects (aka records) or JSON arrays (aka lists). 
Authors: - Elias Stehle (https://github.com/elstehle) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14252 --- cpp/src/io/json/nested_json_gpu.cu | 703 +++++++++++++++-------------- cpp/tests/io/json_test.cpp | 45 +- cpp/tests/io/nested_json_test.cpp | 23 +- 3 files changed, 401 insertions(+), 370 deletions(-) diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 06ac11485cb..c9107357239 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -660,13 +660,13 @@ auto get_transition_table(json_format_cfg_t format) PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_BOV, PD_LON, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_BOV, PD_LON}; pda_tt[static_cast(pda_state_t::PD_STR)] = { - PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, - PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, - PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR}; + PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_BOV, PD_STR, + PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_BOV, PD_STR, + PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_BOV, PD_STR}; pda_tt[static_cast(pda_state_t::PD_SCE)] = { - PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, - PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, - PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR}; + PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_BOV, PD_STR, + PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_BOV, PD_STR, + PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, 
PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_BOV, PD_STR}; pda_tt[static_cast(pda_state_t::PD_PVL)] = { PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_BOV, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_BOV, PD_ERR, @@ -680,9 +680,9 @@ auto get_transition_table(json_format_cfg_t format) PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_PFN, PD_FNE, PD_FLN, PD_FLN, PD_FLN, PD_BOV, PD_FLN}; pda_tt[static_cast(pda_state_t::PD_FNE)] = { - PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, - PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, - PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN}; + PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR, + PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR, + PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_BOV, PD_FLN}; pda_tt[static_cast(pda_state_t::PD_PFN)] = { PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR, @@ -697,8 +697,11 @@ auto get_transition_table(json_format_cfg_t format) /** * @brief Getting the translation table + * @param recover_from_error Whether or not the tokenizer should recover from invalid lines. If + * `recover_from_error` is true, invalid JSON lines end with the token sequence (`ErrorBegin`, + * `LineEnd`) and incomplete JSON lines (e.g., `{"a":123\n`) are treated as invalid lines.
*/ -auto get_translation_table(bool include_line_delimiter) +auto get_translation_table(bool recover_from_error) { constexpr auto StructBegin = token_t::StructBegin; constexpr auto StructEnd = token_t::StructEnd; @@ -715,76 +718,83 @@ auto get_translation_table(bool include_line_delimiter) constexpr auto ErrorBegin = token_t::ErrorBegin; /** - * @brief Appends token_t::LineEnd token to the given token sequence, if and only if - * `include_line_delimiter` is true. + * @brief Instead of specifying the verbose translation tables twice (i.e., once when + * `recover_from_error` is true and once when it is false), we use `nl_tokens` to specialize the + * translation table where it differs depending on the `recover_from_error` option. If and only if + * `recover_from_error` is true, `recovering_tokens` are returned along with a token_t::LineEnd + * token, otherwise `regular_tokens` is returned. */ - auto nl_tokens = [include_line_delimiter](std::vector tokens) { - if (include_line_delimiter) { tokens.push_back(token_t::LineEnd); } - return tokens; + auto nl_tokens = [recover_from_error](std::vector regular_tokens, + std::vector recovering_tokens) { + if (recover_from_error) { + recovering_tokens.push_back(token_t::LineEnd); + return recovering_tokens; + } + return regular_tokens; }; std::array, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tlt; - pda_tlt[static_cast(pda_state_t::PD_BOV)] = {{ /*ROOT*/ + pda_tlt[static_cast(pda_state_t::PD_BOV)] = {{ /*ROOT*/ + {StructBegin}, // OPENING_BRACE + {ListBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {StringBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {}), // LINE_BREAK + {ValueBegin}, // OTHER + /*LIST*/ {StructBegin}, // OPENING_BRACE {ListBegin}, // OPENING_BRACKET {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET + {ListEnd}, // CLOSING_BRACKET {StringBegin}, // QUOTE {ErrorBegin}, // 
ESCAPE {ErrorBegin}, // COMMA {ErrorBegin}, // COLON {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {ValueBegin}, // OTHER - /*LIST*/ + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {ValueBegin}, // OTHER + /*STRUCT*/ {StructBegin}, // OPENING_BRACE {ListBegin}, // OPENING_BRACKET {ErrorBegin}, // CLOSING_BRACE - {ListEnd}, // CLOSING_BRACKET + {ErrorBegin}, // CLOSING_BRACKET {StringBegin}, // QUOTE {ErrorBegin}, // ESCAPE {ErrorBegin}, // COMMA {ErrorBegin}, // COLON {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {ValueBegin}, // OTHER - /*STRUCT*/ - {StructBegin}, // OPENING_BRACE - {ListBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {StringBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {ValueBegin}}}; // OTHER + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {ValueBegin}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_BOA)] = { - { /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER /*LIST*/ - {StructBegin}, // OPENING_BRACE - {ListBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ListEnd}, // CLOSING_BRACKET - {StringBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // 
LINE_BREAK - {ValueBegin}, // OTHER + {StructBegin}, // OPENING_BRACE + {ListBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ListEnd}, // CLOSING_BRACKET + {StringBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {ValueBegin}, // OTHER /*STRUCT*/ {ErrorBegin}, // OPENING_BRACE {ErrorBegin}, // OPENING_BRACKET @@ -795,33 +805,33 @@ auto get_translation_table(bool include_line_delimiter) {ErrorBegin}, // COMMA {ErrorBegin}, // COLON {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK + nl_tokens({}, {ErrorBegin}), // LINE_BREAK {ErrorBegin}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_LON)] = { - { /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ValueEnd}, // WHITE_SPACE - nl_tokens({ValueEnd}), // LINE_BREAK - {}, // OTHER + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ValueEnd}, // WHITE_SPACE + nl_tokens({ValueEnd}, {ErrorBegin}), // LINE_BREAK + {}, // OTHER /*LIST*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ValueEnd, ListEnd}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ValueEnd}, // COMMA - {ErrorBegin}, // COLON - {ValueEnd}, // WHITE_SPACE - nl_tokens({ValueEnd}), // LINE_BREAK - {}, // OTHER + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ValueEnd, ListEnd}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ValueEnd}, // COMMA + {ErrorBegin}, // COLON + {ValueEnd}, // 
WHITE_SPACE + nl_tokens({ValueEnd}, {ErrorBegin}), // LINE_BREAK + {}, // OTHER /*STRUCT*/ {ErrorBegin}, // OPENING_BRACE {ErrorBegin}, // OPENING_BRACKET @@ -832,108 +842,108 @@ auto get_translation_table(bool include_line_delimiter) {ValueEnd, StructMemberEnd}, // COMMA {ErrorBegin}, // COLON {ValueEnd}, // WHITE_SPACE - nl_tokens({ValueEnd}), // LINE_BREAK + nl_tokens({ValueEnd}, {ErrorBegin}), // LINE_BREAK {}}}; // OTHER - pda_tlt[static_cast(pda_state_t::PD_STR)] = {{ /*ROOT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {StringEnd}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}, // OTHER + pda_tlt[static_cast(pda_state_t::PD_STR)] = {{ /*ROOT*/ + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {StringEnd}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {}, // OTHER /*LIST*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {StringEnd}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}, // OTHER + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {StringEnd}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {}, // OTHER /*STRUCT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {StringEnd}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}}}; // OTHER - - pda_tlt[static_cast(pda_state_t::PD_SCE)] = {{ /*ROOT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE 
- nl_tokens({}), // LINE_BREAK - {}, // OTHER + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {StringEnd}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {}}}; // OTHER + + pda_tlt[static_cast(pda_state_t::PD_SCE)] = {{ /*ROOT*/ + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {}, // OTHER /*LIST*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}, // OTHER + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {}, // OTHER /*STRUCT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}}}; // OTHER + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_PVL)] = { - { /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {ErrorBegin}, // OTHER + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + 
{ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {}), // LINE_BREAK + {ErrorBegin}, // OTHER /*LIST*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ListEnd}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {ErrorBegin}, // OTHER + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ListEnd}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER /*STRUCT*/ {ErrorBegin}, // OPENING_BRACE {ErrorBegin}, // OPENING_BRACKET @@ -944,34 +954,34 @@ auto get_translation_table(bool include_line_delimiter) {StructMemberEnd}, // COMMA {ErrorBegin}, // COLON {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK + nl_tokens({}, {ErrorBegin}), // LINE_BREAK {ErrorBegin}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_BFN)] = { - { /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER /*LIST*/ - {ErrorBegin}, // OPENING_BRACE - 
{ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER /*STRUCT*/ {ErrorBegin}, // OPENING_BRACE {ErrorBegin}, // OPENING_BRACKET @@ -982,156 +992,159 @@ auto get_translation_table(bool include_line_delimiter) {ErrorBegin}, // COMMA {ErrorBegin}, // COLON {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK + nl_tokens({}, {ErrorBegin}), // LINE_BREAK {ErrorBegin}}}; // OTHER - pda_tlt[static_cast(pda_state_t::PD_FLN)] = {{ /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER - /*LIST*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER - /*STRUCT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {FieldNameEnd}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}}}; // OTHER - - 
pda_tlt[static_cast(pda_state_t::PD_FNE)] = {{ /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER - /*LIST*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER - /*STRUCT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}}}; // OTHER - - pda_tlt[static_cast(pda_state_t::PD_PFN)] = {{ /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER - /*LIST*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER - /*STRUCT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // 
LINE_BREAK - {ErrorBegin}}}; // OTHER - - pda_tlt[static_cast(pda_state_t::PD_ERR)] = {{ /*ROOT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}, // OTHER + pda_tlt[static_cast(pda_state_t::PD_FLN)] = { + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER + /*LIST*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER + /*STRUCT*/ + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {FieldNameEnd}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {}}}; // OTHER + + pda_tlt[static_cast(pda_state_t::PD_FNE)] = { + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER + /*LIST*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + 
{ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER + /*STRUCT*/ + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {}}}; // OTHER + + pda_tlt[static_cast(pda_state_t::PD_PFN)] = { + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER + /*LIST*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER + /*STRUCT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {ErrorBegin}), // LINE_BREAK + {ErrorBegin}}}; // OTHER + + pda_tlt[static_cast(pda_state_t::PD_ERR)] = {{ /*ROOT*/ + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {}), // LINE_BREAK + {}, // OTHER /*LIST*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - 
{}, // OTHER + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {}), // LINE_BREAK + {}, // OTHER /*STRUCT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}}}; // OTHER + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {}), // LINE_BREAK + {}}}; // OTHER return pda_tlt; } diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 7c911ac2e04..2ddb0b76544 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -1962,7 +1962,31 @@ TEST_F(JsonReaderTest, JSONLinesRecovering) "\n" "\n" // 4 -> a: 123 (valid) - R"({"a":123})"; + R"({"a":4})" + "\n" + // 5 -> (invalid) + R"({"a":5)" + "\n" + // 6 -> (invalid) + R"({"a":6 )" + "\n" + // 7 -> (invalid) + R"({"b":[7 )" + "\n" + // 8 -> a: 8 (valid) + R"({"a":8})" + "\n" + // 9 -> (invalid) + R"({"d":{"unterminated_field_name)" + "\n" + // 10 -> (invalid) + R"({"d":{)" + "\n" + // 11 -> (invalid) + R"({"d":{"123",)" + "\n" + // 12 -> a: 12 (valid) + R"({"a":12})"; auto filepath = temp_env->get_temp_dir() + "RecoveringLines.json"; { @@ -1978,17 +2002,22 @@ TEST_F(JsonReaderTest, JSONLinesRecovering) cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); - EXPECT_EQ(result.tbl->num_rows(), 5); + EXPECT_EQ(result.tbl->num_rows(), 13); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64); EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64); - std::vector a_validity{true, false, false, false, true}; - std::vector c_validity{false, false, false, true, 
false}; + std::vector a_validity{ + true, false, false, false, true, false, false, false, true, false, false, false, true}; + std::vector c_validity{ + false, false, false, true, false, false, false, false, false, false, false, false, false}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), - int64_wrapper{{-2, 0, 0, 0, 123}, a_validity.cbegin()}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), - float64_wrapper{{0.0, 0.0, 0.0, 1.2, 0.0}, c_validity.cbegin()}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + result.tbl->get_column(0), + int64_wrapper{{-2, 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 12}, a_validity.cbegin()}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + result.tbl->get_column(1), + float64_wrapper{{0.0, 0.0, 0.0, 1.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + c_validity.cbegin()}); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp index 00d657108b8..3cb7e1f287a 100644 --- a/cpp/tests/io/nested_json_test.cpp +++ b/cpp/tests/io/nested_json_test.cpp @@ -569,23 +569,12 @@ TEST_F(JsonTest, RecoveringTokenStream) // Line 0 (invalid) {0, token_t::StructBegin}, {0, token_t::StructEnd}, - // Line 1 (valid) - {10, token_t::StructBegin}, - {11, token_t::StructMemberBegin}, - {11, token_t::FieldNameBegin}, - {13, token_t::FieldNameEnd}, - // Line 2 (valid) - {16, token_t::StructBegin}, - {17, token_t::StructMemberBegin}, - {17, token_t::FieldNameBegin}, - {19, token_t::FieldNameEnd}, - {21, token_t::StructBegin}, - {22, token_t::StructMemberBegin}, - {22, token_t::FieldNameBegin}, - {24, token_t::FieldNameEnd}, - {26, token_t::ListBegin}, - {27, token_t::ValueBegin}, - {30, token_t::ValueEnd}, + // Line 1 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + // Line 2 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, // Line 3 (valid) {31, token_t::StructBegin}, {32, token_t::StructMemberBegin}, From 135879368a8fcecda0a1d85bcf18b7e15cd0269d Mon Sep 17 00:00:00 2001 From: 
Ray Douglass Date: Wed, 11 Oct 2023 10:28:59 -0400 Subject: [PATCH 55/76] Update Changelog [skip ci] --- CHANGELOG.md | 262 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76abf241d96..ecd547ab5b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,265 @@ +# cuDF 23.10.00 (11 Oct 2023) + +## 🚨 Breaking Changes + +- Expose stream parameter in public nvtext ngram APIs ([#14061](https://github.com/rapidsai/cudf/pull/14061)) [@davidwendt](https://github.com/davidwendt) +- Raise `MixedTypeError` when a column of mixed-dtype is being constructed ([#14050](https://github.com/rapidsai/cudf/pull/14050)) [@galipremsagar](https://github.com/galipremsagar) +- Raise `NotImplementedError` for `MultiIndex.to_series` ([#14049](https://github.com/rapidsai/cudf/pull/14049)) [@galipremsagar](https://github.com/galipremsagar) +- Create table_input_metadata from a table_metadata ([#13920](https://github.com/rapidsai/cudf/pull/13920)) [@etseidl](https://github.com/etseidl) +- Enable RLE boolean encoding for v2 Parquet files ([#13886](https://github.com/rapidsai/cudf/pull/13886)) [@etseidl](https://github.com/etseidl) +- Change `NA` to `NaT` for `datetime` and `timedelta` types ([#13868](https://github.com/rapidsai/cudf/pull/13868)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `any`, `all` reduction behavior for `axis=None` and warn for other reductions ([#13831](https://github.com/rapidsai/cudf/pull/13831)) [@galipremsagar](https://github.com/galipremsagar) +- Add minhash support for MurmurHash3_x64_128 ([#13796](https://github.com/rapidsai/cudf/pull/13796)) [@davidwendt](https://github.com/davidwendt) +- Remove the libcudf cudf::offset_type type ([#13788](https://github.com/rapidsai/cudf/pull/13788)) [@davidwendt](https://github.com/davidwendt) +- Raise error when trying to join `datetime` and `timedelta` types with other types ([#13786](https://github.com/rapidsai/cudf/pull/13786)) 
[@galipremsagar](https://github.com/galipremsagar) +- Update to Cython 3.0.0 ([#13777](https://github.com/rapidsai/cudf/pull/13777)) [@vyasr](https://github.com/vyasr) +- Raise error on constructing an array from mixed type inputs ([#13768](https://github.com/rapidsai/cudf/pull/13768)) [@galipremsagar](https://github.com/galipremsagar) +- Enforce deprecations in `23.10` ([#13732](https://github.com/rapidsai/cudf/pull/13732)) [@galipremsagar](https://github.com/galipremsagar) +- Upgrade to arrow 12 ([#13728](https://github.com/rapidsai/cudf/pull/13728)) [@galipremsagar](https://github.com/galipremsagar) +- Remove Arrow dependency from the `datasource.hpp` public header ([#13698](https://github.com/rapidsai/cudf/pull/13698)) [@vuule](https://github.com/vuule) + +## 🐛 Bug Fixes + +- Fix inaccurate ceil/floor and inaccurate rescaling casts of fixed-point values. ([#14242](https://github.com/rapidsai/cudf/pull/14242)) [@bdice](https://github.com/bdice) +- Fix inaccuracy in decimal128 rounding. 
([#14233](https://github.com/rapidsai/cudf/pull/14233)) [@bdice](https://github.com/bdice) +- Workaround for illegal instruction error in sm90 for warp instrinsics with mask ([#14201](https://github.com/rapidsai/cudf/pull/14201)) [@karthikeyann](https://github.com/karthikeyann) +- Fix pytorch related pytest ([#14198](https://github.com/rapidsai/cudf/pull/14198)) [@galipremsagar](https://github.com/galipremsagar) +- Pin to `aws-sdk-cpp<1.11` ([#14173](https://github.com/rapidsai/cudf/pull/14173)) [@pentschev](https://github.com/pentschev) +- Fix assert failure for range window functions ([#14168](https://github.com/rapidsai/cudf/pull/14168)) [@mythrocks](https://github.com/mythrocks) +- Fix Memcheck error found in JSON_TEST JsonReaderTest.ErrorStrings ([#14164](https://github.com/rapidsai/cudf/pull/14164)) [@karthikeyann](https://github.com/karthikeyann) +- Fix calls to copy_bitmask to pass stream parameter ([#14158](https://github.com/rapidsai/cudf/pull/14158)) [@davidwendt](https://github.com/davidwendt) +- Fix DataFrame from Series with different CategoricalIndexes ([#14157](https://github.com/rapidsai/cudf/pull/14157)) [@mroeschke](https://github.com/mroeschke) +- Pin to numpy<1.25 and numba<0.58 to avoid errors and deprecation warnings-as-errors. 
([#14156](https://github.com/rapidsai/cudf/pull/14156)) [@bdice](https://github.com/bdice) +- Fix kernel launch error for cudf::io::orc::gpu::rowgroup_char_counts_kernel ([#14139](https://github.com/rapidsai/cudf/pull/14139)) [@davidwendt](https://github.com/davidwendt) +- Don't sort columns for DataFrame init from list of Series ([#14136](https://github.com/rapidsai/cudf/pull/14136)) [@mroeschke](https://github.com/mroeschke) +- Fix DataFrame.values with no columns but index ([#14134](https://github.com/rapidsai/cudf/pull/14134)) [@mroeschke](https://github.com/mroeschke) +- Avoid circular cimports in _lib/cpp/reduce.pxd ([#14125](https://github.com/rapidsai/cudf/pull/14125)) [@vyasr](https://github.com/vyasr) +- Add support for nested dict in `DataFrame` constructor ([#14119](https://github.com/rapidsai/cudf/pull/14119)) [@galipremsagar](https://github.com/galipremsagar) +- Restrict iterables of `DataFrame`'s as input to `DataFrame` constructor ([#14118](https://github.com/rapidsai/cudf/pull/14118)) [@galipremsagar](https://github.com/galipremsagar) +- Allow `numeric_only=True` for reduction operations on numeric types ([#14111](https://github.com/rapidsai/cudf/pull/14111)) [@galipremsagar](https://github.com/galipremsagar) +- Preserve name of the column while initializing a `DataFrame` ([#14110](https://github.com/rapidsai/cudf/pull/14110)) [@galipremsagar](https://github.com/galipremsagar) +- Correct numerous 20054-D: dynamic initialization errors found on arm+12.2 ([#14108](https://github.com/rapidsai/cudf/pull/14108)) [@robertmaynard](https://github.com/robertmaynard) +- Drop `kwargs` from `Series.count` ([#14106](https://github.com/rapidsai/cudf/pull/14106)) [@galipremsagar](https://github.com/galipremsagar) +- Fix naming issues with `Index.to_frame` and `MultiIndex.to_frame` APIs ([#14105](https://github.com/rapidsai/cudf/pull/14105)) [@galipremsagar](https://github.com/galipremsagar) +- Only use memory resources that haven't been freed 
([#14103](https://github.com/rapidsai/cudf/pull/14103)) [@robertmaynard](https://github.com/robertmaynard) +- Add support for `__round__` in `Series` and `DataFrame` ([#14099](https://github.com/rapidsai/cudf/pull/14099)) [@galipremsagar](https://github.com/galipremsagar) +- Validate ignore_index type in drop_duplicates ([#14098](https://github.com/rapidsai/cudf/pull/14098)) [@mroeschke](https://github.com/mroeschke) +- Fix renaming `Series` and `Index` ([#14080](https://github.com/rapidsai/cudf/pull/14080)) [@galipremsagar](https://github.com/galipremsagar) +- Raise NotImplementedError in to_datetime if Z (or tz component) in string ([#14074](https://github.com/rapidsai/cudf/pull/14074)) [@mroeschke](https://github.com/mroeschke) +- Raise NotImplementedError for datetime strings with UTC offset ([#14070](https://github.com/rapidsai/cudf/pull/14070)) [@mroeschke](https://github.com/mroeschke) +- Update pyarrow-related dispatch logic in dask_cudf ([#14069](https://github.com/rapidsai/cudf/pull/14069)) [@rjzamora](https://github.com/rjzamora) +- Use `conda mambabuild` rather than `mamba mambabuild` ([#14067](https://github.com/rapidsai/cudf/pull/14067)) [@wence-](https://github.com/wence-) +- Raise NotImplementedError in to_datetime with dayfirst without infer_format ([#14058](https://github.com/rapidsai/cudf/pull/14058)) [@mroeschke](https://github.com/mroeschke) +- Fix various issues in `Index.intersection` ([#14054](https://github.com/rapidsai/cudf/pull/14054)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `Index.difference` to match with pandas ([#14053](https://github.com/rapidsai/cudf/pull/14053)) [@galipremsagar](https://github.com/galipremsagar) +- Fix empty string column construction ([#14052](https://github.com/rapidsai/cudf/pull/14052)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `IntervalIndex.union` to preserve type-metadata ([#14051](https://github.com/rapidsai/cudf/pull/14051)) 
[@galipremsagar](https://github.com/galipremsagar) +- Raise `MixedTypeError` when a column of mixed-dtype is being constructed ([#14050](https://github.com/rapidsai/cudf/pull/14050)) [@galipremsagar](https://github.com/galipremsagar) +- Raise `NotImplementedError` for `MultiIndex.to_series` ([#14049](https://github.com/rapidsai/cudf/pull/14049)) [@galipremsagar](https://github.com/galipremsagar) +- Ignore compile_commands.json ([#14048](https://github.com/rapidsai/cudf/pull/14048)) [@harrism](https://github.com/harrism) +- Raise TypeError for any non-parseable argument in to_datetime ([#14044](https://github.com/rapidsai/cudf/pull/14044)) [@mroeschke](https://github.com/mroeschke) +- Raise NotImplementedError for to_datetime with z format ([#14037](https://github.com/rapidsai/cudf/pull/14037)) [@mroeschke](https://github.com/mroeschke) +- Implement `sort_remaining` for `sort_index` ([#14033](https://github.com/rapidsai/cudf/pull/14033)) [@wence-](https://github.com/wence-) +- Raise NotImplementedError for Categoricals with timezones ([#14032](https://github.com/rapidsai/cudf/pull/14032)) [@mroeschke](https://github.com/mroeschke) +- Temporary fix Parquet metadata with empty value string being ignored from writing ([#14026](https://github.com/rapidsai/cudf/pull/14026)) [@ttnghia](https://github.com/ttnghia) +- Preserve types of scalar being returned when possible in `quantile` ([#14014](https://github.com/rapidsai/cudf/pull/14014)) [@galipremsagar](https://github.com/galipremsagar) +- Fix return type of `MultiIndex.difference` ([#14009](https://github.com/rapidsai/cudf/pull/14009)) [@galipremsagar](https://github.com/galipremsagar) +- Raise an error when timezone subtypes are encountered in `pd.IntervalDtype` ([#14006](https://github.com/rapidsai/cudf/pull/14006)) [@galipremsagar](https://github.com/galipremsagar) +- Fix map column can not be non-nullable for java ([#14003](https://github.com/rapidsai/cudf/pull/14003)) [@res-life](https://github.com/res-life) +- Fix 
`name` selection in `Index.difference` and `Index.intersection` ([#13986](https://github.com/rapidsai/cudf/pull/13986)) [@galipremsagar](https://github.com/galipremsagar) +- Restore column type metadata with `dropna` to fix `factorize` API ([#13980](https://github.com/rapidsai/cudf/pull/13980)) [@galipremsagar](https://github.com/galipremsagar) +- Use thread_index_type to avoid out of bounds accesses in conditional joins ([#13971](https://github.com/rapidsai/cudf/pull/13971)) [@vyasr](https://github.com/vyasr) +- Fix `MultiIndex.to_numpy` to return numpy array with tuples ([#13966](https://github.com/rapidsai/cudf/pull/13966)) [@galipremsagar](https://github.com/galipremsagar) +- Use cudf::thread_index_type in get_json_object and tdigest kernels ([#13962](https://github.com/rapidsai/cudf/pull/13962)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix an issue with `IntervalIndex.repr` when null values are present ([#13958](https://github.com/rapidsai/cudf/pull/13958)) [@galipremsagar](https://github.com/galipremsagar) +- Fix type metadata issue preservation with `Column.unique` ([#13957](https://github.com/rapidsai/cudf/pull/13957)) [@galipremsagar](https://github.com/galipremsagar) +- Handle `Interval` scalars when passed in list-like inputs to `cudf.Index` ([#13956](https://github.com/rapidsai/cudf/pull/13956)) [@galipremsagar](https://github.com/galipremsagar) +- Fix setting of categories order when `dtype` is passed to a `CategoricalColumn` ([#13955](https://github.com/rapidsai/cudf/pull/13955)) [@galipremsagar](https://github.com/galipremsagar) +- Handle `as_index` in `GroupBy.apply` ([#13951](https://github.com/rapidsai/cudf/pull/13951)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Raise error for string types in `nsmallest` and `nlargest` ([#13946](https://github.com/rapidsai/cudf/pull/13946)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `index` of `Groupby.apply` results when it is performed on empty objects 
([#13944](https://github.com/rapidsai/cudf/pull/13944)) [@galipremsagar](https://github.com/galipremsagar) +- Fix integer overflow in shim `device_sum` functions ([#13943](https://github.com/rapidsai/cudf/pull/13943)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix type mismatch in groupby reduction for empty objects ([#13942](https://github.com/rapidsai/cudf/pull/13942)) [@galipremsagar](https://github.com/galipremsagar) +- Fixed processed bytes calculation in APPLY_BOOLEAN_MASK benchmark. ([#13937](https://github.com/rapidsai/cudf/pull/13937)) [@Blonck](https://github.com/Blonck) +- Fix construction of `Grouping` objects ([#13932](https://github.com/rapidsai/cudf/pull/13932)) [@galipremsagar](https://github.com/galipremsagar) +- Fix an issue with `loc` when column names is `MultiIndex` ([#13929](https://github.com/rapidsai/cudf/pull/13929)) [@galipremsagar](https://github.com/galipremsagar) +- Fix handling of typecasting in `searchsorted` ([#13925](https://github.com/rapidsai/cudf/pull/13925)) [@galipremsagar](https://github.com/galipremsagar) +- Preserve index `name` in `reindex` ([#13917](https://github.com/rapidsai/cudf/pull/13917)) [@galipremsagar](https://github.com/galipremsagar) +- Use `cudf::thread_index_type` in cuIO to prevent overflow in row indexing ([#13910](https://github.com/rapidsai/cudf/pull/13910)) [@vuule](https://github.com/vuule) +- Fix for encodings listed in the Parquet column chunk metadata ([#13907](https://github.com/rapidsai/cudf/pull/13907)) [@etseidl](https://github.com/etseidl) +- Use cudf::thread_index_type in concatenate.cu. ([#13906](https://github.com/rapidsai/cudf/pull/13906)) [@bdice](https://github.com/bdice) +- Use cudf::thread_index_type in replace.cu. 
([#13905](https://github.com/rapidsai/cudf/pull/13905)) [@bdice](https://github.com/bdice) +- Add noSanitizer tag to Java reduction tests failing with sanitizer in CUDA 12 ([#13904](https://github.com/rapidsai/cudf/pull/13904)) [@jlowe](https://github.com/jlowe) +- Remove the internal use of the cudf's default stream in cuIO ([#13903](https://github.com/rapidsai/cudf/pull/13903)) [@vuule](https://github.com/vuule) +- Use cuda-nvtx-dev CUDA 12 package. ([#13901](https://github.com/rapidsai/cudf/pull/13901)) [@bdice](https://github.com/bdice) +- Use `thread_index_type` to avoid index overflow in grid-stride loops ([#13895](https://github.com/rapidsai/cudf/pull/13895)) [@PointKernel](https://github.com/PointKernel) +- Fix memory access error in cudf::shift for sliced strings ([#13894](https://github.com/rapidsai/cudf/pull/13894)) [@davidwendt](https://github.com/davidwendt) +- Raise error when trying to construct a `DataFrame` with mixed types ([#13889](https://github.com/rapidsai/cudf/pull/13889)) [@galipremsagar](https://github.com/galipremsagar) +- Return `nan` when one variable to be correlated has zero variance in JIT GroupBy Apply ([#13884](https://github.com/rapidsai/cudf/pull/13884)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Correctly detect the BOM mark in `read_csv` with compressed input ([#13881](https://github.com/rapidsai/cudf/pull/13881)) [@vuule](https://github.com/vuule) +- Check for the presence of all values in `MultiIndex.isin` ([#13879](https://github.com/rapidsai/cudf/pull/13879)) [@galipremsagar](https://github.com/galipremsagar) +- Fix nvtext::generate_character_ngrams performance regression for longer strings ([#13874](https://github.com/rapidsai/cudf/pull/13874)) [@davidwendt](https://github.com/davidwendt) +- Fix return type of `MultiIndex.levels` ([#13870](https://github.com/rapidsai/cudf/pull/13870)) [@galipremsagar](https://github.com/galipremsagar) +- Fix List's missing children metadata in JSON writer 
([#13869](https://github.com/rapidsai/cudf/pull/13869)) [@karthikeyann](https://github.com/karthikeyann) +- Disable construction of Index when `freq` is set in pandas-compatibility mode ([#13857](https://github.com/rapidsai/cudf/pull/13857)) [@galipremsagar](https://github.com/galipremsagar) +- Fix an issue with fetching `NA` from a `TimedeltaColumn` ([#13853](https://github.com/rapidsai/cudf/pull/13853)) [@galipremsagar](https://github.com/galipremsagar) +- Simplify implementation of interval_range() and fix behaviour for floating `freq` ([#13844](https://github.com/rapidsai/cudf/pull/13844)) [@shwina](https://github.com/shwina) +- Fix binary operations between `Series` and `Index` ([#13842](https://github.com/rapidsai/cudf/pull/13842)) [@galipremsagar](https://github.com/galipremsagar) +- Update make_lists_column_from_scalar to use make_offsets_child_column utility ([#13841](https://github.com/rapidsai/cudf/pull/13841)) [@davidwendt](https://github.com/davidwendt) +- Fix read out of bounds in string concatenate ([#13838](https://github.com/rapidsai/cudf/pull/13838)) [@pentschev](https://github.com/pentschev) +- Raise error for more cases when `timezone-aware` data is passed to `as_column` ([#13835](https://github.com/rapidsai/cudf/pull/13835)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `any`, `all` reduction behavior for `axis=None` and warn for other reductions ([#13831](https://github.com/rapidsai/cudf/pull/13831)) [@galipremsagar](https://github.com/galipremsagar) +- Raise error when trying to construct time-zone aware timestamps ([#13830](https://github.com/rapidsai/cudf/pull/13830)) [@galipremsagar](https://github.com/galipremsagar) +- Fix cuFile I/O factories ([#13829](https://github.com/rapidsai/cudf/pull/13829)) [@vuule](https://github.com/vuule) +- DataFrame with namedtuples uses ._field as column names ([#13824](https://github.com/rapidsai/cudf/pull/13824)) [@mroeschke](https://github.com/mroeschke) +- Branch 23.10 merge 23.08 
([#13822](https://github.com/rapidsai/cudf/pull/13822)) [@vyasr](https://github.com/vyasr) +- Return a Series from JIT GroupBy apply, rather than a DataFrame ([#13820](https://github.com/rapidsai/cudf/pull/13820)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- No need to dlsym EnsureS3Finalized we can call it directly ([#13819](https://github.com/rapidsai/cudf/pull/13819)) [@robertmaynard](https://github.com/robertmaynard) +- Raise error when mixed types are being constructed ([#13816](https://github.com/rapidsai/cudf/pull/13816)) [@galipremsagar](https://github.com/galipremsagar) +- Fix unbounded sequence issue in `DataFrame` constructor ([#13811](https://github.com/rapidsai/cudf/pull/13811)) [@galipremsagar](https://github.com/galipremsagar) +- Fix Byte-Pair-Encoding usage of cuco static-map for storing merge-pairs ([#13807](https://github.com/rapidsai/cudf/pull/13807)) [@davidwendt](https://github.com/davidwendt) +- Fix for Parquet writer when requested pages per row is smaller than fragment size ([#13806](https://github.com/rapidsai/cudf/pull/13806)) [@etseidl](https://github.com/etseidl) +- Remove hangs from trying to construct un-bounded sequences ([#13799](https://github.com/rapidsai/cudf/pull/13799)) [@galipremsagar](https://github.com/galipremsagar) +- Bug/update libcudf to handle arrow12 changes ([#13794](https://github.com/rapidsai/cudf/pull/13794)) [@robertmaynard](https://github.com/robertmaynard) +- Update get_arrow to arrows 12 CMake target name of arrow::xsimd ([#13790](https://github.com/rapidsai/cudf/pull/13790)) [@robertmaynard](https://github.com/robertmaynard) +- Raise error when trying to join `datetime` and `timedelta` types with other types ([#13786](https://github.com/rapidsai/cudf/pull/13786)) [@galipremsagar](https://github.com/galipremsagar) +- Fix negative unary operation for boolean type ([#13780](https://github.com/rapidsai/cudf/pull/13780)) [@galipremsagar](https://github.com/galipremsagar) +- Fix contains(`in`) method 
for `Series` ([#13779](https://github.com/rapidsai/cudf/pull/13779)) [@galipremsagar](https://github.com/galipremsagar) +- Fix binary operation column ordering and missing column issues ([#13778](https://github.com/rapidsai/cudf/pull/13778)) [@galipremsagar](https://github.com/galipremsagar) +- Cast only time of day to nanos to avoid an overflow in Parquet INT96 write ([#13776](https://github.com/rapidsai/cudf/pull/13776)) [@gerashegalov](https://github.com/gerashegalov) +- Preserve names of column object in various APIs ([#13772](https://github.com/rapidsai/cudf/pull/13772)) [@galipremsagar](https://github.com/galipremsagar) +- Raise error on constructing an array from mixed type inputs ([#13768](https://github.com/rapidsai/cudf/pull/13768)) [@galipremsagar](https://github.com/galipremsagar) +- Fix construction of DataFrames from dict when columns are provided ([#13766](https://github.com/rapidsai/cudf/pull/13766)) [@wence-](https://github.com/wence-) +- Provide our own Cython declaration for make_unique ([#13746](https://github.com/rapidsai/cudf/pull/13746)) [@wence-](https://github.com/wence-) + +## 📖 Documentation + +- Fix typo in docstring: metadata. 
([#14025](https://github.com/rapidsai/cudf/pull/14025)) [@bdice](https://github.com/bdice) +- Fix typo in parquet/page_decode.cuh ([#13849](https://github.com/rapidsai/cudf/pull/13849)) [@XinyuZeng](https://github.com/XinyuZeng) +- Simplify Python doc configuration ([#13826](https://github.com/rapidsai/cudf/pull/13826)) [@vyasr](https://github.com/vyasr) +- Update documentation to reflect recent changes in JSON reader and writer ([#13791](https://github.com/rapidsai/cudf/pull/13791)) [@vuule](https://github.com/vuule) +- Fix all warnings in Python docs ([#13789](https://github.com/rapidsai/cudf/pull/13789)) [@vyasr](https://github.com/vyasr) + +## 🚀 New Features + +- [Java] Add JNI bindings for `integers_to_hex` ([#14205](https://github.com/rapidsai/cudf/pull/14205)) [@razajafri](https://github.com/razajafri) +- Propagate errors from Parquet reader kernels back to host ([#14167](https://github.com/rapidsai/cudf/pull/14167)) [@vuule](https://github.com/vuule) +- JNI for `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations ([#14154](https://github.com/rapidsai/cudf/pull/14154)) [@ttnghia](https://github.com/ttnghia) +- Expose streams in all public sorting APIs ([#14146](https://github.com/rapidsai/cudf/pull/14146)) [@vyasr](https://github.com/vyasr) +- Enable direct ingestion and production of Arrow scalars ([#14121](https://github.com/rapidsai/cudf/pull/14121)) [@vyasr](https://github.com/vyasr) +- Implement `GroupBy.value_counts` to match pandas API ([#14114](https://github.com/rapidsai/cudf/pull/14114)) [@stmio](https://github.com/stmio) +- Refactor parquet thrift reader ([#14097](https://github.com/rapidsai/cudf/pull/14097)) [@etseidl](https://github.com/etseidl) +- Refactor `hash_reduce_by_row` ([#14095](https://github.com/rapidsai/cudf/pull/14095)) [@ttnghia](https://github.com/ttnghia) +- Support negative preceding/following for ROW window functions ([#14093](https://github.com/rapidsai/cudf/pull/14093)) [@mythrocks](https://github.com/mythrocks) +- Support for 
progressive parquet chunked reading. ([#14079](https://github.com/rapidsai/cudf/pull/14079)) [@nvdbaranec](https://github.com/nvdbaranec) +- Implement `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations ([#14045](https://github.com/rapidsai/cudf/pull/14045)) [@ttnghia](https://github.com/ttnghia) +- Expose streams in public search APIs ([#14034](https://github.com/rapidsai/cudf/pull/14034)) [@vyasr](https://github.com/vyasr) +- Expose streams in public replace APIs ([#14010](https://github.com/rapidsai/cudf/pull/14010)) [@vyasr](https://github.com/vyasr) +- Add stream parameter to public cudf::strings::split APIs ([#13997](https://github.com/rapidsai/cudf/pull/13997)) [@davidwendt](https://github.com/davidwendt) +- Expose streams in public filling APIs ([#13990](https://github.com/rapidsai/cudf/pull/13990)) [@vyasr](https://github.com/vyasr) +- Expose streams in public concatenate APIs ([#13987](https://github.com/rapidsai/cudf/pull/13987)) [@vyasr](https://github.com/vyasr) +- Use HostMemoryAllocator in jni::allocate_host_buffer ([#13975](https://github.com/rapidsai/cudf/pull/13975)) [@gerashegalov](https://github.com/gerashegalov) +- Enable fractional null probability for hashing benchmark ([#13967](https://github.com/rapidsai/cudf/pull/13967)) [@Blonck](https://github.com/Blonck) +- Switch pylibcudf-enabled types to use enum class in Cython ([#13931](https://github.com/rapidsai/cudf/pull/13931)) [@vyasr](https://github.com/vyasr) +- Add nvtext::tokenize_with_vocabulary API ([#13930](https://github.com/rapidsai/cudf/pull/13930)) [@davidwendt](https://github.com/davidwendt) +- Rewrite `DataFrame.stack` to support multi level column names ([#13927](https://github.com/rapidsai/cudf/pull/13927)) [@isVoid](https://github.com/isVoid) +- Add HostMemoryAllocator interface ([#13924](https://github.com/rapidsai/cudf/pull/13924)) [@gerashegalov](https://github.com/gerashegalov) +- Global stream pool ([#13922](https://github.com/rapidsai/cudf/pull/13922)) 
[@etseidl](https://github.com/etseidl) +- Create table_input_metadata from a table_metadata ([#13920](https://github.com/rapidsai/cudf/pull/13920)) [@etseidl](https://github.com/etseidl) +- Translate column size overflow exception to JNI ([#13911](https://github.com/rapidsai/cudf/pull/13911)) [@mythrocks](https://github.com/mythrocks) +- Enable RLE boolean encoding for v2 Parquet files ([#13886](https://github.com/rapidsai/cudf/pull/13886)) [@etseidl](https://github.com/etseidl) +- Exclude some tests from running with the compute sanitizer ([#13872](https://github.com/rapidsai/cudf/pull/13872)) [@firestarman](https://github.com/firestarman) +- Expand statistics support in ORC writer ([#13848](https://github.com/rapidsai/cudf/pull/13848)) [@vuule](https://github.com/vuule) +- Register the memory mapped buffer in `datasource` to improve H2D throughput ([#13814](https://github.com/rapidsai/cudf/pull/13814)) [@vuule](https://github.com/vuule) +- Add cudf::strings::find function with target per row ([#13808](https://github.com/rapidsai/cudf/pull/13808)) [@davidwendt](https://github.com/davidwendt) +- Add minhash support for MurmurHash3_x64_128 ([#13796](https://github.com/rapidsai/cudf/pull/13796)) [@davidwendt](https://github.com/davidwendt) +- Remove unnecessary pointer copying in JIT GroupBy Apply ([#13792](https://github.com/rapidsai/cudf/pull/13792)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add 'poll' function to custreamz kafka consumer ([#13782](https://github.com/rapidsai/cudf/pull/13782)) [@jdye64](https://github.com/jdye64) +- Support `corr` in `GroupBy.apply` through the jit engine ([#13767](https://github.com/rapidsai/cudf/pull/13767)) [@shwina](https://github.com/shwina) +- Optionally write version 2 page headers in Parquet writer ([#13751](https://github.com/rapidsai/cudf/pull/13751)) [@etseidl](https://github.com/etseidl) +- Support more numeric types in `Groupby.apply` with `engine='jit'` 
([#13729](https://github.com/rapidsai/cudf/pull/13729)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- [FEA] Add DELTA_BINARY_PACKED decoding support to Parquet reader ([#13637](https://github.com/rapidsai/cudf/pull/13637)) [@etseidl](https://github.com/etseidl) +- Read FIXED_LEN_BYTE_ARRAY as binary in parquet reader ([#13437](https://github.com/rapidsai/cudf/pull/13437)) [@PointKernel](https://github.com/PointKernel) + +## 🛠️ Improvements + +- Pin `dask` and `distributed` for `23.10` release ([#14225](https://github.com/rapidsai/cudf/pull/14225)) [@galipremsagar](https://github.com/galipremsagar) +- update rmm tag path ([#14195](https://github.com/rapidsai/cudf/pull/14195)) [@AyodeAwe](https://github.com/AyodeAwe) +- Disable `Recently Updated` Check ([#14193](https://github.com/rapidsai/cudf/pull/14193)) [@ajschmidt8](https://github.com/ajschmidt8) +- Move cpp/src/hash/hash_allocator.cuh to include/cudf/hashing/detail ([#14163](https://github.com/rapidsai/cudf/pull/14163)) [@davidwendt](https://github.com/davidwendt) +- Add Parquet reader benchmarks for row selection ([#14147](https://github.com/rapidsai/cudf/pull/14147)) [@vuule](https://github.com/vuule) +- Update image names ([#14145](https://github.com/rapidsai/cudf/pull/14145)) [@AyodeAwe](https://github.com/AyodeAwe) +- Support callables in DataFrame.assign ([#14142](https://github.com/rapidsai/cudf/pull/14142)) [@wence-](https://github.com/wence-) +- Reduce memory usage of as_categorical_column ([#14138](https://github.com/rapidsai/cudf/pull/14138)) [@wence-](https://github.com/wence-) +- Replace Python scalar conversions with libcudf ([#14124](https://github.com/rapidsai/cudf/pull/14124)) [@vyasr](https://github.com/vyasr) +- Update to clang 16.0.6. 
([#14120](https://github.com/rapidsai/cudf/pull/14120)) [@bdice](https://github.com/bdice) +- Fix type of empty `Index` and raise warning in `Series` constructor ([#14116](https://github.com/rapidsai/cudf/pull/14116)) [@galipremsagar](https://github.com/galipremsagar) +- Add stream parameter to external dict APIs ([#14115](https://github.com/rapidsai/cudf/pull/14115)) [@SurajAralihalli](https://github.com/SurajAralihalli) +- Add fallback matrix for nvcomp. ([#14082](https://github.com/rapidsai/cudf/pull/14082)) [@bdice](https://github.com/bdice) +- [Java] Add recoverWithNull to JSONOptions and pass to Table.readJSON ([#14078](https://github.com/rapidsai/cudf/pull/14078)) [@andygrove](https://github.com/andygrove) +- Remove header tests ([#14072](https://github.com/rapidsai/cudf/pull/14072)) [@ajschmidt8](https://github.com/ajschmidt8) +- Refactor `contains_table` with cuco::static_set ([#14064](https://github.com/rapidsai/cudf/pull/14064)) [@PointKernel](https://github.com/PointKernel) +- Remove debug print in a Parquet test ([#14063](https://github.com/rapidsai/cudf/pull/14063)) [@vuule](https://github.com/vuule) +- Expose stream parameter in public nvtext ngram APIs ([#14061](https://github.com/rapidsai/cudf/pull/14061)) [@davidwendt](https://github.com/davidwendt) +- Expose stream parameter in public strings find APIs ([#14060](https://github.com/rapidsai/cudf/pull/14060)) [@davidwendt](https://github.com/davidwendt) +- Update doxygen to 1.9.1 ([#14059](https://github.com/rapidsai/cudf/pull/14059)) [@vyasr](https://github.com/vyasr) +- Remove the mr from the base fixture ([#14057](https://github.com/rapidsai/cudf/pull/14057)) [@vyasr](https://github.com/vyasr) +- Expose streams in public strings case APIs ([#14056](https://github.com/rapidsai/cudf/pull/14056)) [@davidwendt](https://github.com/davidwendt) +- Refactor libcudf indexalator to typed normalator ([#14043](https://github.com/rapidsai/cudf/pull/14043)) [@davidwendt](https://github.com/davidwendt) +- Use 
cudf::make_empty_column instead of column_view constructor ([#14030](https://github.com/rapidsai/cudf/pull/14030)) [@davidwendt](https://github.com/davidwendt) +- Remove quadratic runtime due to accessing Frame._dtypes in loop ([#14028](https://github.com/rapidsai/cudf/pull/14028)) [@wence-](https://github.com/wence-) +- Explicitly depend on zlib in conda recipes ([#14018](https://github.com/rapidsai/cudf/pull/14018)) [@wence-](https://github.com/wence-) +- Use grid_stride for stride computations. ([#13996](https://github.com/rapidsai/cudf/pull/13996)) [@bdice](https://github.com/bdice) +- Fix an issue where casting null-array to `object` dtype will result in a failure ([#13994](https://github.com/rapidsai/cudf/pull/13994)) [@galipremsagar](https://github.com/galipremsagar) +- Add tab as literal to cudf::test::to_string output ([#13993](https://github.com/rapidsai/cudf/pull/13993)) [@davidwendt](https://github.com/davidwendt) +- Enable `codes` dtype parity in pandas-compatibility mode for `factorize` API ([#13982](https://github.com/rapidsai/cudf/pull/13982)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `CategoricalIndex` ordering in `Groupby.agg` when pandas-compatibility mode is enabled ([#13978](https://github.com/rapidsai/cudf/pull/13978)) [@galipremsagar](https://github.com/galipremsagar) +- Produce a fatal error if cudf is unable to find pyarrow include directory ([#13976](https://github.com/rapidsai/cudf/pull/13976)) [@cwharris](https://github.com/cwharris) +- Use `thread_index_type` in `partitioning.cu` ([#13973](https://github.com/rapidsai/cudf/pull/13973)) [@divyegala](https://github.com/divyegala) +- Use `cudf::thread_index_type` in `merge.cu` ([#13972](https://github.com/rapidsai/cudf/pull/13972)) [@divyegala](https://github.com/divyegala) +- Use `copy-pr-bot` ([#13970](https://github.com/rapidsai/cudf/pull/13970)) [@ajschmidt8](https://github.com/ajschmidt8) +- Use cudf::thread_index_type in strings custom kernels 
([#13968](https://github.com/rapidsai/cudf/pull/13968)) [@davidwendt](https://github.com/davidwendt) +- Add `bytes_per_second` to hash_partition benchmark ([#13965](https://github.com/rapidsai/cudf/pull/13965)) [@Blonck](https://github.com/Blonck) +- Added pinned pool reservation API for java ([#13964](https://github.com/rapidsai/cudf/pull/13964)) [@revans2](https://github.com/revans2) +- Simplify wheel build scripts and allow alphas of RAPIDS dependencies ([#13963](https://github.com/rapidsai/cudf/pull/13963)) [@vyasr](https://github.com/vyasr) +- Add `bytes_per_second` to copy_if_else benchmark ([#13960](https://github.com/rapidsai/cudf/pull/13960)) [@Blonck](https://github.com/Blonck) +- Add pandas compatible output to `Series.unique` ([#13959](https://github.com/rapidsai/cudf/pull/13959)) [@galipremsagar](https://github.com/galipremsagar) +- Add `bytes_per_second` to compiled binaryop benchmark ([#13938](https://github.com/rapidsai/cudf/pull/13938)) [@Blonck](https://github.com/Blonck) +- Unpin `dask` and `distributed` for `23.10` development ([#13935](https://github.com/rapidsai/cudf/pull/13935)) [@galipremsagar](https://github.com/galipremsagar) +- Make HostColumnVector.getRefCount public ([#13934](https://github.com/rapidsai/cudf/pull/13934)) [@abellina](https://github.com/abellina) +- Use cuco::static_set in JSON tree algorithm ([#13928](https://github.com/rapidsai/cudf/pull/13928)) [@karthikeyann](https://github.com/karthikeyann) +- Add java API to get size of host memory needed to copy column view ([#13919](https://github.com/rapidsai/cudf/pull/13919)) [@revans2](https://github.com/revans2) +- Use cudf::size_type instead of int32 where appropriate in nvtext functions ([#13915](https://github.com/rapidsai/cudf/pull/13915)) [@davidwendt](https://github.com/davidwendt) +- Enable hugepage for arrow host allocations ([#13914](https://github.com/rapidsai/cudf/pull/13914)) [@madsbk](https://github.com/madsbk) +- Improve performance of nvtext::edit_distance 
([#13912](https://github.com/rapidsai/cudf/pull/13912)) [@davidwendt](https://github.com/davidwendt) +- Ensure cudf internals use pylibcudf in pure Python mode ([#13909](https://github.com/rapidsai/cudf/pull/13909)) [@vyasr](https://github.com/vyasr) +- Use `empty()` instead of `size()` where possible ([#13908](https://github.com/rapidsai/cudf/pull/13908)) [@vuule](https://github.com/vuule) +- [JNI] Adds HostColumnVector.EventHandler for spillability checks ([#13898](https://github.com/rapidsai/cudf/pull/13898)) [@abellina](https://github.com/abellina) +- Return `Timestamp` & `Timedelta` for fetching scalars in `DatetimeIndex` & `TimedeltaIndex` ([#13896](https://github.com/rapidsai/cudf/pull/13896)) [@galipremsagar](https://github.com/galipremsagar) +- Allow explicit `shuffle="p2p"` within dask-cudf API ([#13893](https://github.com/rapidsai/cudf/pull/13893)) [@rjzamora](https://github.com/rjzamora) +- Disable creation of `DatetimeIndex` when `freq` is passed to `cudf.date_range` ([#13890](https://github.com/rapidsai/cudf/pull/13890)) [@galipremsagar](https://github.com/galipremsagar) +- Bring parity with pandas for `datetime` & `timedelta` comparison operations ([#13877](https://github.com/rapidsai/cudf/pull/13877)) [@galipremsagar](https://github.com/galipremsagar) +- Change `NA` to `NaT` for `datetime` and `timedelta` types ([#13868](https://github.com/rapidsai/cudf/pull/13868)) [@galipremsagar](https://github.com/galipremsagar) +- Raise error when `astype(object)` is called in pandas compatibility mode ([#13862](https://github.com/rapidsai/cudf/pull/13862)) [@galipremsagar](https://github.com/galipremsagar) +- Fixes a performance regression in FST ([#13850](https://github.com/rapidsai/cudf/pull/13850)) [@elstehle](https://github.com/elstehle) +- Set native handles to null on close in Java wrapper classes ([#13818](https://github.com/rapidsai/cudf/pull/13818)) [@jlowe](https://github.com/jlowe) +- Avoid use of CUDF_EXPECTS in libcudf unit tests outside of helper 
functions with return values ([#13812](https://github.com/rapidsai/cudf/pull/13812)) [@vuule](https://github.com/vuule) +- Update `lists::contains` to experimental row comparator ([#13810](https://github.com/rapidsai/cudf/pull/13810)) [@divyegala](https://github.com/divyegala) +- Reduce `lists::contains` dispatches for scalars ([#13805](https://github.com/rapidsai/cudf/pull/13805)) [@divyegala](https://github.com/divyegala) +- Long string optimization for string column parsing in JSON reader ([#13803](https://github.com/rapidsai/cudf/pull/13803)) [@karthikeyann](https://github.com/karthikeyann) +- Raise NotImplementedError for pd.SparseDtype ([#13798](https://github.com/rapidsai/cudf/pull/13798)) [@mroeschke](https://github.com/mroeschke) +- Remove the libcudf cudf::offset_type type ([#13788](https://github.com/rapidsai/cudf/pull/13788)) [@davidwendt](https://github.com/davidwendt) +- Move Spark-indpendent Table debug to cudf Java ([#13783](https://github.com/rapidsai/cudf/pull/13783)) [@gerashegalov](https://github.com/gerashegalov) +- Update to Cython 3.0.0 ([#13777](https://github.com/rapidsai/cudf/pull/13777)) [@vyasr](https://github.com/vyasr) +- Refactor Parquet reader handling of V2 page header info ([#13775](https://github.com/rapidsai/cudf/pull/13775)) [@etseidl](https://github.com/etseidl) +- Branch 23.10 merge 23.08 ([#13773](https://github.com/rapidsai/cudf/pull/13773)) [@vyasr](https://github.com/vyasr) +- Restructure JSON code to correctly reflect legacy/experimental status ([#13757](https://github.com/rapidsai/cudf/pull/13757)) [@vuule](https://github.com/vuule) +- Branch 23.10 merge 23.08 ([#13753](https://github.com/rapidsai/cudf/pull/13753)) [@vyasr](https://github.com/vyasr) +- Enforce deprecations in `23.10` ([#13732](https://github.com/rapidsai/cudf/pull/13732)) [@galipremsagar](https://github.com/galipremsagar) +- Upgrade to arrow 12 ([#13728](https://github.com/rapidsai/cudf/pull/13728)) [@galipremsagar](https://github.com/galipremsagar) +- 
Refactors JSON reader's pushdown automaton ([#13716](https://github.com/rapidsai/cudf/pull/13716)) [@elstehle](https://github.com/elstehle) +- Remove Arrow dependency from the `datasource.hpp` public header ([#13698](https://github.com/rapidsai/cudf/pull/13698)) [@vuule](https://github.com/vuule) + # cuDF 23.08.00 (9 Aug 2023) ## 🚨 Breaking Changes From aa598bc28e6e2459ca6bcfa58f2056134e6591ea Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 11 Oct 2023 15:34:59 -0400 Subject: [PATCH 56/76] Expose stream parameter in public strings split/partition APIs (#14247) Follow on to PR #13997 which did not include all the split APIs or a stream test. Add stream parameter to public APIs: - `cudf::strings::partition()` - `cudf::strings::rpartition()` - `cudf::strings::split_re()` - `cudf::strings::rsplit_re()` - `cudf::strings::split_record_re()` - `cudf::strings::rsplit_record_re()` Also cleaned up some of the doxygen comments. Reference #13744 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Mark Harris (https://github.com/harrism) - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14247 --- cpp/include/cudf/strings/split/partition.hpp | 22 +++++---- cpp/include/cudf/strings/split/split_re.hpp | 16 +++++-- cpp/src/strings/split/partition.cu | 10 ++-- cpp/src/strings/split/split_re.cu | 12 +++-- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/streams/strings/split_test.cpp | 49 ++++++++++++++++++++ 6 files changed, 89 insertions(+), 22 deletions(-) create mode 100644 cpp/tests/streams/strings/split_test.cpp diff --git a/cpp/include/cudf/strings/split/partition.hpp b/cpp/include/cudf/strings/split/partition.hpp index 52ffb735eb7..25eedf1e86b 100644 --- a/cpp/include/cudf/strings/split/partition.hpp +++ b/cpp/include/cudf/strings/split/partition.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,15 +51,17 @@ namespace strings { * r[2] is ["cd","g_h"] * @endcode * - * @param strings Strings instance for this operation. + * @param input Strings instance for this operation * @param delimiter UTF-8 encoded string indicating where to split each string. * Default of empty string indicates split on whitespace. - * @param mr Device memory resource used to allocate the returned table's device memory. - * @return New table of strings columns. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table's device memory + * @return New table of strings columns */ std::unique_ptr
partition( - strings_column_view const& strings, + strings_column_view const& input, string_scalar const& delimiter = string_scalar(""), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -83,15 +85,17 @@ std::unique_ptr
partition( * r[2] is ["cd","h"] * @endcode * - * @param strings Strings instance for this operation. + * @param input Strings instance for this operation * @param delimiter UTF-8 encoded string indicating where to split each string. * Default of empty string indicates split on whitespace. - * @param mr Device memory resource used to allocate the returned table's device memory. - * @return New strings columns. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table's device memory + * @return New strings columns */ std::unique_ptr
rpartition( - strings_column_view const& strings, + strings_column_view const& input, string_scalar const& delimiter = string_scalar(""), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/split/split_re.hpp b/cpp/include/cudf/strings/split/split_re.hpp index 14fcfaecdcd..f1736cb7e0c 100644 --- a/cpp/include/cudf/strings/split/split_re.hpp +++ b/cpp/include/cudf/strings/split/split_re.hpp @@ -75,6 +75,7 @@ struct regex_program; * @param prog Regex program instance * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned result's device memory * @return A table of columns of strings */ @@ -82,6 +83,7 @@ std::unique_ptr
split_re( strings_column_view const& input, regex_program const& prog, size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -125,17 +127,19 @@ std::unique_ptr
split_re( * * @throw cudf::logic_error if `pattern` is empty. * - * @param input A column of string elements to be split. + * @param input A column of string elements to be split * @param prog Regex program instance * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. - * @param mr Device memory resource used to allocate the returned result's device memory. - * @return A table of columns of strings. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned result's device memory + * @return A table of columns of strings */ std::unique_ptr
rsplit_re( strings_column_view const& input, regex_program const& prog, size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -185,13 +189,15 @@ std::unique_ptr
rsplit_re( * @param prog Regex program instance * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned result's device memory - * @return Lists column of strings. + * @return Lists column of strings */ std::unique_ptr split_record_re( strings_column_view const& input, regex_program const& prog, size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -243,6 +249,7 @@ std::unique_ptr split_record_re( * @param prog Regex program instance * @param maxsplit Maximum number of splits to perform. * Default of -1 indicates all possible splits on each string. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned result's device memory * @return Lists column of strings */ @@ -250,6 +257,7 @@ std::unique_ptr rsplit_record_re( strings_column_view const& input, regex_program const& prog, size_type maxsplit = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/src/strings/split/partition.cu b/cpp/src/strings/split/partition.cu index 0c7d119ea38..16e6402cfef 100644 --- a/cpp/src/strings/split/partition.cu +++ b/cpp/src/strings/split/partition.cu @@ -239,20 +239,22 @@ std::unique_ptr
rpartition(strings_column_view const& strings, // external APIs -std::unique_ptr
partition(strings_column_view const& strings, +std::unique_ptr
partition(strings_column_view const& input, string_scalar const& delimiter, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::partition(strings, delimiter, cudf::get_default_stream(), mr); + return detail::partition(input, delimiter, stream, mr); } -std::unique_ptr
rpartition(strings_column_view const& strings, +std::unique_ptr
rpartition(strings_column_view const& input, string_scalar const& delimiter, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::rpartition(strings, delimiter, cudf::get_default_stream(), mr); + return detail::rpartition(input, delimiter, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index 3be5937297f..913aec79758 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -340,37 +340,41 @@ std::unique_ptr rsplit_record_re(strings_column_view const& input, std::unique_ptr
split_re(strings_column_view const& input, regex_program const& prog, size_type maxsplit, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::split_re(input, prog, maxsplit, cudf::get_default_stream(), mr); + return detail::split_re(input, prog, maxsplit, stream, mr); } std::unique_ptr split_record_re(strings_column_view const& input, regex_program const& prog, size_type maxsplit, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::split_record_re(input, prog, maxsplit, cudf::get_default_stream(), mr); + return detail::split_record_re(input, prog, maxsplit, stream, mr); } std::unique_ptr
rsplit_re(strings_column_view const& input, regex_program const& prog, size_type maxsplit, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::rsplit_re(input, prog, maxsplit, cudf::get_default_stream(), mr); + return detail::rsplit_re(input, prog, maxsplit, stream, mr); } std::unique_ptr rsplit_record_re(strings_column_view const& input, regex_program const& prog, size_type maxsplit, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::rsplit_record_re(input, prog, maxsplit, cudf::get_default_stream(), mr); + return detail::rsplit_record_re(input, prog, maxsplit, stream, mr); } } // namespace strings diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b15a6c41d39..4de18fceac1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -634,7 +634,7 @@ ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing) ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp - streams/strings/strings_tests.cpp STREAM_MODE testing + streams/strings/split_test.cpp streams/strings/strings_tests.cpp STREAM_MODE testing ) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/strings/split_test.cpp b/cpp/tests/streams/strings/split_test.cpp new file mode 100644 index 00000000000..24247f6f79c --- /dev/null +++ b/cpp/tests/streams/strings/split_test.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include + +class StringsSplitTest : public cudf::test::BaseFixture {}; + +TEST_F(StringsSplitTest, SplitPartition) +{ + auto input = cudf::test::strings_column_wrapper({"Héllo thesé", "tést strings", ""}); + auto view = cudf::strings_column_view(input); + + auto const delimiter = cudf::string_scalar("é", true, cudf::test::get_default_stream()); + cudf::strings::split(view, delimiter, -1, cudf::test::get_default_stream()); + cudf::strings::rsplit(view, delimiter, -1, cudf::test::get_default_stream()); + cudf::strings::split_record(view, delimiter, -1, cudf::test::get_default_stream()); + cudf::strings::rsplit_record(view, delimiter, -1, cudf::test::get_default_stream()); + cudf::strings::partition(view, delimiter, cudf::test::get_default_stream()); + cudf::strings::rpartition(view, delimiter, cudf::test::get_default_stream()); + + auto const pattern = std::string("\\s"); + auto const prog = cudf::strings::regex_program::create(pattern); + cudf::strings::split_re(view, *prog, -1, cudf::test::get_default_stream()); + cudf::strings::split_record_re(view, *prog, -1, cudf::test::get_default_stream()); + cudf::strings::rsplit_re(view, *prog, -1, cudf::test::get_default_stream()); + cudf::strings::rsplit_record_re(view, *prog, -1, cudf::test::get_default_stream()); +} From 737b7593a58679fc59fd68e23eaf92195b9bd34c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 12 Oct 2023 08:13:11 -0700 Subject: [PATCH 57/76] Use branch-23.12 workflows. 
(#14271) This PR switches back to using `branch-23.12` for CI workflows because the CUDA 12 ARM conda migration is complete. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/14271 --- .github/workflows/build.yaml | 16 ++++++++-------- .github/workflows/pr.yaml | 28 ++++++++++++++-------------- .github/workflows/test.yaml | 16 ++++++++-------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index dc2c81d1c77..ab028eb89cc 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 
with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: ${{ inputs.build_type || 'branch' }} @@ -100,7 +100,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 047b80f2e5c..214f9c90b41 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -26,34 +26,34 @@ jobs: - wheel-build-dask-cudf - wheel-tests-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.12 
checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.12 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 with: build_type: pull-request conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 with: build_type: pull-request conda-python-cudf-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: pull-request test_script: "ci/test_python_cudf.sh" @@ -61,14 +61,14 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: pull-request test_script: "ci/test_python_other.sh" conda-java-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm + uses: 
rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -78,7 +78,7 @@ jobs: conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -88,7 +88,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -98,21 +98,21 @@ jobs: wheel-build-cudf: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: build_type: pull-request script: ci/test_wheel_cudf.sh wheel-build-dask-cudf: needs: wheel-tests-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request @@ -120,7 +120,7 @@ jobs: wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@cuda-120-arm + uses: 
rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e58227c30dc..9ca32bcfe03 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -36,7 +36,7 @@ jobs: run_script: "ci/test_cpp_memcheck.sh" conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: test_script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm + uses: 
rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@cuda-120-arm + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: nightly From fa4e8ab1af4acfd2c88a619b4d9693f4a5fda168 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 12 Oct 2023 17:11:51 -0400 Subject: [PATCH 58/76] Expose stream parameter in public strings replace APIs (#14261) Add stream parameter to public APIs: - `cudf::strings::replace()` (x2) - `cudf::strings::replace_slice()` - `cudf::strings::replace_re()` (x2) - `cudf::strings::replace_with_backrefs()` Also cleaned up some of the doxygen comments and added stream-tests. 
Reference #13744 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Mike Wilson (https://github.com/hyperbolic2346) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14261 --- cpp/include/cudf/strings/replace.hpp | 42 +++++++----- cpp/include/cudf/strings/replace_re.hpp | 28 +++++--- cpp/src/strings/replace/backref_re.cu | 3 +- cpp/src/strings/replace/multi.cu | 3 +- cpp/src/strings/replace/multi_re.cu | 3 +- cpp/src/strings/replace/replace.cu | 8 ++- cpp/src/strings/replace/replace_re.cu | 4 +- cpp/tests/CMakeLists.txt | 10 ++- cpp/tests/streams/strings/replace_test.cpp | 80 ++++++++++++++++++++++ 9 files changed, 142 insertions(+), 39 deletions(-) create mode 100644 cpp/tests/streams/strings/replace_test.cpp diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp index 22818f7542e..2476a41e886 100644 --- a/cpp/include/cudf/strings/replace.hpp +++ b/cpp/include/cudf/strings/replace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,19 +54,21 @@ namespace strings { * * @throw cudf::logic_error if target is an empty string. * - * @param strings Strings column for this operation. - * @param target String to search for within each string. - * @param repl Replacement string if target is found. + * @param input Strings column for this operation + * @param target String to search for within each string + * @param repl Replacement string if target is found * @param maxrepl Maximum times to replace if target appears multiple times in the input string. * Default of -1 specifies replace all occurrences of target in each string. - * @param mr Device memory resource used to allocate the returned column's device memory. 
- * @return New strings column. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr replace( - strings_column_view const& strings, + strings_column_view const& input, string_scalar const& target, string_scalar const& repl, - int32_t maxrepl = -1, + cudf::size_type maxrepl = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -92,21 +94,23 @@ std::unique_ptr replace( * * @throw cudf::logic_error if start is greater than stop. * - * @param strings Strings column for this operation. + * @param input Strings column for this operation. * @param repl Replacement string for specified positions found. * Default is empty string. * @param start Start position where repl will be added. * Default is 0, first character position. * @param stop End position (exclusive) to use for replacement. * Default of -1 specifies the end of each string. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr replace_slice( - strings_column_view const& strings, + strings_column_view const& input, string_scalar const& repl = string_scalar(""), size_type start = 0, size_type stop = -1, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -141,16 +145,18 @@ std::unique_ptr replace_slice( * if repls is a single string. * @throw cudf::logic_error if targets or repls contain null entries. * - * @param strings Strings column for this operation. 
- * @param targets Strings to search for in each string. - * @param repls Corresponding replacement strings for target strings. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. + * @param input Strings column for this operation + * @param targets Strings to search for in each string + * @param repls Corresponding replacement strings for target strings + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr replace( - strings_column_view const& strings, + strings_column_view const& input, strings_column_view const& targets, strings_column_view const& repls, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp index bc6659835c3..77db2882253 100644 --- a/cpp/include/cudf/strings/replace_re.hpp +++ b/cpp/include/cudf/strings/replace_re.hpp @@ -43,20 +43,22 @@ struct regex_program; * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param strings Strings instance for this operation + * @param input Strings instance for this operation * @param prog Regex program instance * @param replacement The string used to replace the matched sequence in each string. * Default is an empty string. * @param max_replace_count The maximum number of times to replace the matched pattern * within each string. Default replaces every substring that is matched. 
+ * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New strings column */ std::unique_ptr replace_re( - strings_column_view const& strings, + strings_column_view const& input, regex_program const& prog, string_scalar const& replacement = string_scalar(""), std::optional max_replace_count = std::nullopt, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -67,18 +69,20 @@ std::unique_ptr replace_re( * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. * - * @param strings Strings instance for this operation. - * @param patterns The regular expression patterns to search within each string. - * @param replacements The strings used for replacement. - * @param flags Regex flags for interpreting special characters in the patterns. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
+ * @param input Strings instance for this operation + * @param patterns The regular expression patterns to search within each string + * @param replacements The strings used for replacement + * @param flags Regex flags for interpreting special characters in the patterns + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr replace_re( - strings_column_view const& strings, + strings_column_view const& input, std::vector const& patterns, strings_column_view const& replacements, regex_flags const flags = regex_flags::DEFAULT, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -92,16 +96,18 @@ std::unique_ptr replace_re( * @throw cudf::logic_error if capture index values in `replacement` are not in range 0-99, and also * if the index exceeds the group count specified in the pattern * - * @param strings Strings instance for this operation + * @param input Strings instance for this operation * @param prog Regex program instance * @param replacement The replacement template for creating the output string + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New strings column */ std::unique_ptr replace_with_backrefs( - strings_column_view const& strings, + strings_column_view const& input, regex_program const& prog, std::string_view replacement, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace strings diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index 31e06aac72b..74f38cbcc20 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ 
b/cpp/src/strings/replace/backref_re.cu @@ -148,10 +148,11 @@ std::unique_ptr replace_with_backrefs(strings_column_view const& input, std::unique_ptr replace_with_backrefs(strings_column_view const& strings, regex_program const& prog, std::string_view replacement, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_with_backrefs(strings, prog, replacement, cudf::get_default_stream(), mr); + return detail::replace_with_backrefs(strings, prog, replacement, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index 92ace4e7bc7..ee47932100a 100644 --- a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -490,10 +490,11 @@ std::unique_ptr replace(strings_column_view const& input, std::unique_ptr replace(strings_column_view const& strings, strings_column_view const& targets, strings_column_view const& repls, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace(strings, targets, repls, cudf::get_default_stream(), mr); + return detail::replace(strings, targets, repls, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index 867b443c036..3375cb7a789 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -206,10 +206,11 @@ std::unique_ptr replace_re(strings_column_view const& strings, std::vector const& patterns, strings_column_view const& replacements, regex_flags const flags, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_re(strings, patterns, replacements, flags, cudf::get_default_stream(), mr); + return detail::replace_re(strings, patterns, replacements, flags, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu 
index acc1502f4d6..a6a14f27dec 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -751,21 +751,23 @@ std::unique_ptr replace_nulls(strings_column_view const& strings, std::unique_ptr replace(strings_column_view const& strings, string_scalar const& target, string_scalar const& repl, - int32_t maxrepl, + cudf::size_type maxrepl, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace(strings, target, repl, maxrepl, cudf::get_default_stream(), mr); + return detail::replace(strings, target, repl, maxrepl, stream, mr); } std::unique_ptr replace_slice(strings_column_view const& strings, string_scalar const& repl, size_type start, size_type stop, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_slice(strings, repl, start, stop, cudf::get_default_stream(), mr); + return detail::replace_slice(strings, repl, start, stop, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu index 81ddb937be5..502d5f1a52e 100644 --- a/cpp/src/strings/replace/replace_re.cu +++ b/cpp/src/strings/replace/replace_re.cu @@ -134,11 +134,11 @@ std::unique_ptr replace_re(strings_column_view const& strings, regex_program const& prog, string_scalar const& replacement, std::optional max_replace_count, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::replace_re( - strings, prog, replacement, max_replace_count, cudf::get_default_stream(), mr); + return detail::replace_re(strings, prog, replacement, max_replace_count, stream, mr); } } // namespace strings diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4de18fceac1..f36fcbc9246 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -633,8 +633,14 @@ ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE 
testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing) ConfigureTest( - STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp - streams/strings/split_test.cpp streams/strings/strings_tests.cpp STREAM_MODE testing + STREAM_STRINGS_TEST + streams/strings/case_test.cpp + streams/strings/find_test.cpp + streams/strings/replace_test.cpp + streams/strings/split_test.cpp + streams/strings/strings_tests.cpp + STREAM_MODE + testing ) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/strings/replace_test.cpp b/cpp/tests/streams/strings/replace_test.cpp new file mode 100644 index 00000000000..fc87460b706 --- /dev/null +++ b/cpp/tests/streams/strings/replace_test.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include + +#include + +class StringsReplaceTest : public cudf::test::BaseFixture {}; + +TEST_F(StringsReplaceTest, Replace) +{ + auto input = cudf::test::strings_column_wrapper({"Héllo", "thesé", "tést strings", ""}); + auto view = cudf::strings_column_view(input); + + auto const target = cudf::string_scalar("é", true, cudf::test::get_default_stream()); + auto const repl = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + cudf::strings::replace(view, target, repl, -1, cudf::test::get_default_stream()); + cudf::strings::replace(view, view, view, cudf::test::get_default_stream()); + cudf::strings::replace_slice(view, repl, 1, 2, cudf::test::get_default_stream()); + + auto const pattern = std::string("[a-z]"); + auto const prog = cudf::strings::regex_program::create(pattern); + cudf::strings::replace_re(view, *prog, repl, 1, cudf::test::get_default_stream()); + + cudf::test::strings_column_wrapper repls({"1", "a", " "}); + cudf::strings::replace_re(view, + {pattern, pattern, pattern}, + cudf::strings_column_view(repls), + cudf::strings::regex_flags::DEFAULT, + cudf::test::get_default_stream()); +} + +TEST_F(StringsReplaceTest, ReplaceRegex) +{ + auto input = cudf::test::strings_column_wrapper({"Héllo", "thesé", "tést strings", ""}); + auto view = cudf::strings_column_view(input); + + auto const repl = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + auto const pattern = std::string("[a-z]"); + auto const prog = cudf::strings::regex_program::create(pattern); + cudf::strings::replace_re(view, *prog, repl, 1, cudf::test::get_default_stream()); + + cudf::test::strings_column_wrapper repls({"1", "a", " "}); + cudf::strings::replace_re(view, + {pattern, pattern, pattern}, + cudf::strings_column_view(repls), + cudf::strings::regex_flags::DEFAULT, + cudf::test::get_default_stream()); +} + +TEST_F(StringsReplaceTest, ReplaceRegexBackref) +{ + auto input = 
cudf::test::strings_column_wrapper({"Héllo thesé", "tést strings"}); + auto view = cudf::strings_column_view(input); + + auto const repl_template = std::string("\\2-\\1"); + auto const pattern = std::string("(\\w) (\\w)"); + auto const prog = cudf::strings::regex_program::create(pattern); + cudf::strings::replace_with_backrefs( + view, *prog, repl_template, cudf::test::get_default_stream()); +} From 6e00ad06abb1152816ed6edda698cb26f08a64d2 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 12 Oct 2023 22:32:25 -0400 Subject: [PATCH 59/76] Return error if BOOL8 column-type is used with integers-to-hex (#14208) Removes support to convert BOOL8 column-type to hex using `cudf::strings::integers_to_hex`. Also fixed other integer to string conversions to remove this unsupported type. Added gtests to verify an error is thrown for this case. Closes #14232 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/14208 --- .../cudf/strings/convert/convert_integers.hpp | 4 +- cpp/include/cudf/utilities/traits.hpp | 24 ++++++++++++ cpp/src/strings/convert/convert_hex.cu | 27 ++++++------- cpp/src/strings/convert/convert_integers.cu | 38 ++++++------------- cpp/src/utilities/traits.cpp | 15 +++++++- cpp/tests/strings/integers_tests.cpp | 26 +++++++++++++ 6 files changed, 89 insertions(+), 45 deletions(-) diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 44213b84139..756ce48645d 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -199,14 +199,14 @@ std::unique_ptr is_hex( * * @code{.pseudo} * Example: - * input = [123, -1, 0, 27, 342718233] // int32 
type input column + * input = [1234, -1, 0, 27, 342718233] // int32 type input column * s = integers_to_hex(input) * s is [ '04D2', 'FFFFFFFF', '00', '1B', '146D7719'] * @endcode * * The example above shows an `INT32` type column where each integer is 4 bytes. * Leading zeros are suppressed unless filling out a complete byte as in - * `123 -> '04D2'` instead of `000004D2` or `4D2`. + * `1234 -> '04D2'` instead of `000004D2` or `4D2`. * * @throw cudf::logic_error if the input column is not integral type. * diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 51f5d9d571a..2dda0740b96 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -279,6 +279,30 @@ constexpr inline bool is_integral() */ bool is_integral(data_type type); +/** + * @brief Indicates whether the type `T` is an integral type but not bool type. + * + * @tparam T The type to verify + * @return true `T` is integral but not bool + * @return false `T` is not integral or is bool + */ +template +constexpr inline bool is_integral_not_bool() +{ + return cuda::std::is_integral_v and not std::is_same_v; +} + +/** + * @brief Indicates whether `type` is a integral `data_type` and not BOOL8 + * + * "Integral" types are fundamental integer types such as `INT*` and `UINT*`. + * + * @param type The `data_type` to verify + * @return true `type` is integral but not bool + * @return false `type` is integral or is bool + */ +bool is_integral_not_bool(data_type type); + /** * @brief Indicates whether the type `T` is a floating point type. * diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu index bed682aba71..f5bdbcbd199 100644 --- a/cpp/src/strings/convert/convert_hex.cu +++ b/cpp/src/strings/convert/convert_hex.cu @@ -93,7 +93,8 @@ struct hex_to_integer_fn { * The output_column is expected to be one of the integer types only. 
*/ struct dispatch_hex_to_integers_fn { - template >* = nullptr> + template ()>* = nullptr> void operator()(column_device_view const& strings_column, mutable_column_view& output_column, rmm::cuda_stream_view stream) const @@ -105,22 +106,14 @@ struct dispatch_hex_to_integers_fn { d_results, hex_to_integer_fn{strings_column}); } - // non-integral types throw an exception + // non-integer types throw an exception template - std::enable_if_t, void> operator()(Args&&...) const + std::enable_if_t(), void> operator()(Args&&...) const { - CUDF_FAIL("Output for hex_to_integers must be an integral type."); + CUDF_FAIL("Output for hex_to_integers must be an integer type."); } }; -template <> -void dispatch_hex_to_integers_fn::operator()(column_device_view const&, - mutable_column_view&, - rmm::cuda_stream_view) const -{ - CUDF_FAIL("Output for hex_to_integers must not be a boolean type."); -} - /** * @brief Functor to convert integers to hexadecimal strings * @@ -179,7 +172,8 @@ struct integer_to_hex_fn { }; struct dispatch_integers_to_hex_fn { - template >* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const @@ -195,11 +189,12 @@ struct dispatch_integers_to_hex_fn { input.null_count(), cudf::detail::copy_bitmask(input, stream, mr)); } - // non-integral types throw an exception + // non-integer types throw an exception template - std::enable_if_t, std::unique_ptr> operator()(Args...) const + std::enable_if_t(), std::unique_ptr> operator()( + Args...) 
const { - CUDF_FAIL("integers_to_hex only supports integral type columns"); + CUDF_FAIL("integers_to_hex only supports integer type columns"); } }; diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index 5597d2831c0..2c21fc5d790 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -111,7 +111,7 @@ inline __device__ bool is_integer(string_view const& d_str) * @brief The dispatch functions for checking if strings are valid integers. */ struct dispatch_is_integer_fn { - template >* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(strings_column_view const& strings, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const @@ -145,7 +145,7 @@ struct dispatch_is_integer_fn { return results; } - template >* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(strings_column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const @@ -243,7 +243,8 @@ struct string_to_integer_fn { * The output_column is expected to be one of the integer types only. 
*/ struct dispatch_to_integers_fn { - template >* = nullptr> + template ()>* = nullptr> void operator()(column_device_view const& strings_column, mutable_column_view& output_column, rmm::cuda_stream_view stream) const @@ -254,22 +255,14 @@ struct dispatch_to_integers_fn { output_column.data(), string_to_integer_fn{strings_column}); } - // non-integral types throw an exception - template >* = nullptr> + // non-integer types throw an exception + template ()>* = nullptr> void operator()(column_device_view const&, mutable_column_view&, rmm::cuda_stream_view) const { - CUDF_FAIL("Output for to_integers must be an integral type."); + CUDF_FAIL("Output for to_integers must be an integer type."); } }; -template <> -void dispatch_to_integers_fn::operator()(column_device_view const&, - mutable_column_view&, - rmm::cuda_stream_view) const -{ - CUDF_FAIL("Output for to_integers must not be a boolean type."); -} - } // namespace // This will convert a strings column into any integer column type. @@ -351,7 +344,8 @@ struct from_integers_fn { * The template function declaration ensures only integer types are used. 
*/ struct dispatch_from_integers_fn { - template >* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(column_view const& integers, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const @@ -373,23 +367,15 @@ struct dispatch_from_integers_fn { std::move(null_mask)); } - // non-integral types throw an exception - template >* = nullptr> + // non-integer types throw an exception + template ()>* = nullptr> std::unique_ptr operator()(column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const { - CUDF_FAIL("Values for from_integers function must be an integral type."); + CUDF_FAIL("Values for from_integers function must be an integer type."); } }; - -template <> -std::unique_ptr dispatch_from_integers_fn::operator()( - column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const -{ - CUDF_FAIL("Input for from_integers must not be a boolean type."); -} - } // namespace // This will convert all integer column types into a strings column. diff --git a/cpp/src/utilities/traits.cpp b/cpp/src/utilities/traits.cpp index bc10dd7845a..b0078ff85a2 100644 --- a/cpp/src/utilities/traits.cpp +++ b/cpp/src/utilities/traits.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -158,6 +158,19 @@ struct is_integral_impl { bool is_integral(data_type type) { return cudf::type_dispatcher(type, is_integral_impl{}); } +struct is_integral_not_bool_impl { + template + constexpr bool operator()() + { + return is_integral_not_bool(); + } +}; + +bool is_integral_not_bool(data_type type) +{ + return cudf::type_dispatcher(type, is_integral_not_bool_impl{}); +} + struct is_floating_point_impl { template constexpr bool operator()() diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index 59805f9cb6d..c8f292f55b2 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -456,3 +456,29 @@ TEST_F(StringsConvertTest, IntegerToHexWithNull) auto results = cudf::strings::integers_to_hex(integers); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } + +TEST_F(StringsConvertTest, IntegerConvertErrors) +{ + cudf::test::fixed_width_column_wrapper bools( + {true, true, false, false, true, true, false, true}); + cudf::test::fixed_width_column_wrapper floats( + {123456.0, -1.0, 0.0, 0.0, 12.0, 12345.0, 123456789.0}); + EXPECT_THROW(cudf::strings::integers_to_hex(bools), cudf::logic_error); + EXPECT_THROW(cudf::strings::integers_to_hex(floats), cudf::logic_error); + EXPECT_THROW(cudf::strings::from_integers(bools), cudf::logic_error); + EXPECT_THROW(cudf::strings::from_integers(floats), cudf::logic_error); + + auto input = cudf::test::strings_column_wrapper({"123456", "-1", "0"}); + auto view = cudf::strings_column_view(input); + EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::BOOL8)), + cudf::logic_error); + EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::FLOAT32)), + cudf::logic_error); + EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::TIMESTAMP_SECONDS)), + cudf::logic_error); + EXPECT_THROW( + cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::DURATION_MILLISECONDS)), + 
cudf::logic_error); + EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::DECIMAL32)), + cudf::logic_error); +} From d590e0bde9389b8a403b2b7ae4c5372ae6728016 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 16 Oct 2023 10:37:00 -0400 Subject: [PATCH 60/76] Expose stream parameter in public strings convert APIs (#14255) Add stream parameter to public APIs: - `cudf::strings::to_booleans()` - `cudf::strings::from_booleans()` - `cudf::strings::to_timestamps()` - `cudf::strings::from_timestamps()` - `cudf::strings::is_timestamp()` - `cudf::strings::to_durations()` - `cudf::strings::from_durations()` - `cudf::strings::to_fixed_point()` - `cudf::strings::from_fixed_point()` - `cudf::strings::to_floats()` - `cudf::strings::from_floats()` - `cudf::strings::is_float()` - `cudf::strings::to_integers()` - `cudf::strings::from_integers()` - `cudf::strings::is_integer()` - `cudf::strings::hex_to_integers()` - `cudf::strings::integers_to_hex()` - `cudf::strings::is_hex()` - `cudf::strings::ipv4_to_integers()` - `cudf::strings::integers_to_ipv4()` - `cudf::strings::is_ipv4()` - `cudf::strings::url_encode()` - `cudf::strings::url_decode()` - `cudf::strings::format_list_column()` Also cleaned up some of the doxygen comments and removed some default parameters. 
Reference #13744 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - MithunR (https://github.com/mythrocks) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14255 --- .../cudf/strings/convert/convert_booleans.hpp | 32 ++-- .../cudf/strings/convert/convert_datetime.hpp | 34 ++-- .../strings/convert/convert_durations.hpp | 26 ++-- .../strings/convert/convert_fixed_point.hpp | 30 ++-- .../cudf/strings/convert/convert_floats.hpp | 30 ++-- .../cudf/strings/convert/convert_integers.hpp | 72 +++++---- .../cudf/strings/convert/convert_ipv4.hpp | 30 ++-- .../cudf/strings/convert/convert_lists.hpp | 14 +- .../cudf/strings/convert/convert_urls.hpp | 22 +-- cpp/src/strings/convert/convert_booleans.cu | 20 +-- cpp/src/strings/convert/convert_datetime.cu | 9 +- cpp/src/strings/convert/convert_durations.cu | 20 +-- .../strings/convert/convert_fixed_point.cu | 11 +- cpp/src/strings/convert/convert_floats.cu | 42 ++--- cpp/src/strings/convert/convert_hex.cu | 9 +- cpp/src/strings/convert/convert_integers.cu | 64 ++++---- cpp/src/strings/convert/convert_ipv4.cu | 39 ++--- cpp/src/strings/convert/convert_lists.cu | 3 +- cpp/src/strings/convert/convert_urls.cu | 10 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/strings/convert_test.cpp | 146 ++++++++++++++++++ cpp/tests/strings/booleans_tests.cpp | 33 +++- cpp/tests/strings/format_lists_tests.cpp | 9 +- java/src/main/native/src/ColumnViewJni.cpp | 11 +- 24 files changed, 487 insertions(+), 230 deletions(-) create mode 100644 cpp/tests/streams/strings/convert_test.cpp diff --git a/cpp/include/cudf/strings/convert/convert_booleans.hpp b/cpp/include/cudf/strings/convert/convert_booleans.hpp index ab63503f166..9e9f25e800a 100644 --- a/cpp/include/cudf/strings/convert/convert_booleans.hpp +++ b/cpp/include/cudf/strings/convert/convert_booleans.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,14 +35,16 @@ namespace strings { * * Any null entries will result in corresponding null entries in the output column. * - * @param strings Strings instance for this operation. - * @param true_string String to expect for true. Non-matching strings are false. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New BOOL8 column converted from strings. + * @param input Strings instance for this operation + * @param true_string String to expect for true. Non-matching strings are false + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New BOOL8 column converted from strings */ std::unique_ptr to_booleans( - strings_column_view const& strings, - string_scalar const& true_string = string_scalar("true"), + strings_column_view const& input, + string_scalar const& true_string, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -53,16 +55,18 @@ std::unique_ptr to_booleans( * * @throw cudf::logic_error if the input column is not BOOL8 type. * - * @param booleans Boolean column to convert. - * @param true_string String to use for true in the output column. - * @param false_string String to use for false in the output column. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
+ * @param booleans Boolean column to convert + * @param true_string String to use for true in the output column + * @param false_string String to use for false in the output column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr from_booleans( column_view const& booleans, - string_scalar const& true_string = string_scalar("true"), - string_scalar const& false_string = string_scalar("false"), + string_scalar const& true_string, + string_scalar const& false_string, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_datetime.hpp b/cpp/include/cudf/strings/convert/convert_datetime.hpp index fa729d26734..81cce14b53b 100644 --- a/cpp/include/cudf/strings/convert/convert_datetime.hpp +++ b/cpp/include/cudf/strings/convert/convert_datetime.hpp @@ -77,16 +77,18 @@ namespace strings { * * @throw cudf::logic_error if timestamp_type is not a timestamp type. * - * @param strings Strings instance for this operation. - * @param timestamp_type The timestamp type used for creating the output column. - * @param format String specifying the timestamp format in strings. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New datetime column. 
+ * @param input Strings instance for this operation + * @param timestamp_type The timestamp type used for creating the output column + * @param format String specifying the timestamp format in strings + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New datetime column */ std::unique_ptr to_timestamps( - strings_column_view const& strings, + strings_column_view const& input, data_type timestamp_type, std::string_view format, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -124,14 +126,16 @@ std::unique_ptr to_timestamps( * This will return a column of type BOOL8 where a `true` row indicates the corresponding * input string can be parsed correctly with the given format. * - * @param strings Strings instance for this operation. - * @param format String specifying the timestamp format in strings. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New BOOL8 column. + * @param input Strings instance for this operation + * @param format String specifying the timestamp format in strings + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New BOOL8 column */ std::unique_ptr is_timestamp( - strings_column_view const& strings, + strings_column_view const& input, std::string_view format, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -231,19 +235,21 @@ std::unique_ptr is_timestamp( * @throw cudf::logic_error if the `format` string is empty * @throw cudf::logic_error if `names.size()` is an invalid size. Must be 0 or 40 strings. 
* - * @param timestamps Timestamp values to convert. + * @param timestamps Timestamp values to convert * @param format The string specifying output format. * Default format is "%Y-%m-%dT%H:%M:%SZ". * @param names The string names to use for weekdays ("%a", "%A") and months ("%b", "%B") * Default is an empty `strings_column_view`. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with formatted timestamps. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with formatted timestamps */ std::unique_ptr from_timestamps( column_view const& timestamps, std::string_view format = "%Y-%m-%dT%H:%M:%SZ", strings_column_view const& names = strings_column_view(column_view{ data_type{type_id::STRING}, 0, nullptr, nullptr, 0}), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_durations.hpp b/cpp/include/cudf/strings/convert/convert_durations.hpp index e915ec26279..a1f4e4ead1d 100644 --- a/cpp/include/cudf/strings/convert/convert_durations.hpp +++ b/cpp/include/cudf/strings/convert/convert_durations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,16 +65,18 @@ namespace strings { * * @throw cudf::logic_error if duration_type is not a duration type. * - * @param strings Strings instance for this operation. - * @param duration_type The duration type used for creating the output column. - * @param format String specifying the duration format in strings. 
- * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New duration column. + * @param input Strings instance for this operation + * @param duration_type The duration type used for creating the output column + * @param format String specifying the duration format in strings + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New duration column */ std::unique_ptr to_durations( - strings_column_view const& strings, + strings_column_view const& input, data_type duration_type, std::string_view format, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -115,15 +117,17 @@ std::unique_ptr to_durations( * * @throw cudf::logic_error if `durations` column parameter is not a duration type. * - * @param durations Duration values to convert. + * @param durations Duration values to convert * @param format The string specifying output format. - * Default format is ""%d days %H:%M:%S". - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with formatted durations. + * Default format is "%D days %H:%M:%S". 
+ * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with formatted durations */ std::unique_ptr from_durations( column_view const& durations, std::string_view format = "%D days %H:%M:%S", + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp index 3852dc8e81a..8f37715967a 100644 --- a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp +++ b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,14 +53,16 @@ namespace strings { * * @throw cudf::logic_error if `output_type` is not a fixed-point decimal type. * - * @param input Strings instance for this operation. - * @param output_type Type of fixed-point column to return including the scale value. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of `output_type`. 
+ * @param input Strings instance for this operation + * @param output_type Type of fixed-point column to return including the scale value + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of `output_type` */ std::unique_ptr to_fixed_point( strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -83,12 +85,14 @@ std::unique_ptr to_fixed_point( * * @throw cudf::logic_error if the `input` column is not a fixed-point decimal type. * - * @param input Fixed-point column to convert. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. + * @param input Fixed-point column to convert + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr from_fixed_point( column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -111,14 +115,16 @@ std::unique_ptr from_fixed_point( * * @throw cudf::logic_error if the `decimal_type` is not a fixed-point decimal type. * - * @param input Strings instance for this operation. - * @param decimal_type Fixed-point type (with scale) used only for checking overflow. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param decimal_type Fixed-point type (with scale) used only for checking overflow + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_fixed_point( strings_column_view const& input, data_type decimal_type = data_type{type_id::DECIMAL64}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp index 38a84fc1548..a35cb68ef4e 100644 --- a/cpp/include/cudf/strings/convert/convert_floats.hpp +++ b/cpp/include/cudf/strings/convert/convert_floats.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,14 +39,16 @@ namespace strings { * * @throw cudf::logic_error if output_type is not float type. * - * @param strings Strings instance for this operation. - * @param output_type Type of float numeric column to return. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column with floats converted from strings. 
+ * @param strings Strings instance for this operation + * @param output_type Type of float numeric column to return + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with floats converted from strings */ std::unique_ptr to_floats( strings_column_view const& strings, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -62,12 +64,14 @@ std::unique_ptr to_floats( * * @throw cudf::logic_error if floats column is not float type. * - * @param floats Numeric column to convert. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with floats as strings. + * @param floats Numeric column to convert + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with floats as strings */ std::unique_ptr from_floats( column_view const& floats, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -86,12 +90,14 @@ std::unique_ptr from_floats( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_float( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 756ce48645d..74ec5d315a2 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -46,14 +46,16 @@ namespace strings { * * @throw cudf::logic_error if output_type is not integral type. * - * @param strings Strings instance for this operation. - * @param output_type Type of integer numeric column to return. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column with integers converted from strings. + * @param input Strings instance for this operation + * @param output_type Type of integer numeric column to return + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with integers converted from strings */ std::unique_ptr to_integers( - strings_column_view const& strings, + strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -67,12 +69,14 @@ std::unique_ptr to_integers( * * @throw cudf::logic_error if integers column is not integral type. 
* - * @param integers Numeric column to convert. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with integers as strings. + * @param integers Numeric column to convert + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with integers as strings */ std::unique_ptr from_integers( column_view const& integers, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -94,12 +98,14 @@ std::unique_ptr from_integers( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. + * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_integer( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -124,14 +130,16 @@ std::unique_ptr is_integer( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param int_type Integer type used for checking underflow and overflow. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param int_type Integer type used for checking underflow and overflow + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_integer( - strings_column_view const& strings, + strings_column_view const& input, data_type int_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -152,14 +160,16 @@ std::unique_ptr is_integer( * * @throw cudf::logic_error if output_type is not integral type. * - * @param strings Strings instance for this operation. - * @param output_type Type of integer numeric column to return. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column with integers converted from strings. + * @param input Strings instance for this operation + * @param output_type Type of integer numeric column to return + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with integers converted from strings */ std::unique_ptr hex_to_integers( - strings_column_view const& strings, + strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -179,12 +189,14 @@ std::unique_ptr hex_to_integers( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_hex( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -210,12 +222,14 @@ std::unique_ptr is_hex( * * @throw cudf::logic_error if the input column is not integral type. * - * @param input Integer column to convert to hex. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with hexadecimal characters. + * @param input Integer column to convert to hex + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with hexadecimal characters */ std::unique_ptr integers_to_hex( column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_ipv4.hpp b/cpp/include/cudf/strings/convert/convert_ipv4.hpp index 22272af74fc..25ad7b86748 100644 --- a/cpp/include/cudf/strings/convert/convert_ipv4.hpp +++ b/cpp/include/cudf/strings/convert/convert_ipv4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -48,12 +48,14 @@ namespace strings { * * Any null entries will result in corresponding null entries in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New INT64 column converted from strings. + * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New INT64 column converted from strings */ std::unique_ptr ipv4_to_integers( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -71,12 +73,14 @@ std::unique_ptr ipv4_to_integers( * * @throw cudf::logic_error if the input column is not INT64 type. * - * @param integers Integer (INT64) column to convert. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. + * @param integers Integer (INT64) column to convert + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr integers_to_ipv4( column_view const& integers, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -96,12 +100,14 @@ std::unique_ptr integers_to_ipv4( * * Any null row results in a null entry for that row in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
+ * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of boolean results for each string */ std::unique_ptr is_ipv4( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_lists.hpp b/cpp/include/cudf/strings/convert/convert_lists.hpp index 7ab1bf47b0a..dedf4e95138 100644 --- a/cpp/include/cudf/strings/convert/convert_lists.hpp +++ b/cpp/include/cudf/strings/convert/convert_lists.hpp @@ -50,17 +50,19 @@ namespace strings { * * @throw cudf::logic_error if the input column is not a LIST type with a STRING child. * - * @param input Lists column to format. - * @param na_rep Replacement string for null elements. - * @param separators Strings to use for enclosing list components and separating elements. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
+ * @param input Lists column to format + * @param na_rep Replacement string for null elements + * @param separators Strings to use for enclosing list components and separating elements + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr format_list_column( lists_column_view const& input, - string_scalar const& na_rep = string_scalar("NULL"), + string_scalar const& na_rep = string_scalar("NULL"), strings_column_view const& separators = strings_column_view(column_view{ data_type{type_id::STRING}, 0, nullptr, nullptr, 0}), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/convert/convert_urls.hpp b/cpp/include/cudf/strings/convert/convert_urls.hpp index 7f29a0d2149..902835081af 100644 --- a/cpp/include/cudf/strings/convert/convert_urls.hpp +++ b/cpp/include/cudf/strings/convert/convert_urls.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,12 +39,14 @@ namespace strings { * * Any null entries will result in corresponding null entries in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. 
+ * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr url_encode( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -60,12 +62,14 @@ std::unique_ptr url_encode( * * Any null entries will result in corresponding null entries in the output column. * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. + * @param input Strings instance for this operation + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column */ std::unique_ptr url_decode( - strings_column_view const& strings, + strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/src/strings/convert/convert_booleans.cu b/cpp/src/strings/convert/convert_booleans.cu index 0d04fc74b0c..8196e1d90fb 100644 --- a/cpp/src/strings/convert/convert_booleans.cu +++ b/cpp/src/strings/convert/convert_booleans.cu @@ -39,25 +39,25 @@ namespace cudf { namespace strings { namespace detail { // Convert strings column to boolean column -std::unique_ptr to_booleans(strings_column_view const& strings, +std::unique_ptr to_booleans(strings_column_view const& input, string_scalar const& true_string, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - size_type strings_count = strings.size(); + 
size_type strings_count = input.size(); if (strings_count == 0) return make_numeric_column(data_type{type_id::BOOL8}, 0); CUDF_EXPECTS(true_string.is_valid(stream) && true_string.size() > 0, "Parameter true_string must not be empty."); auto d_true = string_view(true_string.data(), true_string.size()); - auto strings_column = column_device_view::create(strings.parent(), stream); + auto strings_column = column_device_view::create(input.parent(), stream); auto d_strings = *strings_column; // create output column copying the strings' null-mask auto results = make_numeric_column(data_type{type_id::BOOL8}, strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); auto results_view = results->mutable_view(); @@ -73,19 +73,20 @@ std::unique_ptr to_booleans(strings_column_view const& strings, result = d_strings.element(idx).compare(d_true) == 0; return result; }); - results->set_null_count(strings.null_count()); + results->set_null_count(input.null_count()); return results; } } // namespace detail // external API -std::unique_ptr to_booleans(strings_column_view const& strings, +std::unique_ptr to_booleans(strings_column_view const& input, string_scalar const& true_string, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_booleans(strings, true_string, cudf::get_default_stream(), mr); + return detail::to_booleans(input, true_string, stream, mr); } namespace detail { @@ -156,10 +157,11 @@ std::unique_ptr from_booleans(column_view const& booleans, std::unique_ptr from_booleans(column_view const& booleans, string_scalar const& true_string, string_scalar const& false_string, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_booleans(booleans, true_string, false_string, cudf::get_default_stream(), mr); + return 
detail::from_booleans(booleans, true_string, false_string, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 8a953d778ed..d2609441d72 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -710,18 +710,20 @@ std::unique_ptr is_timestamp(strings_column_view const& input, std::unique_ptr to_timestamps(strings_column_view const& input, data_type timestamp_type, std::string_view format, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_timestamps(input, timestamp_type, format, cudf::get_default_stream(), mr); + return detail::to_timestamps(input, timestamp_type, format, stream, mr); } std::unique_ptr is_timestamp(strings_column_view const& input, std::string_view format, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_timestamp(input, format, cudf::get_default_stream(), mr); + return detail::is_timestamp(input, format, stream, mr); } namespace detail { @@ -1168,10 +1170,11 @@ std::unique_ptr from_timestamps(column_view const& timestamps, std::unique_ptr from_timestamps(column_view const& timestamps, std::string_view format, strings_column_view const& names, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_timestamps(timestamps, format, names, cudf::get_default_stream(), mr); + return detail::from_timestamps(timestamps, format, names, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index 6ab70825a6b..e781581b378 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -690,30 +690,30 @@ std::unique_ptr from_durations(column_view const& durations, durations.type(), 
dispatch_from_durations_fn{}, durations, format, stream, mr); } -std::unique_ptr to_durations(strings_column_view const& strings, +std::unique_ptr to_durations(strings_column_view const& input, data_type duration_type, std::string_view format, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - size_type strings_count = strings.size(); + size_type strings_count = input.size(); if (strings_count == 0) return make_duration_column(duration_type, 0); CUDF_EXPECTS(!format.empty(), "Format parameter must not be empty."); - auto strings_column = column_device_view::create(strings.parent(), stream); + auto strings_column = column_device_view::create(input.parent(), stream); auto d_column = *strings_column; auto results = make_duration_column(duration_type, strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); auto results_view = results->mutable_view(); cudf::type_dispatcher( duration_type, dispatch_to_durations_fn(), d_column, format, results_view, stream); - results->set_null_count(strings.null_count()); + results->set_null_count(input.null_count()); return results; } @@ -721,19 +721,21 @@ std::unique_ptr to_durations(strings_column_view const& strings, std::unique_ptr from_durations(column_view const& durations, std::string_view format, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_durations(durations, format, cudf::get_default_stream(), mr); + return detail::from_durations(durations, format, stream, mr); } -std::unique_ptr to_durations(strings_column_view const& strings, +std::unique_ptr to_durations(strings_column_view const& input, data_type duration_type, std::string_view format, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_durations(strings, duration_type, format, 
cudf::get_default_stream(), mr); + return detail::to_durations(input, duration_type, format, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 51aab9faeba..2c59f6dcd29 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -184,12 +184,13 @@ std::unique_ptr to_fixed_point(strings_column_view const& input, } // namespace detail // external API -std::unique_ptr to_fixed_point(strings_column_view const& strings, +std::unique_ptr to_fixed_point(strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_fixed_point(strings, output_type, cudf::get_default_stream(), mr); + return detail::to_fixed_point(input, output_type, stream, mr); } namespace detail { @@ -277,10 +278,11 @@ std::unique_ptr from_fixed_point(column_view const& input, // external API std::unique_ptr from_fixed_point(column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_fixed_point(input, cudf::get_default_stream(), mr); + return detail::from_fixed_point(input, stream, mr); } namespace detail { @@ -341,10 +343,11 @@ std::unique_ptr is_fixed_point(strings_column_view const& input, std::unique_ptr is_fixed_point(strings_column_view const& input, data_type decimal_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_fixed_point(input, decimal_type, cudf::get_default_stream(), mr); + return detail::is_fixed_point(input, decimal_type, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 32167589ab4..81d686d690c 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu 
@@ -91,26 +91,26 @@ struct dispatch_to_floats_fn { } // namespace // This will convert a strings column into any float column type. -std::unique_ptr to_floats(strings_column_view const& strings, +std::unique_ptr to_floats(strings_column_view const& input, data_type output_type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - size_type strings_count = strings.size(); + size_type strings_count = input.size(); if (strings_count == 0) return make_numeric_column(output_type, 0); - auto strings_column = column_device_view::create(strings.parent(), stream); + auto strings_column = column_device_view::create(input.parent(), stream); auto d_strings = *strings_column; // create float output column copying the strings null-mask auto results = make_numeric_column(output_type, strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); auto results_view = results->mutable_view(); // fill output column with floats type_dispatcher(output_type, dispatch_to_floats_fn{}, d_strings, results_view, stream); - results->set_null_count(strings.null_count()); + results->set_null_count(input.null_count()); return results; } @@ -118,12 +118,13 @@ std::unique_ptr to_floats(strings_column_view const& strings, // external API -std::unique_ptr to_floats(strings_column_view const& strings, +std::unique_ptr to_floats(strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_floats(strings, output_type, cudf::get_default_stream(), mr); + return detail::to_floats(input, output_type, stream, mr); } namespace detail { @@ -436,48 +437,51 @@ std::unique_ptr from_floats(column_view const& floats, } // namespace detail // external API -std::unique_ptr from_floats(column_view const& floats, rmm::mr::device_memory_resource* mr) +std::unique_ptr 
from_floats(column_view const& floats, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::from_floats(floats, cudf::get_default_stream(), mr); + return detail::from_floats(floats, stream, mr); } namespace detail { -std::unique_ptr is_float(strings_column_view const& strings, +std::unique_ptr is_float(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto strings_column = column_device_view::create(strings.parent(), stream); + auto strings_column = column_device_view::create(input.parent(), stream); auto d_column = *strings_column; // create output column auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); auto d_results = results->mutable_view().data(); // check strings for valid float chars thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings.size()), + thrust::make_counting_iterator(input.size()), d_results, [d_column] __device__(size_type idx) { if (d_column.is_null(idx)) return false; return is_float(d_column.element(idx)); }); - results->set_null_count(strings.null_count()); + results->set_null_count(input.null_count()); return results; } } // namespace detail // external API -std::unique_ptr is_float(strings_column_view const& strings, +std::unique_ptr is_float(strings_column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_float(strings, cudf::get_default_stream(), mr); + return detail::is_float(input, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu index f5bdbcbd199..8f656b149a5 100644 --- 
a/cpp/src/strings/convert/convert_hex.cu +++ b/cpp/src/strings/convert/convert_hex.cu @@ -275,24 +275,27 @@ std::unique_ptr integers_to_hex(column_view const& input, // external API std::unique_ptr hex_to_integers(strings_column_view const& strings, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::hex_to_integers(strings, output_type, cudf::get_default_stream(), mr); + return detail::hex_to_integers(strings, output_type, stream, mr); } std::unique_ptr is_hex(strings_column_view const& strings, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_hex(strings, cudf::get_default_stream(), mr); + return detail::is_hex(strings, stream, mr); } std::unique_ptr integers_to_hex(column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::integers_to_hex(input, cudf::get_default_stream(), mr); + return detail::integers_to_hex(input, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index 2c21fc5d790..4839e83d5dd 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -112,20 +112,20 @@ inline __device__ bool is_integer(string_view const& d_str) */ struct dispatch_is_integer_fn { template ()>* = nullptr> - std::unique_ptr operator()(strings_column_view const& strings, + std::unique_ptr operator()(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - auto const d_column = column_device_view::create(strings.parent(), stream); + auto const d_column = column_device_view::create(input.parent(), stream); auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + 
input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); auto d_results = results->mutable_view().data(); - if (strings.has_nulls()) { + if (input.has_nulls()) { thrust::transform(rmm::exec_policy(stream), d_column->pair_begin(), d_column->pair_end(), @@ -140,7 +140,7 @@ struct dispatch_is_integer_fn { } // Calling mutable_view() on a column invalidates it's null count so we need to set it back - results->set_null_count(strings.null_count()); + results->set_null_count(input.null_count()); return results; } @@ -156,20 +156,20 @@ struct dispatch_is_integer_fn { } // namespace -std::unique_ptr is_integer(strings_column_view const& strings, +std::unique_ptr is_integer(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const d_column = column_device_view::create(strings.parent(), stream); + auto const d_column = column_device_view::create(input.parent(), stream); auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); auto d_results = results->mutable_view().data(); - if (strings.has_nulls()) { + if (input.has_nulls()) { thrust::transform( rmm::exec_policy(stream), d_column->pair_begin(), @@ -185,36 +185,38 @@ std::unique_ptr is_integer(strings_column_view const& strings, } // Calling mutable_view() on a column invalidates it's null count so we need to set it back - results->set_null_count(strings.null_count()); + results->set_null_count(input.null_count()); return results; } -std::unique_ptr is_integer(strings_column_view const& strings, +std::unique_ptr is_integer(strings_column_view const& input, data_type int_type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (strings.is_empty()) { return 
cudf::make_empty_column(type_id::BOOL8); } - return type_dispatcher(int_type, dispatch_is_integer_fn{}, strings, stream, mr); + if (input.is_empty()) { return cudf::make_empty_column(type_id::BOOL8); } + return type_dispatcher(int_type, dispatch_is_integer_fn{}, input, stream, mr); } } // namespace detail // external APIs -std::unique_ptr is_integer(strings_column_view const& strings, +std::unique_ptr is_integer(strings_column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_integer(strings, cudf::get_default_stream(), mr); + return detail::is_integer(input, stream, mr); } -std::unique_ptr is_integer(strings_column_view const& strings, +std::unique_ptr is_integer(strings_column_view const& input, data_type int_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_integer(strings, int_type, cudf::get_default_stream(), mr); + return detail::is_integer(input, int_type, stream, mr); } namespace detail { @@ -266,28 +268,28 @@ struct dispatch_to_integers_fn { } // namespace // This will convert a strings column into any integer column type. 
-std::unique_ptr to_integers(strings_column_view const& strings, +std::unique_ptr to_integers(strings_column_view const& input, data_type output_type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - size_type strings_count = strings.size(); + size_type strings_count = input.size(); if (strings_count == 0) return make_numeric_column(output_type, 0); // Create integer output column copying the strings null-mask auto results = make_numeric_column(output_type, strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); // Fill output column with integers - auto const strings_dev_view = column_device_view::create(strings.parent(), stream); + auto const strings_dev_view = column_device_view::create(input.parent(), stream); auto results_view = results->mutable_view(); type_dispatcher(output_type, dispatch_to_integers_fn{}, *strings_dev_view, results_view, stream); // Calling mutable_view() on a column invalidates it's null count so we need to set it back - results->set_null_count(strings.null_count()); + results->set_null_count(input.null_count()); return results; } @@ -295,12 +297,13 @@ std::unique_ptr to_integers(strings_column_view const& strings, } // namespace detail // external API -std::unique_ptr to_integers(strings_column_view const& strings, +std::unique_ptr to_integers(strings_column_view const& input, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::to_integers(strings, output_type, cudf::get_default_stream(), mr); + return detail::to_integers(input, output_type, stream, mr); } namespace detail { @@ -393,10 +396,11 @@ std::unique_ptr from_integers(column_view const& integers, // external API std::unique_ptr from_integers(column_view const& integers, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { 
CUDF_FUNC_RANGE(); - return detail::from_integers(integers, cudf::get_default_stream(), mr); + return detail::from_integers(integers, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_ipv4.cu b/cpp/src/strings/convert/convert_ipv4.cu index adb72cb0263..07e4b3e5b17 100644 --- a/cpp/src/strings/convert/convert_ipv4.cu +++ b/cpp/src/strings/convert/convert_ipv4.cu @@ -72,19 +72,19 @@ struct ipv4_to_integers_fn { } // namespace // Convert strings column of IPv4 addresses to integers column -std::unique_ptr ipv4_to_integers(strings_column_view const& strings, +std::unique_ptr ipv4_to_integers(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - size_type strings_count = strings.size(); + size_type strings_count = input.size(); if (strings_count == 0) return make_numeric_column(data_type{type_id::INT64}, 0); - auto strings_column = column_device_view::create(strings.parent(), stream); + auto strings_column = column_device_view::create(input.parent(), stream); // create output column copying the strings' null-mask auto results = make_numeric_column(data_type{type_id::INT64}, strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); auto d_results = results->mutable_view().data(); @@ -95,18 +95,19 @@ std::unique_ptr ipv4_to_integers(strings_column_view const& strings, d_results, ipv4_to_integers_fn{*strings_column}); // done - results->set_null_count(strings.null_count()); + results->set_null_count(input.null_count()); return results; } } // namespace detail // external API -std::unique_ptr ipv4_to_integers(strings_column_view const& strings, +std::unique_ptr ipv4_to_integers(strings_column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::ipv4_to_integers(strings, 
cudf::get_default_stream(), mr); + return detail::ipv4_to_integers(input, stream, mr); } namespace detail { @@ -173,23 +174,23 @@ std::unique_ptr integers_to_ipv4(column_view const& integers, cudf::detail::copy_bitmask(integers, stream, mr)); } -std::unique_ptr is_ipv4(strings_column_view const& strings, +std::unique_ptr is_ipv4(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto strings_column = column_device_view::create(strings.parent(), stream); + auto strings_column = column_device_view::create(input.parent(), stream); auto d_column = *strings_column; // create output column auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), stream, mr); auto d_results = results->mutable_view().data(); thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings.size()), + thrust::make_counting_iterator(input.size()), d_results, [d_column] __device__(size_type idx) { if (d_column.is_null(idx)) return false; @@ -214,7 +215,7 @@ std::unique_ptr is_ipv4(strings_column_view const& strings, return ip_vals[0] >= 0 && ip_vals[1] >= 0 && ip_vals[2] >= 0 && ip_vals[3] >= 0; }); - results->set_null_count(strings.null_count()); + results->set_null_count(input.null_count()); return results; } @@ -223,17 +224,19 @@ std::unique_ptr is_ipv4(strings_column_view const& strings, // external API std::unique_ptr integers_to_ipv4(column_view const& integers, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::integers_to_ipv4(integers, cudf::get_default_stream(), mr); + return detail::integers_to_ipv4(integers, stream, mr); } -std::unique_ptr is_ipv4(strings_column_view const& strings, +std::unique_ptr 
is_ipv4(strings_column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::is_ipv4(strings, cudf::get_default_stream(), mr); + return detail::is_ipv4(input, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_lists.cu b/cpp/src/strings/convert/convert_lists.cu index 3aef37914fd..f9f2b91eb12 100644 --- a/cpp/src/strings/convert/convert_lists.cu +++ b/cpp/src/strings/convert/convert_lists.cu @@ -233,10 +233,11 @@ std::unique_ptr format_list_column(lists_column_view const& input, std::unique_ptr format_list_column(lists_column_view const& input, string_scalar const& na_rep, strings_column_view const& separators, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::format_list_column(input, na_rep, separators, cudf::get_default_stream(), mr); + return detail::format_list_column(input, na_rep, separators, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu index 9efa148cfd2..9e847131be2 100644 --- a/cpp/src/strings/convert/convert_urls.cu +++ b/cpp/src/strings/convert/convert_urls.cu @@ -148,11 +148,12 @@ std::unique_ptr url_encode(strings_column_view const& input, } // namespace detail // external API -std::unique_ptr url_encode(strings_column_view const& strings, +std::unique_ptr url_encode(strings_column_view const& input, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::url_encode(strings, cudf::get_default_stream(), mr); + return detail::url_encode(input, stream, mr); } namespace detail { @@ -428,11 +429,12 @@ std::unique_ptr url_decode(strings_column_view const& strings, // external API -std::unique_ptr url_decode(strings_column_view const& strings, +std::unique_ptr url_decode(strings_column_view const& input, + rmm::cuda_stream_view stream, 
rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::url_decode(strings, cudf::get_default_stream(), mr); + return detail::url_decode(input, stream, mr); } } // namespace strings diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f36fcbc9246..3e30db7abcb 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -635,6 +635,7 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp + streams/strings/convert_test.cpp streams/strings/find_test.cpp streams/strings/replace_test.cpp streams/strings/split_test.cpp diff --git a/cpp/tests/streams/strings/convert_test.cpp b/cpp/tests/streams/strings/convert_test.cpp new file mode 100644 index 00000000000..8dc3f625746 --- /dev/null +++ b/cpp/tests/streams/strings/convert_test.cpp @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +class StringsConvertTest : public cudf::test::BaseFixture {}; + +TEST_F(StringsConvertTest, Booleans) +{ + auto input = cudf::test::strings_column_wrapper({"true", "false", "True", ""}); + auto view = cudf::strings_column_view(input); + + auto true_scalar = cudf::string_scalar("true", true, cudf::test::get_default_stream()); + auto false_scalar = cudf::string_scalar("false", true, cudf::test::get_default_stream()); + + auto bools = cudf::strings::to_booleans(view, true_scalar, cudf::test::get_default_stream()); + cudf::strings::from_booleans( + bools->view(), true_scalar, false_scalar, cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, Timestamps) +{ + auto input = cudf::test::strings_column_wrapper({"2019-03-20T12:34:56Z", "2020-02-29T00:00:00Z"}); + auto view = cudf::strings_column_view(input); + + std::string format = "%Y-%m-%dT%H:%M:%SZ"; + auto dtype = cudf::data_type{cudf::type_id::TIMESTAMP_SECONDS}; + + cudf::strings::is_timestamp(view, format, cudf::test::get_default_stream()); + auto timestamps = + cudf::strings::to_timestamps(view, dtype, format, cudf::test::get_default_stream()); + + auto empty = cudf::test::strings_column_wrapper(); + cudf::strings::from_timestamps( + timestamps->view(), format, cudf::strings_column_view(empty), cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, Durations) +{ + auto input = cudf::test::strings_column_wrapper({"17975 days 12:34:56", "18321 days 00:00:00"}); + auto view = cudf::strings_column_view(input); + + std::string format = "%D days %H:%M:%S"; + auto dtype = cudf::data_type{cudf::type_id::DURATION_SECONDS}; + + auto durations = + cudf::strings::to_durations(view, dtype, format, cudf::test::get_default_stream()); + cudf::strings::from_durations(durations->view(), format, cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, 
FixedPoint) +{ + auto input = cudf::test::strings_column_wrapper({"1.234E3", "-876", "543.2"}); + auto view = cudf::strings_column_view(input); + + auto dtype = cudf::data_type{cudf::type_id::DECIMAL64, numeric::scale_type{-3}}; + + auto values = cudf::strings::to_fixed_point(view, dtype, cudf::test::get_default_stream()); + cudf::strings::from_fixed_point(values->view(), cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, Floats) +{ + auto input = cudf::test::strings_column_wrapper({"1.234E3", "-876", "543.2"}); + auto view = cudf::strings_column_view(input); + + auto dtype = cudf::data_type{cudf::type_id::FLOAT32}; + + auto values = cudf::strings::to_floats(view, dtype, cudf::test::get_default_stream()); + cudf::strings::from_floats(values->view(), cudf::test::get_default_stream()); + cudf::strings::is_float(view, cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, Integers) +{ + auto input = cudf::test::strings_column_wrapper({"1234", "-876", "5432"}); + auto view = cudf::strings_column_view(input); + + auto dtype = cudf::data_type{cudf::type_id::INT32}; + + auto values = cudf::strings::to_integers(view, dtype, cudf::test::get_default_stream()); + cudf::strings::from_integers(values->view(), cudf::test::get_default_stream()); + cudf::strings::is_integer(view, cudf::test::get_default_stream()); + cudf::strings::is_hex(view, cudf::test::get_default_stream()); + cudf::strings::hex_to_integers(view, dtype, cudf::test::get_default_stream()); + cudf::strings::integers_to_hex(values->view(), cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, IPv4) +{ + auto input = cudf::test::strings_column_wrapper({"192.168.0.1", "10.0.0.1"}); + auto view = cudf::strings_column_view(input); + + auto values = cudf::strings::ipv4_to_integers(view, cudf::test::get_default_stream()); + cudf::strings::integers_to_ipv4(values->view(), cudf::test::get_default_stream()); + cudf::strings::is_ipv4(view, cudf::test::get_default_stream()); +} + 
+TEST_F(StringsConvertTest, URLs) +{ + auto input = cudf::test::strings_column_wrapper({"www.nvidia.com/rapids?p=é", "/_file-7.txt"}); + auto view = cudf::strings_column_view(input); + + auto values = cudf::strings::url_encode(view, cudf::test::get_default_stream()); + cudf::strings::url_decode(values->view(), cudf::test::get_default_stream()); +} + +TEST_F(StringsConvertTest, ListsFormat) +{ + using STR_LISTS = cudf::test::lists_column_wrapper; + auto const input = + STR_LISTS{{STR_LISTS{"a", "bb", "ccc"}, STR_LISTS{}, STR_LISTS{"ddd", "ee", "f"}}, + {STR_LISTS{"gg", "hhh"}, STR_LISTS{"i", "", "", "jj"}}}; + auto view = cudf::lists_column_view(input); + auto null_scalar = cudf::string_scalar("NULL", true, cudf::test::get_default_stream()); + auto separators = cudf::strings_column_view(cudf::test::strings_column_wrapper()); + cudf::strings::format_list_column( + view, null_scalar, separators, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/strings/booleans_tests.cpp b/cpp/tests/strings/booleans_tests.cpp index 0c7fc992065..469ca77a4c5 100644 --- a/cpp/tests/strings/booleans_tests.cpp +++ b/cpp/tests/strings/booleans_tests.cpp @@ -36,7 +36,8 @@ TEST_F(StringsConvertTest, ToBooleans) thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); auto strings_view = cudf::strings_column_view(strings); - auto results = cudf::strings::to_booleans(strings_view); + auto true_scalar = cudf::string_scalar("true"); + auto results = cudf::strings::to_booleans(strings_view, true_scalar); std::vector h_expected{false, false, false, true, false, false}; cudf::test::fixed_width_column_wrapper expected( @@ -60,26 +61,46 @@ TEST_F(StringsConvertTest, FromBooleans) h_column.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - auto results = cudf::strings::from_booleans(column); + auto true_scalar = cudf::string_scalar("true"); + auto false_scalar = cudf::string_scalar("false"); + auto 
results = cudf::strings::from_booleans(column, true_scalar, false_scalar); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings); } TEST_F(StringsConvertTest, ZeroSizeStringsColumnBoolean) { auto const zero_size_column = cudf::make_empty_column(cudf::type_id::BOOL8)->view(); - auto results = cudf::strings::from_booleans(zero_size_column); + auto true_scalar = cudf::string_scalar("true"); + auto false_scalar = cudf::string_scalar("false"); + auto results = cudf::strings::from_booleans(zero_size_column, true_scalar, false_scalar); cudf::test::expect_column_empty(results->view()); } TEST_F(StringsConvertTest, ZeroSizeBooleansColumn) { auto const zero_size_strings_column = cudf::make_empty_column(cudf::type_id::STRING)->view(); - auto results = cudf::strings::to_booleans(zero_size_strings_column); + auto true_scalar = cudf::string_scalar("true"); + auto results = cudf::strings::to_booleans(zero_size_strings_column, true_scalar); EXPECT_EQ(0, results->size()); } TEST_F(StringsConvertTest, BooleanError) { - auto column = cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 100); - EXPECT_THROW(cudf::strings::from_booleans(column->view()), cudf::logic_error); + auto int_column = cudf::test::fixed_width_column_wrapper({1, 2, 3}); + auto true_scalar = cudf::string_scalar("true"); + auto false_scalar = cudf::string_scalar("false"); + EXPECT_THROW(cudf::strings::from_booleans(int_column, true_scalar, false_scalar), + cudf::logic_error); + + auto bool_column = cudf::test::fixed_width_column_wrapper({1, 0, 1}); + auto null_scalar = cudf::string_scalar("", false); + EXPECT_THROW(cudf::strings::from_booleans(bool_column, null_scalar, false_scalar), + cudf::logic_error); + EXPECT_THROW(cudf::strings::from_booleans(bool_column, true_scalar, null_scalar), + cudf::logic_error); + auto empty_scalar = cudf::string_scalar("", true); + EXPECT_THROW(cudf::strings::from_booleans(int_column, empty_scalar, false_scalar), + cudf::logic_error); + 
EXPECT_THROW(cudf::strings::from_booleans(int_column, true_scalar, empty_scalar), + cudf::logic_error); } diff --git a/cpp/tests/strings/format_lists_tests.cpp b/cpp/tests/strings/format_lists_tests.cpp index 95dc9725afc..6196b8ed6ad 100644 --- a/cpp/tests/strings/format_lists_tests.cpp +++ b/cpp/tests/strings/format_lists_tests.cpp @@ -60,8 +60,9 @@ TEST_F(StringsFormatListsTest, WithNulls) cudf::test::iterators::null_at(1)}; auto const view = cudf::lists_column_view(input); - auto results = cudf::strings::format_list_column(view); - auto expected = cudf::test::strings_column_wrapper( + auto null_scalar = cudf::string_scalar("NULL"); + auto results = cudf::strings::format_list_column(view, null_scalar); + auto expected = cudf::test::strings_column_wrapper( {"[a,NULL,ccc]", "NULL", "[NULL,bb,ddd]", "[zzz,xxxxx]", "[v,,NULL,w]"}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } @@ -132,11 +133,13 @@ TEST_F(StringsFormatListsTest, SlicedLists) "[ééé,12345abcdef]", "[www,12345]"}); + auto null_scalar = cudf::string_scalar("NULL"); + // set of slice intervals: covers slicing the front, back, and middle std::vector> index_pairs({{0, 11}, {0, 4}, {3, 8}, {5, 11}}); for (auto indexes : index_pairs) { auto sliced = cudf::lists_column_view(cudf::slice(input, {indexes.first, indexes.second})[0]); - auto results = cudf::strings::format_list_column(sliced); + auto results = cudf::strings::format_list_column(sliced, null_scalar); auto expected = cudf::test::strings_column_wrapper(h_expected.begin() + indexes.first, h_expected.begin() + indexes.second); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 0ddaa2c15b5..462f0d8eac9 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1130,7 +1130,11 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_castTo(JNIEnv *env, jclas } if (n_data_type.id() == 
cudf::type_id::STRING) { switch (column->type().id()) { - case cudf::type_id::BOOL8: return release_as_jlong(cudf::strings::from_booleans(*column)); + case cudf::type_id::BOOL8: { + auto const true_scalar = cudf::string_scalar("true"); + auto const false_scalar = cudf::string_scalar("false"); + return release_as_jlong(cudf::strings::from_booleans(*column, true_scalar, false_scalar)); + } case cudf::type_id::FLOAT32: case cudf::type_id::FLOAT64: return release_as_jlong(cudf::strings::from_floats(*column)); case cudf::type_id::INT8: @@ -1149,7 +1153,10 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_castTo(JNIEnv *env, jclas } } else if (column->type().id() == cudf::type_id::STRING) { switch (n_data_type.id()) { - case cudf::type_id::BOOL8: return release_as_jlong(cudf::strings::to_booleans(*column)); + case cudf::type_id::BOOL8: { + auto const true_scalar = cudf::string_scalar("true"); + return release_as_jlong(cudf::strings::to_booleans(*column, true_scalar)); + } case cudf::type_id::FLOAT32: case cudf::type_id::FLOAT64: return release_as_jlong(cudf::strings::to_floats(*column, n_data_type)); From 655f3a4659653e95b9f12ed924c7e887be41c5d4 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 16 Oct 2023 12:46:48 -0400 Subject: [PATCH 61/76] Update rapids-cmake functions to non-deprecated signatures (#14265) Update to use non deprecated signatures for `rapids_export` functions Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14265 --- cpp/cmake/thirdparty/get_arrow.cmake | 18 +++++++++++++----- cpp/cmake/thirdparty/get_cufile.cmake | 4 ++-- cpp/cmake/thirdparty/get_gtest.cmake | 4 ++-- cpp/cmake/thirdparty/get_kvikio.cmake | 12 +++++++----- cpp/cmake/thirdparty/get_libcudacxx.cmake | 18 ++++++++---------- cpp/cmake/thirdparty/get_spdlog.cmake | 4 +++- cpp/cmake/thirdparty/get_thrust.cmake | 15 +++++++-------- 7 files changed, 42 insertions(+), 33 
deletions(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 894dc9649e2..10d3145a36f 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -387,11 +387,19 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root(BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) - if(ENABLE_PARQUET) - rapids_export_find_package_root(BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) - rapids_export_find_package_root(BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) - endif() + rapids_export_find_package_root( + BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports + ) + rapids_export_find_package_root( + BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] + EXPORT_SET cudf-exports + CONDITION ENABLE_PARQUET + ) + rapids_export_find_package_root( + BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] + EXPORT_SET cudf-exports + CONDITION ENABLE_PARQUET + ) set(ARROW_LIBRARIES "${ARROW_LIBRARIES}" diff --git a/cpp/cmake/thirdparty/get_cufile.cmake b/cpp/cmake/thirdparty/get_cufile.cmake index c0235eba508..bfdff3a99ff 100644 --- a/cpp/cmake/thirdparty/get_cufile.cmake +++ b/cpp/cmake/thirdparty/get_cufile.cmake @@ -21,10 +21,10 @@ function(find_and_configure_cufile) if(cuFile_FOUND AND NOT BUILD_SHARED_LIBS) include("${rapids-cmake-dir}/export/find_package_file.cmake") rapids_export_find_package_file( - BUILD "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports + BUILD "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" EXPORT_SET cudf-exports ) rapids_export_find_package_file( - INSTALL "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports + INSTALL "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" EXPORT_SET cudf-exports ) endif() endfunction() diff --git a/cpp/cmake/thirdparty/get_gtest.cmake 
b/cpp/cmake/thirdparty/get_gtest.cmake index 1363f43fae2..cfb219448f1 100644 --- a/cpp/cmake/thirdparty/get_gtest.cmake +++ b/cpp/cmake/thirdparty/get_gtest.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -30,7 +30,7 @@ function(find_and_configure_gtest) include("${rapids-cmake-dir}/export/find_package_root.cmake") rapids_export_find_package_root( - BUILD GTest [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-testing-exports + BUILD GTest [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-testing-exports ) endif() diff --git a/cpp/cmake/thirdparty/get_kvikio.cmake b/cpp/cmake/thirdparty/get_kvikio.cmake index e94e024d6c9..20712beec41 100644 --- a/cpp/cmake/thirdparty/get_kvikio.cmake +++ b/cpp/cmake/thirdparty/get_kvikio.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -25,10 +25,12 @@ function(find_and_configure_kvikio VERSION) OPTIONS "KvikIO_BUILD_EXAMPLES OFF" ) - if(KvikIO_BINARY_DIR) - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root(BUILD KvikIO "${KvikIO_BINARY_DIR}" cudf-exports) - endif() + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root( + BUILD KvikIO "${KvikIO_BINARY_DIR}" + EXPORT_SET cudf-exports + CONDITION KvikIO_BINARY_DIR + ) endfunction() diff --git a/cpp/cmake/thirdparty/get_libcudacxx.cmake b/cpp/cmake/thirdparty/get_libcudacxx.cmake index 0e03352c335..285d66287f3 100644 --- a/cpp/cmake/thirdparty/get_libcudacxx.cmake +++ b/cpp/cmake/thirdparty/get_libcudacxx.cmake @@ -22,16 +22,14 @@ function(find_and_configure_libcudacxx) include(${rapids-cmake-dir}/cpm/libcudacxx.cmake) rapids_cpm_libcudacxx(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) - if(libcudacxx_SOURCE_DIR) - # Store where CMake can find our custom Thrust install - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root( - INSTALL - libcudacxx - [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/libcudacxx]=] - cudf-exports - ) - endif() + # Store where CMake can find our custom Thrust install + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root( + INSTALL libcudacxx + [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/libcudacxx]=] + EXPORT_SET cudf-exports + CONDITION libcudacxx_SOURCE_DIR + ) endfunction() find_and_configure_libcudacxx() diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake index fff5b84af0d..c0e07d02d94 100644 --- a/cpp/cmake/thirdparty/get_spdlog.cmake +++ b/cpp/cmake/thirdparty/get_spdlog.cmake @@ -27,7 +27,9 @@ function(find_and_configure_spdlog) NAMESPACE spdlog:: ) 
include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root(BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + rapids_export_find_package_root( + BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports + ) endif() endfunction() diff --git a/cpp/cmake/thirdparty/get_thrust.cmake b/cpp/cmake/thirdparty/get_thrust.cmake index 39a9de15fa6..67ed4287d7b 100644 --- a/cpp/cmake/thirdparty/get_thrust.cmake +++ b/cpp/cmake/thirdparty/get_thrust.cmake @@ -33,14 +33,13 @@ function(find_and_configure_thrust) INSTALL_EXPORT_SET cudf-exports ) - if(Thrust_SOURCE_DIR) - # Store where CMake can find our custom Thrust install - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root( - INSTALL Thrust - [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/thrust]=] cudf-exports - ) - endif() + # Store where CMake can find our custom Thrust install + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root( + INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/thrust]=] + EXPORT_SET cudf-exports + CONDITION Thrust_SOURCE_DIR + ) endfunction() find_and_configure_thrust() From ef92310155eb663859a47b73b6e8fc119d0bebc6 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 16 Oct 2023 15:04:06 -0400 Subject: [PATCH 62/76] Fix memset error in nvtext::edit_distance_matrix (#14283) Fixes a bug in `nvtext::edit_distance_matrix` where the internal offsets vector is initialized to 0. This error was introduced in #13912 The bug was found while working on a different PR which re-ordered the nvtext gtests execution causing device memory to be reused from the rmm pool in a different way. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Mark Harris (https://github.com/harrism) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14283 --- cpp/src/text/edit_distance.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/text/edit_distance.cu b/cpp/src/text/edit_distance.cu index 1460be4fcf5..3d5f2d72e6f 100644 --- a/cpp/src/text/edit_distance.cu +++ b/cpp/src/text/edit_distance.cu @@ -224,7 +224,7 @@ std::unique_ptr edit_distance_matrix(cudf::strings_column_view con cudf::size_type n_upper = (strings_count * (strings_count - 1)) / 2; rmm::device_uvector offsets(n_upper, stream); auto d_offsets = offsets.data(); - CUDF_CUDA_TRY(cudaMemsetAsync(d_offsets, 0, n_upper * sizeof(cudf::size_type), stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(d_offsets, 0, n_upper * sizeof(std::ptrdiff_t), stream.value())); thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0), From c47546ed1907a47d22acefcf7e854ef8788d0d4c Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Mon, 16 Oct 2023 16:12:17 -0700 Subject: [PATCH 63/76] Add stream parameter to List Sort and Filter APIs (#14272) This PR introduces the stream parameter to the List Sorting and Filtering APIs. 
Sorting and Filtering (`extract.hpp`, `filling.hpp`, `gather.hpp`, `reverse.hpp`, `sorting.hpp`, `stream_compaction.hpp`) ``` extract_list_element - index extract_list_element - indices segmented_gather sequences - without steps sequences - with steps reverse sort_lists stable_sort_lists apply_boolean_mask distinct ``` Reference [13744](https://github.com/rapidsai/cudf/issues/13744) Authors: - Suraj Aralihalli (https://github.com/SurajAralihalli) Approvers: - Nghia Truong (https://github.com/ttnghia) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14272 --- cpp/include/cudf/lists/extract.hpp | 6 +- cpp/include/cudf/lists/filling.hpp | 6 ++ cpp/include/cudf/lists/gather.hpp | 2 + cpp/include/cudf/lists/reverse.hpp | 4 +- cpp/include/cudf/lists/sorting.hpp | 5 +- cpp/include/cudf/lists/stream_compaction.hpp | 6 +- cpp/src/lists/copying/segmented_gather.cu | 4 +- cpp/src/lists/extract.cu | 6 +- cpp/src/lists/reverse.cu | 6 +- cpp/src/lists/segmented_sort.cu | 7 +- cpp/src/lists/sequences.cu | 6 +- .../stream_compaction/apply_boolean_mask.cu | 3 +- cpp/src/lists/stream_compaction/distinct.cu | 3 +- cpp/tests/groupby/histogram_tests.cpp | 1 + cpp/tests/streams/lists_test.cpp | 81 +++++++++++++++++++ 15 files changed, 129 insertions(+), 17 deletions(-) diff --git a/cpp/include/cudf/lists/extract.hpp b/cpp/include/cudf/lists/extract.hpp index e92354134e8..14c0f59e17d 100644 --- a/cpp/include/cudf/lists/extract.hpp +++ b/cpp/include/cudf/lists/extract.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,12 +59,14 @@ namespace lists { * * @param lists_column Column to extract elements from. * @param index The row within each sublist to retrieve. 
+ * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return Column of extracted elements. */ std::unique_ptr extract_list_element( lists_column_view const& lists_column, size_type index, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -97,6 +99,7 @@ std::unique_ptr extract_list_element( * @param lists_column Column to extract elements from. * @param indices The column whose rows indicate the element index to be retrieved from each list * row. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return Column of extracted elements. * @throws cudf::logic_error If the sizes of `lists_column` and `indices` do not match. @@ -104,6 +107,7 @@ std::unique_ptr extract_list_element( std::unique_ptr extract_list_element( lists_column_view const& lists_column, column_view const& indices, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/lists/filling.hpp b/cpp/include/cudf/lists/filling.hpp index 059ed5ffd33..3730e16482d 100644 --- a/cpp/include/cudf/lists/filling.hpp +++ b/cpp/include/cudf/lists/filling.hpp @@ -17,7 +17,9 @@ #pragma once #include +#include +#include #include #include @@ -57,12 +59,14 @@ namespace cudf::lists { * * @param starts First values in the result sequences. * @param sizes Numbers of values in the result sequences. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return The result column containing generated sequences. 
*/ std::unique_ptr sequences( column_view const& starts, column_view const& sizes, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -96,6 +100,7 @@ std::unique_ptr sequences( * @param starts First values in the result sequences. * @param steps Increment values for the result sequences. * @param sizes Numbers of values in the result sequences. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return The result column containing generated sequences. */ @@ -103,6 +108,7 @@ std::unique_ptr sequences( column_view const& starts, column_view const& steps, column_view const& sizes, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/lists/gather.hpp b/cpp/include/cudf/lists/gather.hpp index 38bed9ede43..5e6ab6816e6 100644 --- a/cpp/include/cudf/lists/gather.hpp +++ b/cpp/include/cudf/lists/gather.hpp @@ -65,6 +65,7 @@ namespace lists { * @param bounds_policy Can be `DONT_CHECK` or `NULLIFY`. Selects whether or not to nullify the * output list row's element, when the gather index falls outside the range `[-n, n)`, * where `n` is the number of elements in list row corresponding to the gather-map row. + * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource to allocate any returned objects * @return column with elements in list of rows gathered based on `gather_map_list` * @@ -73,6 +74,7 @@ std::unique_ptr segmented_gather( lists_column_view const& source_column, lists_column_view const& gather_map_list, out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/lists/reverse.hpp b/cpp/include/cudf/lists/reverse.hpp index 226d417c53a..864cd796f72 100644 --- a/cpp/include/cudf/lists/reverse.hpp +++ b/cpp/include/cudf/lists/reverse.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,11 +42,13 @@ namespace cudf::lists { * @endcode * * @param input Lists column for this operation + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New lists column with reversed lists */ std::unique_ptr reverse( lists_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/lists/sorting.hpp b/cpp/include/cudf/lists/sorting.hpp index c203c452b0d..39a52c75a98 100644 --- a/cpp/include/cudf/lists/sorting.hpp +++ b/cpp/include/cudf/lists/sorting.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -46,6 +46,7 @@ namespace lists { * @param source_column View of the list column of numeric types to sort * @param column_order The desired sort order * @param null_precedence The desired order of null compared to other elements in the list + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to allocate any returned objects * @return list column with elements in each list sorted. * @@ -54,6 +55,7 @@ std::unique_ptr sort_lists( lists_column_view const& source_column, order column_order, null_order null_precedence, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -66,6 +68,7 @@ std::unique_ptr stable_sort_lists( lists_column_view const& source_column, order column_order, null_order null_precedence, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/lists/stream_compaction.hpp b/cpp/include/cudf/lists/stream_compaction.hpp index 5ddaa992184..3ac4f6861ec 100644 --- a/cpp/include/cudf/lists/stream_compaction.hpp +++ b/cpp/include/cudf/lists/stream_compaction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -54,12 +54,14 @@ namespace cudf::lists { * * @param input The input list column view to be filtered * @param boolean_mask A nullable list of bools column used to filter `input` elements + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return List column of the same type as `input`, containing filtered list rows */ std::unique_ptr apply_boolean_mask( lists_column_view const& input, lists_column_view const& boolean_mask, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -78,6 +80,7 @@ std::unique_ptr apply_boolean_mask( * @param input The input lists column * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param nans_equal Flag to specify whether floating-point NaNs should be considered as equal + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned object * @return The resulting lists column containing lists without duplicates */ @@ -85,6 +88,7 @@ std::unique_ptr distinct( lists_column_view const& input, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 79d33e7c17d..855ceadf33f 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -116,11 +116,11 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, std::unique_ptr segmented_gather(lists_column_view const& source_column, lists_column_view const& gather_map_list, 
out_of_bounds_policy bounds_policy, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::segmented_gather( - source_column, gather_map_list, bounds_policy, cudf::get_default_stream(), mr); + return detail::segmented_gather(source_column, gather_map_list, bounds_policy, stream, mr); } } // namespace lists diff --git a/cpp/src/lists/extract.cu b/cpp/src/lists/extract.cu index 5d4a20d1cb8..365e9ef8255 100644 --- a/cpp/src/lists/extract.cu +++ b/cpp/src/lists/extract.cu @@ -196,10 +196,11 @@ std::unique_ptr extract_list_element(lists_column_view lists_column, */ std::unique_ptr extract_list_element(lists_column_view const& lists_column, size_type index, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::extract_list_element(lists_column, index, cudf::get_default_stream(), mr); + return detail::extract_list_element(lists_column, index, stream, mr); } /** @@ -209,12 +210,13 @@ std::unique_ptr extract_list_element(lists_column_view const& lists_colu */ std::unique_ptr extract_list_element(lists_column_view const& lists_column, column_view const& indices, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(indices.size() == lists_column.size(), "Index column must have as many elements as lists column."); - return detail::extract_list_element(lists_column, indices, cudf::get_default_stream(), mr); + return detail::extract_list_element(lists_column, indices, stream, mr); } } // namespace lists diff --git a/cpp/src/lists/reverse.cu b/cpp/src/lists/reverse.cu index a2af85b5dad..6c00f8b64b4 100644 --- a/cpp/src/lists/reverse.cu +++ b/cpp/src/lists/reverse.cu @@ -86,10 +86,12 @@ std::unique_ptr reverse(lists_column_view const& input, } // namespace detail -std::unique_ptr reverse(lists_column_view const& input, rmm::mr::device_memory_resource* mr) +std::unique_ptr reverse(lists_column_view const& input, + 
rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::reverse(input, cudf::get_default_stream(), mr); + return detail::reverse(input, stream, mr); } } // namespace cudf::lists diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index 49054ebb046..0b70773f4b2 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -119,20 +119,21 @@ std::unique_ptr stable_sort_lists(lists_column_view const& input, std::unique_ptr sort_lists(lists_column_view const& input, order column_order, null_order null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sort_lists(input, column_order, null_precedence, cudf::get_default_stream(), mr); + return detail::sort_lists(input, column_order, null_precedence, stream, mr); } std::unique_ptr stable_sort_lists(lists_column_view const& input, order column_order, null_order null_precedence, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::stable_sort_lists( - input, column_order, null_precedence, cudf::get_default_stream(), mr); + return detail::stable_sort_lists(input, column_order, null_precedence, stream, mr); } } // namespace lists diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu index aaee5608cc3..f92ba782da7 100644 --- a/cpp/src/lists/sequences.cu +++ b/cpp/src/lists/sequences.cu @@ -208,19 +208,21 @@ std::unique_ptr sequences(column_view const& starts, std::unique_ptr sequences(column_view const& starts, column_view const& sizes, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sequences(starts, sizes, cudf::get_default_stream(), mr); + return detail::sequences(starts, sizes, stream, mr); } std::unique_ptr sequences(column_view const& starts, column_view const& steps, column_view const& sizes, + rmm::cuda_stream_view stream, 
rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::sequences(starts, steps, sizes, cudf::get_default_stream(), mr); + return detail::sequences(starts, steps, sizes, stream, mr); } } // namespace cudf::lists diff --git a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu index ad43fbd5b00..ce972d89150 100644 --- a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu @@ -101,10 +101,11 @@ std::unique_ptr apply_boolean_mask(lists_column_view const& input, std::unique_ptr apply_boolean_mask(lists_column_view const& input, lists_column_view const& boolean_mask, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::apply_boolean_mask(input, boolean_mask, cudf::get_default_stream(), mr); + return detail::apply_boolean_mask(input, boolean_mask, stream, mr); } } // namespace cudf::lists diff --git a/cpp/src/lists/stream_compaction/distinct.cu b/cpp/src/lists/stream_compaction/distinct.cu index 48d8babb4fa..eb21787b3fa 100644 --- a/cpp/src/lists/stream_compaction/distinct.cu +++ b/cpp/src/lists/stream_compaction/distinct.cu @@ -76,10 +76,11 @@ std::unique_ptr distinct(lists_column_view const& input, std::unique_ptr distinct(lists_column_view const& input, null_equality nulls_equal, nan_equality nans_equal, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::distinct(input, nulls_equal, nans_equal, cudf::get_default_stream(), mr); + return detail::distinct(input, nulls_equal, nans_equal, stream, mr); } } // namespace cudf::lists diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp index c5833f40cf2..612486d8e5c 100644 --- a/cpp/tests/groupby/histogram_tests.cpp +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -67,6 +67,7 @@ auto groupby_histogram(cudf::column_view const& keys, auto 
sorted_histograms = cudf::lists::sort_lists(cudf::lists_column_view{*sorted_vals}, cudf::order::ASCENDING, cudf::null_order::BEFORE, + cudf::get_default_stream(), rmm::mr::get_current_device_resource()); return std::pair{std::move(sorted_keys), std::move(sorted_histograms)}; diff --git a/cpp/tests/streams/lists_test.cpp b/cpp/tests/streams/lists_test.cpp index e292b551d83..82a4cb8aa4a 100644 --- a/cpp/tests/streams/lists_test.cpp +++ b/cpp/tests/streams/lists_test.cpp @@ -21,6 +21,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include class ListTest : public cudf::test::BaseFixture {}; @@ -85,3 +91,78 @@ TEST_F(ListTest, CountElements) cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7}, {4, 5}}; cudf::lists::count_elements(list_col, cudf::test::get_default_stream()); } + +TEST_F(ListTest, ExtractListElementFromIndex) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7}, {4, 5}}; + cudf::lists::extract_list_element(list_col, -1, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, ExtractListElementFromIndices) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7}, {4, 5}}; + cudf::test::fixed_width_column_wrapper indices({-1, -2, -1}); + cudf::lists::extract_list_element(list_col, indices, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, SegmentedGather) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7, 8}, {4, 5}}; + cudf::test::lists_column_wrapper gather_map_list{{0}, {1, 2}, {1}}; + cudf::lists::segmented_gather(list_col, + gather_map_list, + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::test::get_default_stream()); +} + +TEST_F(ListTest, Sequences) +{ + cudf::test::fixed_width_column_wrapper starts({0, 1, 2, 3, 4}); + cudf::test::fixed_width_column_wrapper sizes({0, 1, 2, 2, 1}); + cudf::lists::sequences(starts, sizes, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, SequencesWithSteps) +{ + cudf::test::fixed_width_column_wrapper starts({0, 1, 2, 3, 
4}); + cudf::test::fixed_width_column_wrapper steps({2, 1, 1, 1, -3}); + cudf::test::fixed_width_column_wrapper sizes({0, 1, 2, 2, 1}); + cudf::lists::sequences(starts, steps, sizes, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, Reverse) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7, 8}, {4, 5}}; + cudf::lists::reverse(list_col, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, SortLists) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7, 8}, {4, 5}}; + cudf::lists::sort_lists( + list_col, cudf::order::DESCENDING, cudf::null_order::AFTER, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, StableSortLists) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7, 8}, {4, 5}}; + cudf::lists::stable_sort_lists( + list_col, cudf::order::DESCENDING, cudf::null_order::AFTER, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, ApplyBooleanMask) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7, 8}, {4, 5}}; + cudf::test::lists_column_wrapper boolean_mask{{0, 1}, {1, 1, 1, 0}, {0, 1}}; + cudf::lists::apply_boolean_mask(list_col, boolean_mask, cudf::test::get_default_stream()); +} + +TEST_F(ListTest, Distinct) +{ + cudf::test::lists_column_wrapper list_col{{0, 1}, {2, 3, 7, 8}, {4, 5}}; + cudf::test::lists_column_wrapper boolean_mask{{0, 1}, {1, 1, 1, 0}, {0, 1}}; + cudf::lists::distinct(list_col, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, + cudf::test::get_default_stream()); +} From 5f05c180b80b70fc09ea58aef2494c57edc44b9c Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 17 Oct 2023 11:32:12 -0400 Subject: [PATCH 64/76] Enable indexalator for device code (#14206) Enables indexalator to be instantiated from device code. Also add gtests for the output indexalator. 
This change helps enable for the offset-normalizing-iterator #14234 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/14206 --- .../cudf/detail/normalizing_iterator.cuh | 40 +++++-- cpp/tests/iterator/indexalator_test.cu | 100 ++++++++++++++++++ 2 files changed, 131 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 51b3133f84f..35a695d47df 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -34,7 +34,7 @@ namespace detail { */ template struct base_normalator { - static_assert(std::is_integral_v); + static_assert(cudf::is_index_type()); using difference_type = std::ptrdiff_t; using value_type = Integer; using pointer = Integer*; @@ -202,13 +202,34 @@ struct base_normalator { return static_cast(*this).p_ >= rhs.p_; } + private: + struct integer_sizeof_fn { + template ()>* = nullptr> + CUDF_HOST_DEVICE constexpr std::size_t operator()() const + { +#ifndef __CUDA_ARCH__ + CUDF_FAIL("only integral types are supported"); +#else + CUDF_UNREACHABLE("only integral types are supported"); +#endif + } + template ()>* = nullptr> + CUDF_HOST_DEVICE constexpr std::size_t operator()() const noexcept + { + return sizeof(T); + } + }; + protected: /** * @brief Constructor assigns width and type member variables for base class. 
*/ - explicit base_normalator(data_type dtype) : width_(size_of(dtype)), dtype_(dtype) {} + explicit CUDF_HOST_DEVICE base_normalator(data_type dtype) : dtype_(dtype) + { + width_ = static_cast(type_dispatcher(dtype, integer_sizeof_fn{})); + } - int width_; /// integer type width = 1,2,4, or 8 + int32_t width_; /// integer type width = 1,2,4, or 8 data_type dtype_; /// for type-dispatcher calls }; @@ -244,12 +265,12 @@ struct input_normalator : base_normalator, Integer> { * @brief Dispatch functor for resolving a Integer value from any integer type */ struct normalize_type { - template >* = nullptr> + template ()>* = nullptr> __device__ Integer operator()(void const* tp) { return static_cast(*static_cast(tp)); } - template >* = nullptr> + template ()>* = nullptr> __device__ Integer operator()(void const*) { CUDF_UNREACHABLE("only integral types are supported"); @@ -274,9 +295,10 @@ struct input_normalator : base_normalator, Integer> { * @param data Pointer to an integer array in device memory. * @param data_type Type of data in data */ - input_normalator(void const* data, data_type dtype) + CUDF_HOST_DEVICE input_normalator(void const* data, data_type dtype, cudf::size_type offset = 0) : base_normalator, Integer>(dtype), p_{static_cast(data)} { + p_ += offset * this->width_; } char const* p_; /// pointer to the integer data in device memory @@ -327,12 +349,12 @@ struct output_normalator : base_normalator, Integer> * @brief Dispatch functor for setting the index value from a size_type value. */ struct normalize_type { - template >* = nullptr> + template ()>* = nullptr> __device__ void operator()(void* tp, Integer const value) { (*static_cast(tp)) = static_cast(value); } - template >* = nullptr> + template ()>* = nullptr> __device__ void operator()(void*, Integer const) { CUDF_UNREACHABLE("only index types are supported"); @@ -355,7 +377,7 @@ struct output_normalator : base_normalator, Integer> * @param data Pointer to an integer array in device memory. 
* @param data_type Type of data in data */ - output_normalator(void* data, data_type dtype) + CUDF_HOST_DEVICE output_normalator(void* data, data_type dtype) : base_normalator, Integer>(dtype), p_{static_cast(data)} { } diff --git a/cpp/tests/iterator/indexalator_test.cu b/cpp/tests/iterator/indexalator_test.cu index 1ff7f4c42a5..3e8bcd5cb0d 100644 --- a/cpp/tests/iterator/indexalator_test.cu +++ b/cpp/tests/iterator/indexalator_test.cu @@ -20,9 +20,13 @@ #include +#include +#include #include #include #include +#include +#include using TestingTypes = cudf::test::IntegralTypesNotBool; @@ -94,3 +98,99 @@ TYPED_TEST(IndexalatorTest, optional_iterator) auto it_dev = cudf::detail::indexalator_factory::make_input_optional_iterator(d_col); this->iterator_test_thrust(expected_values, it_dev, host_values.size()); } + +template +struct transform_fn { + __device__ cudf::size_type operator()(Integer v) + { + return static_cast(v) + static_cast(v); + } +}; + +TYPED_TEST(IndexalatorTest, output_iterator) +{ + using T = TypeParam; + + auto d_col1 = + cudf::test::fixed_width_column_wrapper({0, 6, 7, 14, 23, 33, 43, 45, 63}); + auto d_col2 = + cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0, 0, 0, 0}); + auto itr = cudf::detail::indexalator_factory::make_output_iterator(d_col2); + auto input = cudf::column_view(d_col1); + auto stream = cudf::get_default_stream(); + + auto map = cudf::test::fixed_width_column_wrapper({0, 2, 4, 6, 8, 1, 3, 5, 7}); + auto d_map = cudf::column_view(map); + thrust::gather( + rmm::exec_policy_nosync(stream), d_map.begin(), d_map.end(), input.begin(), itr); + auto expected = + cudf::test::fixed_width_column_wrapper({0, 7, 23, 43, 63, 6, 14, 33, 45}); + thrust::scatter( + rmm::exec_policy_nosync(stream), input.begin(), input.end(), d_map.begin(), itr); + expected = + cudf::test::fixed_width_column_wrapper({0, 33, 6, 43, 7, 45, 14, 63, 23}); + + thrust::transform( + rmm::exec_policy(stream), input.begin(), input.end(), itr, transform_fn{}); + 
expected = + cudf::test::fixed_width_column_wrapper({0, 12, 14, 28, 46, 66, 86, 90, 126}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + thrust::fill(rmm::exec_policy(stream), itr, itr + input.size(), 77); + expected = + cudf::test::fixed_width_column_wrapper({77, 77, 77, 77, 77, 77, 77, 77, 77}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + thrust::sequence(rmm::exec_policy(stream), itr, itr + input.size()); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 2, 3, 4, 5, 6, 7, 8}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); + + auto indices = + cudf::test::fixed_width_column_wrapper({0, 10, 20, 30, 40, 50, 60, 70, 80}); + auto d_indices = cudf::column_view(indices); + thrust::lower_bound(rmm::exec_policy(stream), + d_indices.begin(), + d_indices.end(), + input.begin(), + input.end(), + itr); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 1, 2, 3, 4, 5, 5, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); +} + +/** + * For testing creating and using the indexalator in device code. 
+ */ +struct device_functor_fn { + cudf::column_device_view const d_col; + __device__ cudf::size_type operator()(cudf::size_type idx) + { + auto itr = cudf::detail::input_indexalator(d_col.head(), d_col.type()); + return itr[idx] * 3; + } +}; + +TYPED_TEST(IndexalatorTest, device_indexalator) +{ + using T = TypeParam; + + auto d_col1 = + cudf::test::fixed_width_column_wrapper({0, 6, 7, 14, 23, 33, 43, 45, 63}); + auto d_col2 = + cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0, 0, 0, 0}); + auto input = cudf::column_view(d_col1); + auto output = cudf::mutable_column_view(d_col2); + auto stream = cudf::get_default_stream(); + + auto d_input = cudf::column_device_view::create(input, stream); + + thrust::transform(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + output.begin(), + device_functor_fn{*d_input}); + + auto expected = + cudf::test::fixed_width_column_wrapper({0, 18, 21, 42, 69, 99, 129, 135, 189}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d_col2, expected); +} From 29b25373bb0074ebec18653b28b7fd1dc0196b1a Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Wed, 18 Oct 2023 08:35:31 -0500 Subject: [PATCH 65/76] update repo refs (#14289) --- .github/workflows/build.yaml | 16 ++++++++-------- .github/workflows/pr.yaml | 28 ++++++++++++++-------------- .github/workflows/test.yaml | 16 ++++++++-------- ci/release/update-version.sh | 2 +- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index ab028eb89cc..1a7aa00aebf 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ 
inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: 
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: ${{ inputs.build_type || 'branch' }} @@ -100,7 +100,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 214f9c90b41..8d6c471c912 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -26,34 +26,34 @@ jobs: - wheel-build-dask-cudf - wheel-tests-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.12 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-23.12 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 with: build_type: pull-request conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 with: build_type: pull-request 
conda-python-cudf-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: pull-request test_script: "ci/test_python_cudf.sh" @@ -61,14 +61,14 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: pull-request test_script: "ci/test_python_other.sh" conda-java-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -78,7 +78,7 @@ jobs: conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -88,7 +88,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -98,21 +98,21 @@ jobs: wheel-build-cudf: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: needs: wheel-build-cudf 
secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: build_type: pull-request script: ci/test_wheel_cudf.sh wheel-build-dask-cudf: needs: wheel-tests-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request @@ -120,7 +120,7 @@ jobs: wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 9ca32bcfe03..db86e035067 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -36,7 +36,7 @@ jobs: run_script: "ci/test_cpp_memcheck.sh" conda-python-cudf-tests: secrets: inherit - uses: 
rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: test_script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: nightly diff --git 
a/ci/release/update-version.sh b/ci/release/update-version.sh index 5e735a71994..eac64fe1a0f 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -113,7 +113,7 @@ sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_ # CI files for FILE in .github/workflows/*.yaml; do - sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" + sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE}; done sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh From 7aa757959b5b597a0258c955fbeadb92cfb2e762 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Thu, 19 Oct 2023 00:10:19 +0100 Subject: [PATCH 66/76] Add nvtx annotations to spilling-based data movement (#14196) In nsys traces, it is useful to be able to see when memory allocations are provoking cudf-managed spilling. Do this by adding appropriate nvtx annotations. To enable this, we must move the nvtx decorators to a separate file avoiding circular imports. 
Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Mark Harris (https://github.com/harrism) - Vyas Ramasubramani (https://github.com/vyasr) - Richard (Rick) Zamora (https://github.com/rjzamora) URL: https://github.com/rapidsai/cudf/pull/14196 --- python/cudf/cudf/core/buffer/spill_manager.py | 7 +++++ .../cudf/cudf/core/buffer/spillable_buffer.py | 24 +++++++++++---- python/cudf/cudf/core/dataframe.py | 7 ++--- python/cudf/cudf/core/frame.py | 7 ++--- python/cudf/cudf/core/groupby/groupby.py | 3 +- python/cudf/cudf/core/index.py | 8 ++--- python/cudf/cudf/core/indexed_frame.py | 3 +- python/cudf/cudf/core/multiindex.py | 3 +- python/cudf/cudf/core/series.py | 2 +- python/cudf/cudf/core/single_column_frame.py | 3 +- python/cudf/cudf/core/udf/groupby_utils.py | 2 +- python/cudf/cudf/core/udf/utils.py | 3 +- python/cudf/cudf/io/csv.py | 2 +- python/cudf/cudf/io/parquet.py | 2 +- python/cudf/cudf/io/text.py | 4 +-- python/cudf/cudf/utils/nvtx_annotation.py | 30 +++++++++++++++++++ python/cudf/cudf/utils/utils.py | 27 ----------------- python/dask_cudf/dask_cudf/backends.py | 2 +- python/dask_cudf/dask_cudf/core.py | 2 +- python/dask_cudf/dask_cudf/groupby.py | 2 +- python/dask_cudf/dask_cudf/sorting.py | 2 +- 21 files changed, 82 insertions(+), 63 deletions(-) create mode 100644 python/cudf/cudf/utils/nvtx_annotation.py diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py index f056a0fd592..91f3b2cd544 100644 --- a/python/cudf/cudf/core/buffer/spill_manager.py +++ b/python/cudf/cudf/core/buffer/spill_manager.py @@ -11,14 +11,20 @@ import weakref from collections import defaultdict from dataclasses import dataclass +from functools import partial from typing import Dict, List, Optional, Tuple import rmm.mr from cudf.core.buffer.spillable_buffer import SpillableBuffer from cudf.options import get_option +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate from cudf.utils.string import 
format_bytes +_spill_cudf_nvtx_annotate = partial( + _cudf_nvtx_annotate, domain="cudf_python-spill" +) + def get_traceback() -> str: """Pretty print current traceback to a string""" @@ -329,6 +335,7 @@ def buffers( ret = tuple(sorted(ret, key=lambda b: b.last_accessed)) return ret + @_spill_cudf_nvtx_annotate def spill_device_memory(self, nbytes: int) -> int: """Try to spill device memory diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index 84fb2044c62..1856bec1876 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -20,6 +20,7 @@ get_ptr_and_size, host_memory_allocation, ) +from cudf.utils.nvtx_annotation import _get_color_for_nvtx, annotate from cudf.utils.string import format_bytes if TYPE_CHECKING: @@ -291,8 +292,15 @@ def spill(self, target: str = "cpu") -> None: ) if (ptr_type, target) == ("gpu", "cpu"): - host_mem = host_memory_allocation(self.size) - rmm._lib.device_buffer.copy_ptr_to_host(self._ptr, host_mem) + with annotate( + message="SpillDtoH", + color=_get_color_for_nvtx("SpillDtoH"), + domain="cudf_python-spill", + ): + host_mem = host_memory_allocation(self.size) + rmm._lib.device_buffer.copy_ptr_to_host( + self._ptr, host_mem + ) self._ptr_desc["memoryview"] = host_mem self._ptr = 0 self._owner = None @@ -302,9 +310,15 @@ def spill(self, target: str = "cpu") -> None: # trigger a new call to this buffer's `spill()`. # Therefore, it is important that spilling-on-demand doesn't # try to unspill an already locked buffer! 
- dev_mem = rmm.DeviceBuffer.to_device( - self._ptr_desc.pop("memoryview") - ) + with annotate( + message="SpillHtoD", + color=_get_color_for_nvtx("SpillHtoD"), + domain="cudf_python-spill", + ): + + dev_mem = rmm.DeviceBuffer.to_device( + self._ptr_desc.pop("memoryview") + ) self._ptr = dev_mem.ptr self._owner = dev_mem assert self._size == dev_mem.size diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index ead2f182e2d..b38345af83d 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -95,11 +95,8 @@ min_scalar_type, numeric_normalize_types, ) -from cudf.utils.utils import ( - GetAttrGetItemMixin, - _cudf_nvtx_annotate, - _external_only_api, -) +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api _cupy_nan_methods_map = { "min": "nanmin", diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 1e6d177f8ca..7cb78bc8d1f 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -47,11 +47,8 @@ from cudf.utils import ioutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import find_common_type -from cudf.utils.utils import ( - _array_ufunc, - _cudf_nvtx_annotate, - _warn_no_dask_cudf, -) +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf # TODO: It looks like Frame is missing a declaration of `copy`, need to add diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index e1740140b44..3b8f0f3824a 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -29,7 +29,8 @@ from cudf.core.mixins import Reducible, Scannable from cudf.core.multiindex import MultiIndex from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply -from cudf.utils.utils import GetAttrGetItemMixin, 
_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import GetAttrGetItemMixin # The three functions below return the quantiles [25%, 50%, 75%] diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index de8a5948033..5c323bda9ea 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -64,12 +64,8 @@ is_mixed_with_object_dtype, numeric_normalize_types, ) -from cudf.utils.utils import ( - _cudf_nvtx_annotate, - _is_same_name, - _warn_no_dask_cudf, - search_range, -) +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import _is_same_name, _warn_no_dask_cudf, search_range def _lexsorted_equal_range( diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 1008cbdb67f..b1fb47eb790 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -69,7 +69,8 @@ ) from cudf.utils import docutils from cudf.utils._numba import _CUDFNumbaConfig -from cudf.utils.utils import _cudf_nvtx_annotate, _warn_no_dask_cudf +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import _warn_no_dask_cudf doc_reset_index_template = """ Reset the index of the {klass}, or a level of it. 
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 21380bb841c..87a11478870 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -26,7 +26,8 @@ from cudf.core._compat import PANDAS_GE_150 from cudf.core.frame import Frame from cudf.core.index import BaseIndex, _lexsorted_equal_range, as_index -from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate, _is_same_name +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import NotIterable, _is_same_name def _maybe_indices_to_slice(indices: cp.ndarray) -> Union[slice, cp.ndarray]: diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index a195738af54..00ba722136e 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -90,7 +90,7 @@ is_mixed_with_object_dtype, to_cudf_compatible_scalar, ) -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate def _format_percentile_names(percentiles): diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 6a56ab8f3a5..e30e1c747f5 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -19,7 +19,8 @@ ) from cudf.core.column import ColumnBase, as_column from cudf.core.frame import Frame -from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import NotIterable class SingleColumnFrame(Frame, NotIterable): diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py index b18720f5db5..5dbcf455e33 100644 --- a/python/cudf/cudf/core/udf/groupby_utils.py +++ b/python/cudf/cudf/core/udf/groupby_utils.py @@ -28,7 +28,7 @@ _supported_dtypes_from_frame, ) from cudf.utils._numba import _CUDFNumbaConfig -from cudf.utils.utils import 
_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate def _get_frame_groupby_type(dtype, index_dtype): diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index 35a3f6c1ffd..7b7ac2b3070 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -39,7 +39,8 @@ STRING_TYPES, TIMEDELTA_TYPES, ) -from cudf.utils.utils import _cudf_nvtx_annotate, initfunc +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import initfunc # Maximum size of a string column is 2 GiB _STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get( diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index bacc0641639..764885dd7b6 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -11,7 +11,7 @@ from cudf.api.types import is_scalar from cudf.utils import ioutils from cudf.utils.dtypes import _maybe_convert_to_default_type -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index d8510cf8e95..d84aff66d7b 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -22,7 +22,7 @@ from cudf.api.types import is_list_like from cudf.core.column import build_categorical_column, column_empty, full from cudf.utils import ioutils -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate BYTE_SIZES = { "kb": 1000, diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py index eb2c7fa7ef6..0e19972f6e0 100644 --- a/python/cudf/cudf/io/text.py +++ b/python/cudf/cudf/io/text.py @@ -1,11 +1,11 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. 
from io import BytesIO, StringIO import cudf from cudf._lib import text as libtext from cudf.utils import ioutils -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/utils/nvtx_annotation.py b/python/cudf/cudf/utils/nvtx_annotation.py new file mode 100644 index 00000000000..a4404e51232 --- /dev/null +++ b/python/cudf/cudf/utils/nvtx_annotation.py @@ -0,0 +1,30 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +import hashlib +from functools import partial + +from nvtx import annotate + +_NVTX_COLORS = ["green", "blue", "purple", "rapids"] + + +def _get_color_for_nvtx(name): + m = hashlib.sha256() + m.update(name.encode()) + hash_value = int(m.hexdigest(), 16) + idx = hash_value % len(_NVTX_COLORS) + return _NVTX_COLORS[idx] + + +def _cudf_nvtx_annotate(func, domain="cudf_python"): + """Decorator for applying nvtx annotations to methods in cudf.""" + return annotate( + message=func.__qualname__, + color=_get_color_for_nvtx(func.__qualname__), + domain=domain, + )(func) + + +_dask_cudf_nvtx_annotate = partial( + _cudf_nvtx_annotate, domain="dask_cudf_python" +) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index e2cb3f145a1..0ff23bd37c6 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -2,15 +2,12 @@ import decimal import functools -import hashlib import os import traceback import warnings -from functools import partial from typing import FrozenSet, Set, Union import numpy as np -from nvtx import annotate import rmm @@ -119,8 +116,6 @@ def _array_ufunc(obj, ufunc, method, inputs, kwargs): "__ge__", } -_NVTX_COLORS = ["green", "blue", "purple", "rapids"] - # The test root is set by pytest to support situations where tests are run from # a source tree on a built version of cudf. 
NO_EXTERNAL_ONLY_APIS = os.getenv("NO_EXTERNAL_ONLY_APIS") @@ -353,28 +348,6 @@ def is_na_like(obj): return obj is None or obj is cudf.NA or obj is cudf.NaT -def _get_color_for_nvtx(name): - m = hashlib.sha256() - m.update(name.encode()) - hash_value = int(m.hexdigest(), 16) - idx = hash_value % len(_NVTX_COLORS) - return _NVTX_COLORS[idx] - - -def _cudf_nvtx_annotate(func, domain="cudf_python"): - """Decorator for applying nvtx annotations to methods in cudf.""" - return annotate( - message=func.__qualname__, - color=_get_color_for_nvtx(func.__qualname__), - domain=domain, - )(func) - - -_dask_cudf_nvtx_annotate = partial( - _cudf_nvtx_annotate, domain="dask_cudf_python" -) - - def _warn_no_dask_cudf(fn): @functools.wraps(fn) def wrapper(self): diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 344b03c631d..7b35c71ff09 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -42,7 +42,7 @@ import cudf from cudf.api.types import is_string_dtype -from cudf.utils.utils import _dask_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate from .core import DataFrame, Index, Series diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 5b37e6e825c..17650c9b70d 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -22,7 +22,7 @@ import cudf from cudf import _lib as libcudf -from cudf.utils.utils import _dask_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate from dask_cudf import sorting from dask_cudf.accessors import ListMethods, StructMethods diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index f4bbcaf4dd1..b1fdf443a17 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -15,7 +15,7 @@ from dask.utils import funcname import cudf -from cudf.utils.utils import 
_dask_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate # aggregations that are dask-cudf optimized OPTIMIZED_AGGS = ( diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py index d6c9c1be73c..27ba82c390c 100644 --- a/python/dask_cudf/dask_cudf/sorting.py +++ b/python/dask_cudf/dask_cudf/sorting.py @@ -16,7 +16,7 @@ import cudf as gd from cudf.api.types import is_categorical_dtype -from cudf.utils.utils import _dask_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate _SHUFFLE_SUPPORT = ("tasks", "p2p") # "disk" not supported From 260269c3928297165550d102c927f42ac5f81776 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Thu, 19 Oct 2023 13:43:40 -0500 Subject: [PATCH 67/76] rename workflow repository (#14300) --- .github/workflows/build.yaml | 16 ++++++++-------- .github/workflows/pr.yaml | 28 ++++++++++++++-------------- .github/workflows/test.yaml | 16 ++++++++-------- ci/release/update-version.sh | 2 +- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0e120d34bb1..666d8844a80 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: 
upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-23.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: ${{ inputs.build_type || 'branch' }} @@ -100,7 +100,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.10 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-23.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 054ea7968c8..cb9d67639ef 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -26,34 +26,34 @@ jobs: - wheel-build-dask-cudf - wheel-tests-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.10 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-23.10 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10 with: build_type: pull-request conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.10 with: build_type: pull-request conda-python-cudf-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 with: build_type: pull-request test_script: "ci/test_python_cudf.sh" @@ -61,14 +61,14 @@ jobs: # Tests for dask_cudf, 
custreamz, cudf_kafka are separated for CI parallelism needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 with: build_type: pull-request test_script: "ci/test_python_other.sh" conda-java-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -78,7 +78,7 @@ jobs: conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -88,7 +88,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -98,21 +98,21 @@ jobs: wheel-build-cudf: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 with: build_type: pull-request script: ci/test_wheel_cudf.sh wheel-build-dask-cudf: needs: wheel-tests-cudf secrets: inherit - uses: 
rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request @@ -120,7 +120,7 @@ jobs: wheel-tests-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 030f2e41db4..1ba0a7491a8 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -36,7 +36,7 @@ jobs: run_script: "ci/test_cpp_memcheck.sh" conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism 
secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: test_script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 with: matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) build_type: nightly diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 5e735a71994..eac64fe1a0f 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -113,7 +113,7 @@ sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_ # CI files for FILE in .github/workflows/*.yaml; do - sed_runner 
"/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" + sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE}; done sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh From d36904b16fe6b0244fe63973f3a7ae0987062beb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 19 Oct 2023 14:01:50 -0700 Subject: [PATCH 68/76] Downgrade to Arrow 12.0.0 for aws-sdk-cpp and fix cudf_kafka builds for new CI containers (#14296) The aws-sdk-cpp pinning introduced in https://github.com/rapidsai/cudf/pull/14173 causes problems because newer builds of libarrow require a newer version of aws-sdk-cpp. Even though we restrict to libarrow 12.0.1, this restriction is insufficient to create solvable environments because the conda (mamba) solver doesn't seem to consistently reach far back enough into the history of builds to pull the last build that was compatible with the aws-sdk-cpp version that we need. For now, the safest way for us to avoid this problem is to downgrade to arrow 12.0.0, for which all conda package builds are pinned to the older version of aws-sdk-cpp that does not have the bug in question. Separately, while the above issue was encountered we also got new builds of our CI images [that removed system installs of CTK packages from CUDA 12 images](https://github.com/rapidsai/ci-imgs/pull/77). This change was made because for CUDA 12 we can get all the necessary pieces of the CTK from conda-forge. However, it turns out that the cudf_kafka builds were implicitly relying on system CTK packages, and the cudf_kafka build is in fact not fully compatible with conda-forge CTK packages because it is not using CMake via scikit-build (nor any other more sophisticated library discovery mechanism like pkg-config) and therefore does not know how to find conda-forge CTK headers/libraries.
This PR introduces a set of temporary patches to get around this limitation. These patches are not a long-term fix, and are only put in place assuming that #14292 is merged in the near future before we cut a 23.12 release. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/14296 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 4 ++-- conda/environments/all_cuda-120_arch-x86_64.yaml | 5 +++-- conda/recipes/cudf/meta.yaml | 2 +- conda/recipes/cudf_kafka/build.sh | 14 +++++++++++++- conda/recipes/cudf_kafka/meta.yaml | 11 +++++++++++ conda/recipes/libcudf/conda_build_config.yaml | 4 ++-- conda/recipes/libcudf/meta.yaml | 4 ++-- cpp/cmake/thirdparty/get_arrow.cmake | 2 +- dependencies.yaml | 16 +++++++++------- python/cudf/pyproject.toml | 4 ++-- python/cudf_kafka/pyproject.toml | 2 +- 11 files changed, 47 insertions(+), 21 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 8976a4b14cb..2c79cbb6b6c 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -40,7 +40,7 @@ dependencies: - hypothesis - identify>=2.5.20 - ipython -- libarrow==12.0.1.* +- libarrow==12.0.0.* - libcufile-dev=1.4.0.31 - libcufile=1.4.0.31 - libcurand-dev=10.3.0.86 @@ -69,7 +69,7 @@ dependencies: - pre-commit - protobuf>=4.21,<5 - ptxcompiler -- pyarrow==12.0.1.* +- pyarrow==12.0.0.* - pydata-sphinx-theme - pyorc - pytest diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index f54d78593c3..c96b7428882 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -17,6 +17,7 @@ dependencies: - cachetools - cmake>=3.26.4 - cuda-cudart-dev +- cuda-gdb - cuda-nvcc - cuda-nvrtc-dev - 
cuda-nvtx-dev @@ -41,7 +42,7 @@ dependencies: - hypothesis - identify>=2.5.20 - ipython -- libarrow==12.0.1.* +- libarrow==12.0.0.* - libcufile-dev - libcurand-dev - libkvikio==23.12.* @@ -66,7 +67,7 @@ dependencies: - pip - pre-commit - protobuf>=4.21,<5 -- pyarrow==12.0.1.* +- pyarrow==12.0.0.* - pydata-sphinx-theme - pyorc - pytest diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 54b687faa69..16b064a262e 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -61,7 +61,7 @@ requirements: - scikit-build >=0.13.1 - setuptools - dlpack >=0.5,<0.6.0a0 - - pyarrow =12 + - pyarrow =12.0.0 - libcudf ={{ version }} - rmm ={{ minor_version }} {% if cuda_major == "11" %} diff --git a/conda/recipes/cudf_kafka/build.sh b/conda/recipes/cudf_kafka/build.sh index 5d8720f1c98..f4bb6e1bc91 100644 --- a/conda/recipes/cudf_kafka/build.sh +++ b/conda/recipes/cudf_kafka/build.sh @@ -1,4 +1,16 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. # This assumes the script is executed from the root of the repo directory +# Need to set CUDA_HOME inside conda environments because the hacked together +# setup.py for cudf-kafka searches that way. +# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates +# cudf_kafka to use scikit-build +CUDA_MAJOR=${RAPIDS_CUDA_VERSION%%.*} +if [[ ${CUDA_MAJOR} == "12" ]]; then + target_name="x86_64-linux" + if [[ ! 
$(arch) == "x86_64" ]]; then + target_name="sbsa-linux" + fi + export CUDA_HOME="${PREFIX}/targets/${target_name}/" +fi ./build.sh -v cudf_kafka diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index ec0cc402511..a79c23b7d98 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -33,6 +33,9 @@ build: - SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64] - SCCACHE_S3_USE_SSL - SCCACHE_S3_NO_CREDENTIALS + # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates + # cudf_kafka to use scikit-build + - RAPIDS_CUDA_VERSION requirements: build: @@ -41,6 +44,11 @@ requirements: - {{ compiler('cxx') }} - ninja - sysroot_{{ target_platform }} {{ sysroot_version }} + # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates + # cudf_kafka to use scikit-build + {% if cuda_major == "12" %} + - cuda-gdb + {% endif %} host: - python - cython >=3.0.0 @@ -48,6 +56,9 @@ requirements: - cudf ={{ version }} - libcudf_kafka ={{ version }} - setuptools + {% if cuda_major == "12" %} + - cuda-cudart-dev + {% endif %} run: - python - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index b1f5b083e06..4d33bb89220 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -25,8 +25,8 @@ gtest_version: aws_sdk_cpp_version: - "<1.11" -libarrow_version: - - "=12" +libarrow: + - "==12.0.0" dlpack_version: - ">=0.5,<0.6.0a0" diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 28357f0d96d..b9aff2a9c82 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -65,7 +65,7 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - nvcomp {{ nvcomp_version }} - - libarrow {{ libarrow_version }} + - libarrow {{ libarrow }} - dlpack {{ dlpack_version }} - 
librdkafka {{ librdkafka_version }} - fmt {{ fmt_version }} @@ -104,7 +104,7 @@ outputs: - nvcomp {{ nvcomp_version }} - librmm ={{ minor_version }} - libkvikio ={{ minor_version }} - - libarrow {{ libarrow_version }} + - libarrow {{ libarrow }} - dlpack {{ dlpack_version }} - gtest {{ gtest_version }} - gmock {{ gtest_version }} diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 10d3145a36f..c2d5cfbaf78 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -411,7 +411,7 @@ if(NOT DEFINED CUDF_VERSION_Arrow) set(CUDF_VERSION_Arrow # This version must be kept in sync with the libarrow version pinned for builds in # dependencies.yaml. - 12.0.1 + 12.0.0 CACHE STRING "The version of Arrow to find (or build)" ) endif() diff --git a/dependencies.yaml b/dependencies.yaml index c19e8765be3..e8114fa5615 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -62,6 +62,7 @@ files: includes: - cudatoolkit - docs + - libarrow_run - py_version py_build_cudf: output: pyproject @@ -225,7 +226,7 @@ dependencies: - &gmock gmock>=1.13.0 # Hard pin the patch version used during the build. This must be kept # in sync with the version pinned in get_arrow.cmake. - - libarrow==12.0.1.* + - &libarrow libarrow==12.0.0.* - librdkafka>=1.9.0,<1.10.0a0 # Align nvcomp version with rapids-cmake - nvcomp==2.6.1 @@ -243,7 +244,7 @@ dependencies: - cython>=3.0.0 # Hard pin the patch version used during the build. This must be kept # in sync with the version pinned in get_arrow.cmake. 
- - pyarrow==12.0.1.* + - &pyarrow pyarrow==12.0.0.* # TODO: Pin to numpy<1.25 until cudf requires pandas 2 - &numpy numpy>=1.21,<1.25 build_python: @@ -260,16 +261,14 @@ dependencies: - protoc-wheel libarrow_run: common: - - output_types: [conda, requirements] + - output_types: conda packages: - # Allow runtime version to float up to minor version - - libarrow==12.* + - *libarrow pyarrow_run: common: - output_types: [conda, requirements, pyproject] packages: - # Allow runtime version to float up to minor version - - pyarrow==12.* + - *pyarrow cudatoolkit: specific: - output_types: conda @@ -282,6 +281,9 @@ dependencies: - cuda-nvrtc-dev - cuda-nvtx-dev - libcurand-dev + # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates + # cudf_kafka to use scikit-build + - cuda-gdb - matrix: cuda: "11.8" packages: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 39a8dca0267..ccb5d5d4416 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -8,7 +8,7 @@ requires = [ "ninja", "numpy>=1.21,<1.25", "protoc-wheel", - "pyarrow==12.0.1.*", + "pyarrow==12.0.0.*", "rmm==23.12.*", "scikit-build>=0.13.1", "setuptools", @@ -38,7 +38,7 @@ dependencies = [ "pandas>=1.3,<1.6.0dev0", "protobuf>=4.21,<5", "ptxcompiler", - "pyarrow==12.*", + "pyarrow==12.0.0.*", "rmm==23.12.*", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 78a7a83ac3a..ff475e5a72e 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -5,7 +5,7 @@ requires = [ "cython>=3.0.0", "numpy>=1.21,<1.25", - "pyarrow==12.0.1.*", + "pyarrow==12.0.0.*", "setuptools", "wheel", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From 50e2211506bf3de88bb625b2b6d684d0d799c274 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 20 Oct 2023 10:00:08 +0200 Subject: [PATCH 69/76] Changes JSON reader's recovery option's behaviour to ignore all characters after a valid JSON record (#14279) Closes https://github.com/rapidsai/cudf/issues/14226. The new behavior of `JSON_LINES_RECOVER` will now ignore excess characters after the first valid JSON record on each JSON line. ``` { "number": 1 } { "number": 1 } xyz { "number": 1 } {} { "number": 1 } { "number": 4 } ``` **Implementation details:** The JSON parser pushdown automaton was changed for `JSON_LINES_RECOVER` format such that when in state `PD_PVL` (`post-value`, "I have just finished parsing a value") and when the stack context is `ROOT` ("I'm not somewhere within a list or struct"), we just treat all characters as "white space" until encountering a newline character. `post-value` in stack context `ROOT` is exactly the condition we are in after having parsed the first valid record of a JSON line. _Thanks to @karthikeyann for suggesting to use `PD_PVL` as the capturing state._ As the stack context is generated upfront, we have to fix up and correct the stack context to set the stack context as `ROOT` stack context for all these excess characters.
I.e., (`_` means `ROOT` stack context, `{` means within a `STRUCT` stack context): ``` in: {"a":1}{"this is supposed to be ignored"} stack: _{{{{{{_{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{ ``` Needs to be fixed up to become: ``` in: {"a":1}{"this is supposed to be ignored"} stack: _{{{{{{__________________________________ ``` Authors: - Elias Stehle (https://github.com/elstehle) - Karthikeyan (https://github.com/karthikeyann) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/14279 --- cpp/src/io/fst/lookup_tables.cuh | 4 +- cpp/src/io/json/nested_json_gpu.cu | 165 +++++++++++++++++++++++++---- cpp/tests/io/json_test.cpp | 71 ++++++++++++- cpp/tests/io/nested_json_test.cpp | 2 +- 4 files changed, 218 insertions(+), 24 deletions(-) diff --git a/cpp/src/io/fst/lookup_tables.cuh b/cpp/src/io/fst/lookup_tables.cuh index 37c99453361..42036b79751 100644 --- a/cpp/src/io/fst/lookup_tables.cuh +++ b/cpp/src/io/fst/lookup_tables.cuh @@ -753,7 +753,7 @@ class TranslationOp { RelativeOffsetT const relative_offset, SymbolT const read_symbol) const { - return translation_op(*this, state_id, match_id, relative_offset, read_symbol); + return translation_op(state_id, match_id, relative_offset, read_symbol); } template @@ -761,7 +761,7 @@ class TranslationOp { SymbolIndexT const match_id, SymbolT const read_symbol) const { - return translation_op(*this, state_id, match_id, read_symbol); + return translation_op(state_id, match_id, read_symbol); } }; diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index c9107357239..3702d94fd2b 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -91,6 +91,98 @@ void check_input_size(std::size_t input_size) namespace cudf::io::json { +// FST to help fixing the stack context of characters that follow the first record on each JSON line +namespace 
fix_stack_of_excess_chars { + +// Type used to represent the target state in the transition table +using StateT = char; + +// Type used to represent a symbol group id +using SymbolGroupT = uint8_t; + +/** + * @brief Definition of the DFA's states. + */ +enum class dfa_states : StateT { + // Before the first record on the JSON line + BEFORE, + // Within the first record on the JSON line + WITHIN, + // Excess data that follows the first record on the JSON line + EXCESS, + // Total number of states + NUM_STATES +}; + +/** + * @brief Definition of the symbol groups + */ +enum class dfa_symbol_group_id : SymbolGroupT { + ROOT, ///< Symbol for root stack context + DELIMITER, ///< Line delimiter symbol group + OTHER, ///< Symbol group that implicitly matches all other tokens + NUM_SYMBOL_GROUPS ///< Total number of symbol groups +}; + +constexpr auto TT_NUM_STATES = static_cast(dfa_states::NUM_STATES); +constexpr auto NUM_SYMBOL_GROUPS = static_cast(dfa_symbol_group_id::NUM_SYMBOL_GROUPS); + +/** + * @brief Function object to map (input_symbol,stack_context) tuples to a symbol group. + */ +struct SymbolPairToSymbolGroupId { + CUDF_HOST_DEVICE SymbolGroupT operator()(thrust::tuple symbol) const + { + auto const input_symbol = thrust::get<0>(symbol); + auto const stack_symbol = thrust::get<1>(symbol); + return static_cast( + input_symbol == '\n' + ? dfa_symbol_group_id::DELIMITER + : (stack_symbol == '_' ? dfa_symbol_group_id::ROOT : dfa_symbol_group_id::OTHER)); + } +}; + +/** + * @brief Translation function object that fixes the stack context of excess data that follows after + * the first JSON record on each line. 
+ */ +struct TransduceInputOp { + template + constexpr CUDF_HOST_DEVICE StackSymbolT operator()(StateT const state_id, + SymbolGroupT const match_id, + RelativeOffsetT const relative_offset, + SymbolT const read_symbol) const + { + if (state_id == static_cast(dfa_states::EXCESS)) { return '_'; } + return thrust::get<1>(read_symbol); + } + + template + constexpr CUDF_HOST_DEVICE int32_t operator()(StateT const state_id, + SymbolGroupT const match_id, + SymbolT const read_symbol) const + { + constexpr int32_t single_output_item = 1; + return single_output_item; + } +}; + +// Aliases for readability of the transition table +constexpr auto TT_BEFORE = dfa_states::BEFORE; +constexpr auto TT_INSIDE = dfa_states::WITHIN; +constexpr auto TT_EXCESS = dfa_states::EXCESS; + +// Transition table +std::array, TT_NUM_STATES> constexpr transition_table{ + {/* IN_STATE ROOT NEWLINE OTHER */ + /* TT_BEFORE */ {{TT_BEFORE, TT_BEFORE, TT_INSIDE}}, + /* TT_INSIDE */ {{TT_EXCESS, TT_BEFORE, TT_INSIDE}}, + /* TT_EXCESS */ {{TT_EXCESS, TT_BEFORE, TT_EXCESS}}}}; + +// The DFA's starting state +constexpr auto start_state = static_cast(dfa_states::BEFORE); +} // namespace fix_stack_of_excess_chars + // FST to prune tokens of invalid lines for recovering JSON lines format namespace token_filter { @@ -146,9 +238,8 @@ struct UnwrapTokenFromSymbolOp { * invalid lines. 
*/ struct TransduceToken { - template - constexpr CUDF_HOST_DEVICE SymbolT operator()(TransducerTableT const&, - StateT const state_id, + template + constexpr CUDF_HOST_DEVICE SymbolT operator()(StateT const state_id, SymbolGroupT const match_id, RelativeOffsetT const relative_offset, SymbolT const read_symbol) const @@ -165,9 +256,8 @@ struct TransduceToken { } } - template - constexpr CUDF_HOST_DEVICE int32_t operator()(TransducerTableT const&, - StateT const state_id, + template + constexpr CUDF_HOST_DEVICE int32_t operator()(StateT const state_id, SymbolGroupT const match_id, SymbolT const read_symbol) const { @@ -643,6 +733,11 @@ auto get_transition_table(json_format_cfg_t format) // PD_ANL describes the target state after a new line after encountering error state auto const PD_ANL = (format == json_format_cfg_t::JSON_LINES_RECOVER) ? PD_BOV : PD_ERR; + // Target state after having parsed the first JSON value on a JSON line + // Spark has the special need to ignore everything that comes after the first JSON object + // on a JSON line instead of marking those as invalid + auto const PD_AFS = (format == json_format_cfg_t::JSON_LINES_RECOVER) ? 
PD_PVL : PD_ERR; + // First row: empty stack ("root" level of the JSON) // Second row: '[' on top of stack (we're parsing a list value) // Third row: '{' on top of stack (we're parsing a struct value) @@ -668,7 +763,7 @@ auto get_transition_table(json_format_cfg_t format) PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_BOV, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_BOV, PD_STR}; pda_tt[static_cast(pda_state_t::PD_PVL)] = { - PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_BOV, PD_ERR, + PD_AFS, PD_AFS, PD_AFS, PD_AFS, PD_AFS, PD_AFS, PD_AFS, PD_AFS, PD_PVL, PD_BOV, PD_AFS, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_BOV, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_BOV, PD_ERR}; pda_tt[static_cast(pda_state_t::PD_BFN)] = { @@ -733,6 +828,18 @@ auto get_translation_table(bool recover_from_error) return regular_tokens; }; + /** + * @brief Helper function that returns `recovering_tokens` if `recover_from_error` is true and + * returns `regular_tokens` otherwise. This is used to ignore excess characters after the first + * value in the case of JSON lines that recover from invalid lines, as Spark ignores any excess + * characters that follow the first record on a JSON line. 
+ */ + auto alt_tokens = [recover_from_error](std::vector regular_tokens, + std::vector recovering_tokens) { + if (recover_from_error) { return recovering_tokens; } + return regular_tokens; + }; + std::array, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tlt; pda_tlt[static_cast(pda_state_t::PD_BOV)] = {{ /*ROOT*/ {StructBegin}, // OPENING_BRACE @@ -920,18 +1027,18 @@ auto get_translation_table(bool recover_from_error) {}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_PVL)] = { - { /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - nl_tokens({}, {}), // LINE_BREAK - {ErrorBegin}, // OTHER + { /*ROOT*/ + {alt_tokens({ErrorBegin}, {})}, // OPENING_BRACE + {alt_tokens({ErrorBegin}, {})}, // OPENING_BRACKET + {alt_tokens({ErrorBegin}, {})}, // CLOSING_BRACE + {alt_tokens({ErrorBegin}, {})}, // CLOSING_BRACKET + {alt_tokens({ErrorBegin}, {})}, // QUOTE + {alt_tokens({ErrorBegin}, {})}, // ESCAPE + {alt_tokens({ErrorBegin}, {})}, // COMMA + {alt_tokens({ErrorBegin}, {})}, // COLON + {}, // WHITE_SPACE + nl_tokens({}, {}), // LINE_BREAK + {alt_tokens({ErrorBegin}, {})}, // OTHER /*LIST*/ {ErrorBegin}, // OPENING_BRACE {ErrorBegin}, // OPENING_BRACKET @@ -1446,6 +1553,26 @@ std::pair, rmm::device_uvector> ge // character. auto zip_in = thrust::make_zip_iterator(json_in.data(), stack_symbols.data()); + // Spark, as the main stakeholder in the `recover_from_error` option, has the specific need to + // ignore any characters that follow the first value on each JSON line. This is an FST that + // fixes the stack context for those excess characters. 
That is, that all those excess characters + // will be interpreted in the root stack context + if (recover_from_error) { + auto fix_stack_of_excess_chars = fst::detail::make_fst( + fst::detail::make_symbol_group_lookup_op( + fix_stack_of_excess_chars::SymbolPairToSymbolGroupId{}), + fst::detail::make_transition_table(fix_stack_of_excess_chars::transition_table), + fst::detail::make_translation_functor(fix_stack_of_excess_chars::TransduceInputOp{}), + stream); + fix_stack_of_excess_chars.Transduce(zip_in, + static_cast(json_in.size()), + stack_symbols.data(), + thrust::make_discard_iterator(), + thrust::make_discard_iterator(), + fix_stack_of_excess_chars::start_state, + stream); + } + constexpr auto max_translation_table_size = tokenizer_pda::NUM_PDA_SGIDS * static_cast(tokenizer_pda::pda_state_t::PD_NUM_STATES); diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 2ddb0b76544..0149a467c32 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -1957,11 +1957,11 @@ TEST_F(JsonReaderTest, JSONLinesRecovering) // 2 -> (invalid) R"({"b":{"a":[321})" "\n" - // 3 -> c: [1] (valid) + // 3 -> c: 1.2 (valid) R"({"c":1.2})" "\n" "\n" - // 4 -> a: 123 (valid) + // 4 -> a: 4 (valid) R"({"a":4})" "\n" // 5 -> (invalid) @@ -2020,4 +2020,71 @@ TEST_F(JsonReaderTest, JSONLinesRecovering) c_validity.cbegin()}); } +TEST_F(JsonReaderTest, JSONLinesRecoveringIgnoreExcessChars) +{ + /** + * @brief Spark has the specific need to ignore extra characters that come after the first record + * on a JSON line + */ + std::string data = + // 0 -> a: -2 (valid) + R"({"a":-2}{})" + "\n" + // 1 -> (invalid) + R"({"b":{}should_be_invalid})" + "\n" + // 2 -> b (valid) + R"({"b":{"a":3} })" + "\n" + // 3 -> c: (valid) + R"({"c":1.2 } )" + "\n" + "\n" + // 4 -> (valid) + R"({"a":4} 123)" + "\n" + // 5 -> (valid) + R"({"a":5}//Comment after record)" + "\n" + // 6 -> (valid) + R"({"a":6} //Comment after whitespace)" + "\n" + // 7 -> (invalid) + R"({"a":5 
//Invalid Comment within record})"; + + auto filepath = temp_env->get_temp_dir() + "RecoveringLinesExcessChars.json"; + { + std::ofstream outfile(filepath, std::ofstream::out); + outfile << data; + } + + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) + .lines(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL); + + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + + EXPECT_EQ(result.tbl->num_columns(), 3); + EXPECT_EQ(result.tbl->num_rows(), 8); + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64); + EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::STRUCT); + EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::FLOAT64); + + std::vector a_validity{true, false, false, false, true, true, true, false}; + std::vector b_validity{false, false, true, false, false, false, false, false}; + std::vector c_validity{false, false, false, true, false, false, false, false}; + + // Child column b->a + auto b_a_col = int64_wrapper({0, 0, 3, 0, 0, 0, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), + int64_wrapper{{-2, 0, 0, 0, 4, 5, 6, 0}, a_validity.cbegin()}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + result.tbl->get_column(1), cudf::test::structs_column_wrapper({b_a_col}, b_validity.cbegin())); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + result.tbl->get_column(2), + float64_wrapper{{0.0, 0.0, 0.0, 1.2, 0.0, 0.0, 0.0, 0.0}, c_validity.cbegin()}); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp index 3cb7e1f287a..5f79d5b862b 100644 --- a/cpp/tests/io/nested_json_test.cpp +++ b/cpp/tests/io/nested_json_test.cpp @@ -543,7 +543,7 @@ TEST_F(JsonTest, RecoveringTokenStream) { // Test input. 
Inline comments used to indicate character indexes // 012345678 <= line 0 - std::string const input = R"({"a":-2},)" + std::string const input = R"({"a":2 {})" // 9 "\n" // 01234 <= line 1 From f7ad66f440168fd4eceb3cc900301661023e42a1 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Fri, 20 Oct 2023 09:31:16 -0700 Subject: [PATCH 70/76] Add DELTA_BINARY_PACKED encoder for Parquet writer (#14100) Part of #13501. Adds ability to fall back on DELTA_BINARY_PACKED encoding when V2 page headers are selected and dictionary encoding is not possible. Authors: - Ed Seidl (https://github.com/etseidl) - Yunsong Wang (https://github.com/PointKernel) - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14100 --- cpp/src/io/parquet/delta_binary.cuh | 6 - cpp/src/io/parquet/delta_enc.cuh | 290 ++++++++ cpp/src/io/parquet/page_enc.cu | 998 ++++++++++++++++++++-------- cpp/src/io/parquet/parquet_gpu.hpp | 51 +- cpp/tests/io/parquet_test.cpp | 108 ++- 5 files changed, 1156 insertions(+), 297 deletions(-) create mode 100644 cpp/src/io/parquet/delta_enc.cuh diff --git a/cpp/src/io/parquet/delta_binary.cuh b/cpp/src/io/parquet/delta_binary.cuh index a513e6674b4..e3b23f4c0a0 100644 --- a/cpp/src/io/parquet/delta_binary.cuh +++ b/cpp/src/io/parquet/delta_binary.cuh @@ -46,12 +46,6 @@ namespace cudf::io::parquet::detail { // encoded with DELTA_LENGTH_BYTE_ARRAY encoding, which is a DELTA_BINARY_PACKED list of suffix // lengths, followed by the concatenated suffix data. -// TODO: The delta encodings use ULEB128 integers, but for now we're only -// using max 64 bits. Need to see what the performance impact is of using -// __int128_t rather than int64_t. -using uleb128_t = uint64_t; -using zigzag128_t = int64_t; - // we decode one mini-block at a time. max mini-block size seen is 64. 
constexpr int delta_rolling_buf_size = 128; diff --git a/cpp/src/io/parquet/delta_enc.cuh b/cpp/src/io/parquet/delta_enc.cuh new file mode 100644 index 00000000000..28f8cdfe2c1 --- /dev/null +++ b/cpp/src/io/parquet/delta_enc.cuh @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "parquet_gpu.hpp" + +#include +#include + +#include + +namespace cudf::io::parquet::detail { + +namespace delta { + +inline __device__ void put_uleb128(uint8_t*& p, uleb128_t v) +{ + while (v > 0x7f) { + *(p++) = v | 0x80; + v >>= 7; + } + *(p++) = v; +} + +inline __device__ void put_zz128(uint8_t*& p, zigzag128_t v) +{ + zigzag128_t s = (v < 0); + put_uleb128(p, (v ^ -s) * 2 + s); +} + +// A block size of 128, with 4 mini-blocks of 32 values each fits nicely without consuming +// too much shared memory. +// The parquet spec requires block_size to be a multiple of 128, and values_per_mini_block +// to be a multiple of 32. +constexpr int block_size = 128; +constexpr int num_mini_blocks = 4; +constexpr int values_per_mini_block = block_size / num_mini_blocks; +constexpr int buffer_size = 2 * block_size; + +// An extra sanity checks to enforce compliance with the parquet specification. 
+static_assert(block_size % 128 == 0); +static_assert(values_per_mini_block % 32 == 0); + +using block_reduce = cub::BlockReduce; +using warp_reduce = cub::WarpReduce; +using index_scan = cub::BlockScan; + +constexpr int rolling_idx(int index) { return rolling_index(index); } + +// Version of bit packer that can handle up to 64 bits values. +// T is the type to use for processing. if nbits <= 32 use uint32_t, otherwise unsigned long long +// (not uint64_t because of atomicOr's typing). allowing this to be selectable since there's a +// measurable impact to using the wider types. +template +inline __device__ void bitpack_mini_block( + uint8_t* dst, uleb128_t val, uint32_t count, uint8_t nbits, void* temp_space) +{ + using wide_type = + std::conditional_t, __uint128_t, uint64_t>; + using cudf::detail::warp_size; + scratch_type constexpr mask = sizeof(scratch_type) * 8 - 1; + auto constexpr div = sizeof(scratch_type) * 8; + + auto const lane_id = threadIdx.x % warp_size; + auto const warp_id = threadIdx.x / warp_size; + + auto const scratch = reinterpret_cast(temp_space) + warp_id * warp_size; + + // zero out scratch + scratch[lane_id] = 0; + __syncwarp(); + + // TODO: see if there is any savings using special packing for easy bitwidths (1,2,4,8,16...) + // like what's done for the RLE encoder. + if (nbits == div) { + if (lane_id < count) { + for (int i = 0; i < sizeof(scratch_type); i++) { + dst[lane_id * sizeof(scratch_type) + i] = val & 0xff; + val >>= 8; + } + } + return; + } + + if (lane_id <= count) { + // Shift symbol left by up to mask bits. + wide_type v2 = val; + v2 <<= (lane_id * nbits) & mask; + + // Copy N bit word into two N/2 bit words while following C++ strict aliasing rules. + scratch_type v1[2]; + memcpy(&v1, &v2, sizeof(wide_type)); + + // Atomically write result to scratch. 
+ if (v1[0]) { atomicOr(scratch + ((lane_id * nbits) / div), v1[0]); } + if (v1[1]) { atomicOr(scratch + ((lane_id * nbits) / div) + 1, v1[1]); } + } + __syncwarp(); + + // Copy scratch data to final destination. + auto const available_bytes = util::div_rounding_up_safe(count * nbits, 8U); + auto const scratch_bytes = reinterpret_cast(scratch); + + for (uint32_t i = lane_id; i < available_bytes; i += warp_size) { + dst[i] = scratch_bytes[i]; + } + __syncwarp(); +} + +} // namespace delta + +// Object used to turn a stream of integers into a DELTA_BINARY_PACKED stream. This takes as input +// 128 values with validity at a time, saving them until there are enough values for a block +// to be written. +// T is the input data type (either zigzag128_t or uleb128_t). +template +class delta_binary_packer { + private: + uint8_t* _dst; // sink to dump encoded values to + T* _buffer; // buffer to store values to be encoded + size_type _current_idx; // index of first value in buffer + uint32_t _num_values; // total number of values to encode + size_type _values_in_buffer; // current number of values stored in _buffer + uint8_t _mb_bits[delta::num_mini_blocks]; // bitwidth for each mini-block + + // pointers to shared scratch memory for the warp and block scans/reduces + delta::index_scan::TempStorage* _scan_tmp; + delta::warp_reduce::TempStorage* _warp_tmp; + delta::block_reduce::TempStorage* _block_tmp; + + void* _bitpack_tmp; // pointer to shared scratch memory used in bitpacking + + // Write the delta binary header. Only call from thread 0. + inline __device__ void write_header() + { + delta::put_uleb128(_dst, delta::block_size); + delta::put_uleb128(_dst, delta::num_mini_blocks); + delta::put_uleb128(_dst, _num_values); + delta::put_zz128(_dst, _buffer[0]); + } + + // Write the block header. Only call from thread 0. 
+ inline __device__ void write_block_header(zigzag128_t block_min) + { + delta::put_zz128(_dst, block_min); + memcpy(_dst, _mb_bits, 4); + _dst += 4; + } + + // Signed subtraction with defined wrapping behavior. + inline __device__ zigzag128_t subtract(zigzag128_t a, zigzag128_t b) + { + return static_cast(static_cast(a) - static_cast(b)); + } + + public: + inline __device__ auto num_values() const { return _num_values; } + + // Initialize the object. Only call from thread 0. + inline __device__ void init(uint8_t* dest, uint32_t num_values, T* buffer, void* temp_storage) + { + _dst = dest; + _num_values = num_values; + _buffer = buffer; + _scan_tmp = reinterpret_cast(temp_storage); + _warp_tmp = reinterpret_cast(temp_storage); + _block_tmp = reinterpret_cast(temp_storage); + _bitpack_tmp = _buffer + delta::buffer_size; + _current_idx = 0; + _values_in_buffer = 0; + } + + // Each thread calls this to add its current value. + inline __device__ void add_value(T value, bool is_valid) + { + // Figure out the correct position for the given value. + size_type const valid = is_valid; + size_type pos; + size_type num_valid; + delta::index_scan(*_scan_tmp).ExclusiveSum(valid, pos, num_valid); + + if (is_valid) { _buffer[delta::rolling_idx(pos + _current_idx + _values_in_buffer)] = value; } + __syncthreads(); + + if (threadIdx.x == 0) { + _values_in_buffer += num_valid; + // if first pass write header + if (_current_idx == 0) { + write_header(); + _current_idx = 1; + _values_in_buffer -= 1; + } + } + __syncthreads(); + + if (_values_in_buffer >= delta::block_size) { flush(); } + } + + // Called by each thread to flush data to the sink. + inline __device__ uint8_t const* flush() + { + using cudf::detail::warp_size; + __shared__ zigzag128_t block_min; + + int const t = threadIdx.x; + int const warp_id = t / warp_size; + int const lane_id = t % warp_size; + + if (_values_in_buffer <= 0) { return _dst; } + + // Calculate delta for this thread. 
+ size_type const idx = _current_idx + t; + zigzag128_t const delta = idx < _num_values ? subtract(_buffer[delta::rolling_idx(idx)], + _buffer[delta::rolling_idx(idx - 1)]) + : std::numeric_limits::max(); + + // Find min delta for the block. + auto const min_delta = delta::block_reduce(*_block_tmp).Reduce(delta, cub::Min()); + + if (t == 0) { block_min = min_delta; } + __syncthreads(); + + // Compute frame of reference for the block. + uleb128_t const norm_delta = idx < _num_values ? subtract(delta, block_min) : 0; + + // Get max normalized delta for each warp, and use that to determine how many bits to use + // for the bitpacking of this warp. + zigzag128_t const warp_max = + delta::warp_reduce(_warp_tmp[warp_id]).Reduce(norm_delta, cub::Max()); + __syncwarp(); + + if (lane_id == 0) { _mb_bits[warp_id] = sizeof(zigzag128_t) * 8 - __clzll(warp_max); } + __syncthreads(); + + // write block header + if (t == 0) { write_block_header(block_min); } + __syncthreads(); + + // Now each warp encodes its data...can calculate starting offset with _mb_bits. + // NOTE: using a switch here rather than a loop because the compiler produces code that + // uses fewer registers. + int cumulative_bits = 0; + switch (warp_id) { + case 3: cumulative_bits += _mb_bits[2]; [[fallthrough]]; + case 2: cumulative_bits += _mb_bits[1]; [[fallthrough]]; + case 1: cumulative_bits += _mb_bits[0]; + } + uint8_t* const mb_ptr = _dst + cumulative_bits * delta::values_per_mini_block / 8; + + // encoding happens here + auto const warp_idx = _current_idx + warp_id * delta::values_per_mini_block; + if (warp_idx < _num_values) { + auto const num_enc = min(delta::values_per_mini_block, _num_values - warp_idx); + if (_mb_bits[warp_id] > 32) { + delta::bitpack_mini_block( + mb_ptr, norm_delta, num_enc, _mb_bits[warp_id], _bitpack_tmp); + } else { + delta::bitpack_mini_block( + mb_ptr, norm_delta, num_enc, _mb_bits[warp_id], _bitpack_tmp); + } + } + __syncthreads(); + + // Last warp updates global delta ptr. 
+ if (warp_id == delta::num_mini_blocks - 1 && lane_id == 0) { + _dst = mb_ptr + _mb_bits[warp_id] * delta::values_per_mini_block / 8; + _current_idx = min(warp_idx + delta::values_per_mini_block, _num_values); + _values_in_buffer = max(_values_in_buffer - delta::block_size, 0U); + } + __syncthreads(); + + return _dst; + } +}; + +} // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 78873d5e8ca..1e4f061d2e0 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "delta_enc.cuh" #include "parquet_gpu.cuh" #include @@ -21,6 +22,7 @@ #include #include #include +#include #include #include @@ -41,13 +43,19 @@ #include #include +#include + namespace cudf::io::parquet::detail { namespace { using ::cudf::detail::device_2dspan; -constexpr uint32_t rle_buffer_size = (1 << 9); +constexpr int encode_block_size = 128; +constexpr int rle_buffer_size = 2 * encode_block_size; +constexpr int num_encode_warps = encode_block_size / cudf::detail::warp_size; + +constexpr int rolling_idx(int pos) { return rolling_index(pos); } // do not truncate statistics constexpr int32_t NO_TRUNC_STATS = 0; @@ -69,6 +77,7 @@ struct frag_init_state_s { PageFragment frag; }; +template struct page_enc_state_s { uint8_t* cur; //!< current output ptr uint8_t* rle_out; //!< current RLE write ptr @@ -81,14 +90,15 @@ struct page_enc_state_s { uint32_t rle_rpt_count; uint32_t page_start_val; uint32_t chunk_start_val; - volatile uint32_t rpt_map[4]; - volatile uint32_t scratch_red[32]; + volatile uint32_t rpt_map[num_encode_warps]; EncPage page; EncColumnChunk ck; parquet_column_device_view col; - uint32_t vals[rle_buffer_size]; + uint32_t vals[rle_buf_size]; }; +using rle_page_enc_state_s = page_enc_state_s; + /** * @brief Returns the size of the type in the Parquet file. 
*/ @@ -205,6 +215,12 @@ void __device__ calculate_frag_size(frag_init_state_s* const s, int t) } } +/** + * @brief Determine the correct page encoding for the given page parameters. + * + * This is only used by the plain and dictionary encoders. Delta encoders will set the page + * encoding directly. + */ Encoding __device__ determine_encoding(PageType page_type, Type physical_type, bool use_dictionary, @@ -216,7 +232,6 @@ Encoding __device__ determine_encoding(PageType page_type, switch (page_type) { case PageType::DATA_PAGE: return use_dictionary ? Encoding::PLAIN_DICTIONARY : Encoding::PLAIN; case PageType::DATA_PAGE_V2: - // TODO need to work in delta encodings here when they're added return physical_type == BOOLEAN ? Encoding::RLE : use_dictionary ? Encoding::RLE_DICTIONARY : Encoding::PLAIN; @@ -236,6 +251,50 @@ struct BitwiseOr { } }; +// I is the column type from the input table +template +__device__ uint8_t const* delta_encode(page_enc_state_s<0>* s, + uint32_t valid_count, + uint64_t* buffer, + void* temp_space) +{ + using output_type = std::conditional_t, zigzag128_t, uleb128_t>; + __shared__ delta_binary_packer packer; + + auto const t = threadIdx.x; + if (t == 0) { + packer.init(s->cur, valid_count, reinterpret_cast(buffer), temp_space); + } + __syncthreads(); + + // TODO(ets): in the plain encoder the scaling is a little different for INT32 than INT64. + // might need to modify this if there's a big performance hit in the 32-bit case. + int32_t const scale = s->col.ts_scale == 0 ? 1 : s->col.ts_scale; + for (uint32_t cur_val_idx = 0; cur_val_idx < s->page.num_leaf_values;) { + uint32_t const nvals = min(s->page.num_leaf_values - cur_val_idx, delta::block_size); + + size_type const val_idx_in_block = cur_val_idx + t; + size_type const val_idx = s->page_start_val + val_idx_in_block; + + bool const is_valid = + (val_idx < s->col.leaf_column->size() && val_idx_in_block < s->page.num_leaf_values) + ? 
s->col.leaf_column->is_valid(val_idx) + : false; + + cur_val_idx += nvals; + + output_type v = s->col.leaf_column->element(val_idx); + if (scale < 0) { + v /= -scale; + } else { + v *= scale; + } + packer.add_value(v, is_valid); + } + + return packer.flush(); +} + } // anonymous namespace // blockDim {512,1,1} @@ -323,6 +382,29 @@ __global__ void __launch_bounds__(128) } } +__device__ size_t delta_data_len(Type physical_type, cudf::type_id type_id, uint32_t num_values) +{ + auto const dtype_len_out = physical_type_len(physical_type, type_id); + auto const dtype_len = [&]() -> uint32_t { + if (physical_type == INT32) { return int32_logical_len(type_id); } + if (physical_type == INT96) { return sizeof(int64_t); } + return dtype_len_out; + }(); + + auto const vals_per_block = delta::block_size; + size_t const num_blocks = util::div_rounding_up_unsafe(num_values, vals_per_block); + // need max dtype_len + 1 bytes for min_delta + // one byte per mini block for the bitwidth + // and block_size * dtype_len bytes for the actual encoded data + auto const block_size = dtype_len + 1 + delta::num_mini_blocks + vals_per_block * dtype_len; + + // delta header is 2 bytes for the block_size, 1 byte for number of mini-blocks, + // max 5 bytes for number of values, and max dtype_len + 1 for first value. + auto const header_size = 2 + 1 + 5 + dtype_len + 1; + + return header_size + num_blocks * block_size; +} + // blockDim {128,1,1} __global__ void __launch_bounds__(128) gpuInitPages(device_2dspan chunks, @@ -354,6 +436,14 @@ __global__ void __launch_bounds__(128) page_g = {}; } __syncthreads(); + + // if writing delta encoded values, we're going to need to know the data length to get a guess + // at the worst case number of bytes needed to encode. 
+ auto const physical_type = col_g.physical_type; + auto const type_id = col_g.leaf_column->type().id(); + auto const is_use_delta = + write_v2_headers && !ck_g.use_dictionary && (physical_type == INT32 || physical_type == INT64); + if (t < 32) { uint32_t fragments_in_chunk = 0; uint32_t rows_in_page = 0; @@ -403,9 +493,12 @@ __global__ void __launch_bounds__(128) } __syncwarp(); if (t == 0) { - if (not pages.empty()) pages[ck_g.first_page] = page_g; - if (not page_sizes.empty()) page_sizes[ck_g.first_page] = page_g.max_data_size; - if (page_grstats) page_grstats[ck_g.first_page] = pagestats_g; + if (not pages.empty()) { + page_g.kernel_mask = encode_kernel_mask::PLAIN; + pages[ck_g.first_page] = page_g; + } + if (not page_sizes.empty()) { page_sizes[ck_g.first_page] = page_g.max_data_size; } + if (page_grstats) { page_grstats[ck_g.first_page] = pagestats_g; } } num_pages = 1; } @@ -505,7 +598,12 @@ __global__ void __launch_bounds__(128) page_g.num_values = values_in_page; auto const def_level_size = max_RLE_page_size(col_g.num_def_level_bits(), values_in_page); auto const rep_level_size = max_RLE_page_size(col_g.num_rep_level_bits(), values_in_page); - auto const max_data_size = page_size + def_level_size + rep_level_size + rle_pad; + // get a different bound if using delta encoding + if (is_use_delta) { + page_size = + max(page_size, delta_data_len(physical_type, type_id, page_g.num_leaf_values)); + } + auto const max_data_size = page_size + def_level_size + rep_level_size + rle_pad; // page size must fit in 32-bit signed integer if (max_data_size > std::numeric_limits::max()) { CUDF_UNREACHABLE("page size exceeds maximum for i32"); @@ -525,7 +623,16 @@ __global__ void __launch_bounds__(128) } __syncwarp(); if (t == 0) { - if (not pages.empty()) { pages[ck_g.first_page + num_pages] = page_g; } + if (not pages.empty()) { + if (is_use_delta) { + page_g.kernel_mask = encode_kernel_mask::DELTA_BINARY; + } else if (ck_g.use_dictionary || physical_type == BOOLEAN) { + 
page_g.kernel_mask = encode_kernel_mask::DICTIONARY; + } else { + page_g.kernel_mask = encode_kernel_mask::PLAIN; + } + pages[ck_g.first_page + num_pages] = page_g; + } if (not page_sizes.empty()) { page_sizes[ck_g.first_page + num_pages] = page_g.max_data_size; } @@ -789,8 +896,12 @@ inline __device__ void PackLiterals( * @param[in] t thread id (0..127) */ static __device__ void RleEncode( - page_enc_state_s* s, uint32_t numvals, uint32_t nbits, uint32_t flush, uint32_t t) + rle_page_enc_state_s* s, uint32_t numvals, uint32_t nbits, uint32_t flush, uint32_t t) { + using cudf::detail::warp_size; + auto const lane_id = t % warp_size; + auto const warp_id = t / warp_size; + uint32_t rle_pos = s->rle_pos; uint32_t rle_run = s->rle_run; @@ -798,20 +909,20 @@ static __device__ void RleEncode( uint32_t pos = rle_pos + t; if (rle_run > 0 && !(rle_run & 1)) { // Currently in a long repeat run - uint32_t mask = ballot(pos < numvals && s->vals[pos & (rle_buffer_size - 1)] == s->run_val); + uint32_t mask = ballot(pos < numvals && s->vals[rolling_idx(pos)] == s->run_val); uint32_t rle_rpt_count, max_rpt_count; - if (!(t & 0x1f)) { s->rpt_map[t >> 5] = mask; } + if (lane_id == 0) { s->rpt_map[warp_id] = mask; } __syncthreads(); - if (t < 32) { + if (t < warp_size) { uint32_t c32 = ballot(t >= 4 || s->rpt_map[t] != 0xffff'ffffu); - if (!t) { + if (t == 0) { uint32_t last_idx = __ffs(c32) - 1; s->rle_rpt_count = - last_idx * 32 + ((last_idx < 4) ? __ffs(~s->rpt_map[last_idx]) - 1 : 0); + last_idx * warp_size + ((last_idx < 4) ? 
__ffs(~s->rpt_map[last_idx]) - 1 : 0); } } __syncthreads(); - max_rpt_count = min(numvals - rle_pos, 128); + max_rpt_count = min(numvals - rle_pos, encode_block_size); rle_rpt_count = s->rle_rpt_count; rle_run += rle_rpt_count << 1; rle_pos += rle_rpt_count; @@ -828,17 +939,17 @@ static __device__ void RleEncode( } } else { // New run or in a literal run - uint32_t v0 = s->vals[pos & (rle_buffer_size - 1)]; - uint32_t v1 = s->vals[(pos + 1) & (rle_buffer_size - 1)]; + uint32_t v0 = s->vals[rolling_idx(pos)]; + uint32_t v1 = s->vals[rolling_idx(pos + 1)]; uint32_t mask = ballot(pos + 1 < numvals && v0 == v1); - uint32_t maxvals = min(numvals - rle_pos, 128); + uint32_t maxvals = min(numvals - rle_pos, encode_block_size); uint32_t rle_lit_count, rle_rpt_count; - if (!(t & 0x1f)) { s->rpt_map[t >> 5] = mask; } + if (lane_id == 0) { s->rpt_map[warp_id] = mask; } __syncthreads(); - if (t < 32) { + if (t < warp_size) { // Repeat run can only start on a multiple of 8 values - uint32_t idx8 = (t * 8) >> 5; - uint32_t pos8 = (t * 8) & 0x1f; + uint32_t idx8 = (t * 8) / warp_size; + uint32_t pos8 = (t * 8) % warp_size; uint32_t m0 = (idx8 < 4) ? s->rpt_map[idx8] : 0; uint32_t m1 = (idx8 < 3) ? s->rpt_map[idx8 + 1] : 0; uint32_t needed_mask = kRleRunMask[nbits - 1]; @@ -847,8 +958,8 @@ static __device__ void RleEncode( uint32_t rle_run_start = (mask != 0) ? min((__ffs(mask) - 1) * 8, maxvals) : maxvals; uint32_t rpt_len = 0; if (rle_run_start < maxvals) { - uint32_t idx_cur = rle_run_start >> 5; - uint32_t idx_ofs = rle_run_start & 0x1f; + uint32_t idx_cur = rle_run_start / warp_size; + uint32_t idx_ofs = rle_run_start % warp_size; while (idx_cur < 4) { m0 = (idx_cur < 4) ? s->rpt_map[idx_cur] : 0; m1 = (idx_cur < 3) ? 
s->rpt_map[idx_cur + 1] : 0; @@ -857,7 +968,7 @@ static __device__ void RleEncode( rpt_len += __ffs(mask) - 1; break; } - rpt_len += 32; + rpt_len += warp_size; idx_cur++; } } @@ -928,7 +1039,7 @@ static __device__ void RleEncode( * @param[in] flush nonzero if last batch in block * @param[in] t thread id (0..127) */ -static __device__ void PlainBoolEncode(page_enc_state_s* s, +static __device__ void PlainBoolEncode(rle_page_enc_state_s* s, uint32_t numvals, uint32_t flush, uint32_t t) @@ -938,7 +1049,7 @@ static __device__ void PlainBoolEncode(page_enc_state_s* s, while (rle_pos < numvals) { uint32_t pos = rle_pos + t; - uint32_t v = (pos < numvals) ? s->vals[pos & (rle_buffer_size - 1)] : 0; + uint32_t v = (pos < numvals) ? s->vals[rolling_idx(pos)] : 0; uint32_t n = min(numvals - rle_pos, 128); uint32_t nbytes = (n + ((flush) ? 7 : 0)) >> 3; if (!nbytes) { break; } @@ -992,28 +1103,22 @@ __device__ auto julian_days_with_time(int64_t v) return std::make_pair(dur_time_of_day_nanos, julian_days); } +// this has been split out into its own kernel because of the amount of shared memory required +// for the state buffer. encode kernels that don't use the RLE buffer can get started while +// the level data is encoded. 
// blockDim(128, 1, 1) template -__global__ void __launch_bounds__(128, 8) - gpuEncodePages(device_span pages, - device_span> comp_in, - device_span> comp_out, - device_span comp_results, - bool write_v2_headers) +__global__ void __launch_bounds__(block_size, 8) gpuEncodePageLevels(device_span pages, + bool write_v2_headers, + encode_kernel_mask kernel_mask) { - __shared__ __align__(8) page_enc_state_s state_g; - using block_reduce = cub::BlockReduce; - using block_scan = cub::BlockScan; - __shared__ union { - typename block_reduce::TempStorage reduce_storage; - typename block_scan::TempStorage scan_storage; - } temp_storage; + __shared__ __align__(8) rle_page_enc_state_s state_g; - page_enc_state_s* const s = &state_g; - auto const t = threadIdx.x; + auto* const s = &state_g; + uint32_t const t = threadIdx.x; if (t == 0) { - state_g = page_enc_state_s{}; + state_g = rle_page_enc_state_s{}; s->page = pages[blockIdx.x]; s->ck = *s->page.chunk; s->col = *s->ck.col_desc; @@ -1026,6 +1131,8 @@ __global__ void __launch_bounds__(128, 8) } __syncthreads(); + if (BitAnd(s->page.kernel_mask, kernel_mask) == 0) { return; } + auto const is_v2 = s->page.page_type == PageType::DATA_PAGE_V2; // Encode Repetition and Definition levels @@ -1078,23 +1185,24 @@ __global__ void __launch_bounds__(128, 8) } while (is_col_struct); return def; }(); - s->vals[(rle_numvals + t) & (rle_buffer_size - 1)] = def_lvl; + s->vals[rolling_idx(rle_numvals + t)] = def_lvl; __syncthreads(); rle_numvals += nrows; RleEncode(s, rle_numvals, def_lvl_bits, (rle_numvals == s->page.num_rows), t); __syncthreads(); } if (t < 32) { - uint8_t* const cur = s->cur; - uint8_t* const rle_out = s->rle_out; - uint32_t const rle_bytes = static_cast(rle_out - cur) - (is_v2 ? 0 : 4); - if (is_v2 && t == 0) { + uint8_t* const cur = s->cur; + uint8_t* const rle_out = s->rle_out; + // V2 does not write the RLE length field + uint32_t const rle_bytes = + static_cast(rle_out - cur) - (is_v2 ? 
0 : RLE_LENGTH_FIELD_LEN); + if (not is_v2 && t < RLE_LENGTH_FIELD_LEN) { cur[t] = rle_bytes >> (t * 8); } + __syncwarp(); + if (t == 0) { + s->cur = rle_out; s->page.def_lvl_bytes = rle_bytes; - } else if (not is_v2 && t < 4) { - cur[t] = rle_bytes >> (t * 8); } - __syncwarp(); - if (t == 0) { s->cur = rle_out; } } } } else if (s->page.page_type != PageType::DICTIONARY_PAGE && @@ -1121,29 +1229,121 @@ __global__ void __launch_bounds__(128, 8) uint32_t idx = page_first_val_idx + rle_numvals + t; uint32_t lvl_val = (rle_numvals + t < s->page.num_values && idx < col_last_val_idx) ? lvl_val_data[idx] : 0; - s->vals[(rle_numvals + t) & (rle_buffer_size - 1)] = lvl_val; + s->vals[rolling_idx(rle_numvals + t)] = lvl_val; __syncthreads(); rle_numvals += nvals; RleEncode(s, rle_numvals, nbits, (rle_numvals == s->page.num_values), t); __syncthreads(); } if (t < 32) { - uint8_t* const cur = s->cur; - uint8_t* const rle_out = s->rle_out; - uint32_t const rle_bytes = static_cast(rle_out - cur) - (is_v2 ? 0 : 4); - if (is_v2 && t == 0) { + uint8_t* const cur = s->cur; + uint8_t* const rle_out = s->rle_out; + // V2 does not write the RLE length field + uint32_t const rle_bytes = + static_cast(rle_out - cur) - (is_v2 ? 
0 : RLE_LENGTH_FIELD_LEN); + if (not is_v2 && t < RLE_LENGTH_FIELD_LEN) { cur[t] = rle_bytes >> (t * 8); } + __syncwarp(); + if (t == 0) { + s->cur = rle_out; lvl_bytes = rle_bytes; - } else if (not is_v2 && t < 4) { - cur[t] = rle_bytes >> (t * 8); } - __syncwarp(); - if (t == 0) { s->cur = rle_out; } } }; encode_levels(s->col.rep_values, s->col.num_rep_level_bits(), s->page.rep_lvl_bytes); __syncthreads(); encode_levels(s->col.def_values, s->col.num_def_level_bits(), s->page.def_lvl_bytes); } + + if (t == 0) { pages[blockIdx.x] = s->page; } +} + +template +__device__ void finish_page_encode(state_buf* s, + uint32_t valid_count, + uint8_t const* end_ptr, + device_span pages, + device_span> comp_in, + device_span> comp_out, + device_span comp_results, + bool write_v2_headers) +{ + auto const t = threadIdx.x; + + // V2 does not compress rep and def level data + size_t const skip_comp_size = + write_v2_headers ? s->page.def_lvl_bytes + s->page.rep_lvl_bytes : 0; + + if (t == 0) { + // only need num_nulls for v2 data page headers + if (write_v2_headers) { s->page.num_nulls = s->page.num_values - valid_count; } + uint8_t const* const base = s->page.page_data + s->page.max_hdr_size; + auto const actual_data_size = static_cast(end_ptr - base); + if (actual_data_size > s->page.max_data_size) { + CUDF_UNREACHABLE("detected possible page data corruption"); + } + s->page.max_data_size = actual_data_size; + if (not comp_in.empty()) { + comp_in[blockIdx.x] = {base + skip_comp_size, actual_data_size - skip_comp_size}; + comp_out[blockIdx.x] = {s->page.compressed_data + s->page.max_hdr_size + skip_comp_size, + 0}; // size is unused + } + pages[blockIdx.x] = s->page; + if (not comp_results.empty()) { + comp_results[blockIdx.x] = {0, compression_status::FAILURE}; + pages[blockIdx.x].comp_res = &comp_results[blockIdx.x]; + } + } + + // copy uncompressed bytes over + if (skip_comp_size != 0 && not comp_in.empty()) { + uint8_t* const src = s->page.page_data + s->page.max_hdr_size; + 
uint8_t* const dst = s->page.compressed_data + s->page.max_hdr_size; + for (int i = t; i < skip_comp_size; i += block_size) { + dst[i] = src[i]; + } + } +} + +// PLAIN page data encoder +// blockDim(128, 1, 1) +template +__global__ void __launch_bounds__(block_size, 8) + gpuEncodePages(device_span pages, + device_span> comp_in, + device_span> comp_out, + device_span comp_results, + bool write_v2_headers) +{ + __shared__ __align__(8) page_enc_state_s<0> state_g; + using block_reduce = cub::BlockReduce; + using block_scan = cub::BlockScan; + __shared__ union { + typename block_reduce::TempStorage reduce_storage; + typename block_scan::TempStorage scan_storage; + } temp_storage; + + auto* const s = &state_g; + uint32_t t = threadIdx.x; + + if (t == 0) { + state_g = page_enc_state_s<0>{}; + s->page = pages[blockIdx.x]; + s->ck = *s->page.chunk; + s->col = *s->ck.col_desc; + s->rle_len_pos = nullptr; + // get s->cur back to where it was at the end of encoding the rep and def level data + s->cur = + s->page.page_data + s->page.max_hdr_size + s->page.def_lvl_bytes + s->page.rep_lvl_bytes; + // if V1 data page, need space for the RLE length fields + if (s->page.page_type == PageType::DATA_PAGE) { + if (s->col.num_def_level_bits() != 0) { s->cur += RLE_LENGTH_FIELD_LEN; } + if (s->col.num_rep_level_bits() != 0) { s->cur += RLE_LENGTH_FIELD_LEN; } + } + } + __syncthreads(); + + if (BitAnd(s->page.kernel_mask, encode_kernel_mask::PLAIN) == 0) { return; } + // Encode data values __syncthreads(); auto const physical_type = s->col.physical_type; @@ -1155,10 +1355,6 @@ __global__ void __launch_bounds__(128, 8) return dtype_len_out; }(); - auto const dict_bits = (physical_type == BOOLEAN) ? 1 - : (s->ck.use_dictionary and s->page.page_type != PageType::DICTIONARY_PAGE) - ? 
s->ck.dict_rle_bits - : -1; if (t == 0) { uint8_t* dst = s->cur; s->rle_run = 0; @@ -1167,219 +1363,315 @@ __global__ void __launch_bounds__(128, 8) s->rle_out = dst; s->page.encoding = determine_encoding(s->page.page_type, physical_type, s->ck.use_dictionary, write_v2_headers); - if (dict_bits >= 0 && physical_type != BOOLEAN) { - dst[0] = dict_bits; - s->rle_out = dst + 1; - } else if (is_v2 && physical_type == BOOLEAN) { - // save space for RLE length. we don't know the total length yet. - s->rle_out = dst + RLE_LENGTH_FIELD_LEN; - s->rle_len_pos = dst; - } s->page_start_val = row_to_value_idx(s->page.start_row, s->col); s->chunk_start_val = row_to_value_idx(s->ck.start_row, s->col); } __syncthreads(); + uint32_t num_valid = 0; for (uint32_t cur_val_idx = 0; cur_val_idx < s->page.num_leaf_values;) { - uint32_t nvals = min(s->page.num_leaf_values - cur_val_idx, 128); + uint32_t nvals = min(s->page.num_leaf_values - cur_val_idx, block_size); uint32_t len, pos; auto [is_valid, val_idx] = [&]() { uint32_t val_idx; uint32_t is_valid; - size_type val_idx_in_block = cur_val_idx + t; + size_type const val_idx_in_block = cur_val_idx + t; if (s->page.page_type == PageType::DICTIONARY_PAGE) { val_idx = val_idx_in_block; is_valid = (val_idx < s->page.num_leaf_values); if (is_valid) { val_idx = s->ck.dict_data[val_idx]; } } else { - size_type val_idx_in_leaf_col = s->page_start_val + val_idx_in_block; + size_type const val_idx_in_leaf_col = s->page_start_val + val_idx_in_block; is_valid = (val_idx_in_leaf_col < s->col.leaf_column->size() && val_idx_in_block < s->page.num_leaf_values) ? s->col.leaf_column->is_valid(val_idx_in_leaf_col) : 0; - val_idx = - (s->ck.use_dictionary) ? 
val_idx_in_leaf_col - s->chunk_start_val : val_idx_in_leaf_col; + val_idx = val_idx_in_leaf_col; } return std::make_tuple(is_valid, val_idx); }(); - if (is_valid) num_valid++; - + if (is_valid) { num_valid++; } cur_val_idx += nvals; - if (dict_bits >= 0) { - // Dictionary encoding - if (dict_bits > 0) { - uint32_t rle_numvals; - uint32_t rle_numvals_in_block; - block_scan(temp_storage.scan_storage).ExclusiveSum(is_valid, pos, rle_numvals_in_block); - rle_numvals = s->rle_numvals; - if (is_valid) { - uint32_t v; - if (physical_type == BOOLEAN) { - v = s->col.leaf_column->element(val_idx); - } else { - v = s->ck.dict_index[val_idx]; - } - s->vals[(rle_numvals + pos) & (rle_buffer_size - 1)] = v; - } - rle_numvals += rle_numvals_in_block; - __syncthreads(); - if (!is_v2 && physical_type == BOOLEAN) { - PlainBoolEncode(s, rle_numvals, (cur_val_idx == s->page.num_leaf_values), t); - } else { - RleEncode(s, rle_numvals, dict_bits, (cur_val_idx == s->page.num_leaf_values), t); + + // Non-dictionary encoding + uint8_t* dst = s->cur; + + if (is_valid) { + len = dtype_len_out; + if (physical_type == BYTE_ARRAY) { + if (type_id == type_id::STRING) { + len += s->col.leaf_column->element(val_idx).size_bytes(); + } else if (s->col.output_as_byte_array && type_id == type_id::LIST) { + len += + get_element(*s->col.leaf_column, val_idx).size_bytes(); } - __syncthreads(); } - if (t == 0) { s->cur = s->rle_out; } - __syncthreads(); } else { - // Non-dictionary encoding - uint8_t* dst = s->cur; - - if (is_valid) { - len = dtype_len_out; - if (physical_type == BYTE_ARRAY) { - if (type_id == type_id::STRING) { - len += s->col.leaf_column->element(val_idx).size_bytes(); - } else if (s->col.output_as_byte_array && type_id == type_id::LIST) { - len += - get_element(*s->col.leaf_column, val_idx).size_bytes(); + len = 0; + } + uint32_t total_len = 0; + block_scan(temp_storage.scan_storage).ExclusiveSum(len, pos, total_len); + __syncthreads(); + if (t == 0) { s->cur = dst + total_len; } + if 
(is_valid) { + switch (physical_type) { + case INT32: [[fallthrough]]; + case FLOAT: { + auto const v = [dtype_len = dtype_len_in, + idx = val_idx, + col = s->col.leaf_column, + scale = s->col.ts_scale == 0 ? 1 : s->col.ts_scale]() -> int32_t { + switch (dtype_len) { + case 8: return col->element(idx) * scale; + case 4: return col->element(idx) * scale; + case 2: return col->element(idx) * scale; + default: return col->element(idx) * scale; + } + }(); + + dst[pos + 0] = v; + dst[pos + 1] = v >> 8; + dst[pos + 2] = v >> 16; + dst[pos + 3] = v >> 24; + } break; + case INT64: { + int64_t v = s->col.leaf_column->element(val_idx); + int32_t ts_scale = s->col.ts_scale; + if (ts_scale != 0) { + if (ts_scale < 0) { + v /= -ts_scale; + } else { + v *= ts_scale; + } + } + dst[pos + 0] = v; + dst[pos + 1] = v >> 8; + dst[pos + 2] = v >> 16; + dst[pos + 3] = v >> 24; + dst[pos + 4] = v >> 32; + dst[pos + 5] = v >> 40; + dst[pos + 6] = v >> 48; + dst[pos + 7] = v >> 56; + } break; + case INT96: { + int64_t v = s->col.leaf_column->element(val_idx); + int32_t ts_scale = s->col.ts_scale; + if (ts_scale != 0) { + if (ts_scale < 0) { + v /= -ts_scale; + } else { + v *= ts_scale; + } } - } - } else { - len = 0; - } - uint32_t total_len = 0; - block_scan(temp_storage.scan_storage).ExclusiveSum(len, pos, total_len); - __syncthreads(); - if (t == 0) { s->cur = dst + total_len; } - if (is_valid) { - switch (physical_type) { - case INT32: [[fallthrough]]; - case FLOAT: { - auto const v = [dtype_len = dtype_len_in, - idx = val_idx, - col = s->col.leaf_column, - scale = s->col.ts_scale == 0 ? 
1 : s->col.ts_scale]() -> int32_t { - switch (dtype_len) { - case 8: return col->element(idx) * scale; - case 4: return col->element(idx) * scale; - case 2: return col->element(idx) * scale; - default: return col->element(idx) * scale; - } - }(); - dst[pos + 0] = v; - dst[pos + 1] = v >> 8; - dst[pos + 2] = v >> 16; - dst[pos + 3] = v >> 24; - } break; - case INT64: { - int64_t v = s->col.leaf_column->element(val_idx); - int32_t ts_scale = s->col.ts_scale; - if (ts_scale != 0) { - if (ts_scale < 0) { - v /= -ts_scale; - } else { - v *= ts_scale; - } + auto const [last_day_nanos, julian_days] = [&] { + using namespace cuda::std::chrono; + switch (s->col.leaf_column->type().id()) { + case type_id::TIMESTAMP_SECONDS: + case type_id::TIMESTAMP_MILLISECONDS: { + return julian_days_with_time(v); + } break; + case type_id::TIMESTAMP_MICROSECONDS: + case type_id::TIMESTAMP_NANOSECONDS: { + return julian_days_with_time(v); + } break; } - dst[pos + 0] = v; - dst[pos + 1] = v >> 8; - dst[pos + 2] = v >> 16; - dst[pos + 3] = v >> 24; - dst[pos + 4] = v >> 32; - dst[pos + 5] = v >> 40; - dst[pos + 6] = v >> 48; - dst[pos + 7] = v >> 56; - } break; - case INT96: { - int64_t v = s->col.leaf_column->element(val_idx); - int32_t ts_scale = s->col.ts_scale; - if (ts_scale != 0) { - if (ts_scale < 0) { - v /= -ts_scale; - } else { - v *= ts_scale; - } + return julian_days_with_time(0); + }(); + + // the 12 bytes of fixed length data. 
+ v = last_day_nanos.count(); + dst[pos + 0] = v; + dst[pos + 1] = v >> 8; + dst[pos + 2] = v >> 16; + dst[pos + 3] = v >> 24; + dst[pos + 4] = v >> 32; + dst[pos + 5] = v >> 40; + dst[pos + 6] = v >> 48; + dst[pos + 7] = v >> 56; + uint32_t w = julian_days.count(); + dst[pos + 8] = w; + dst[pos + 9] = w >> 8; + dst[pos + 10] = w >> 16; + dst[pos + 11] = w >> 24; + } break; + + case DOUBLE: { + auto v = s->col.leaf_column->element(val_idx); + memcpy(dst + pos, &v, 8); + } break; + case BYTE_ARRAY: { + auto const bytes = [](cudf::type_id const type_id, + column_device_view const* leaf_column, + uint32_t const val_idx) -> void const* { + switch (type_id) { + case type_id::STRING: + return reinterpret_cast( + leaf_column->element(val_idx).data()); + case type_id::LIST: + return reinterpret_cast( + get_element(*(leaf_column), val_idx).data()); + default: CUDF_UNREACHABLE("invalid type id for byte array writing!"); } + }(type_id, s->col.leaf_column, val_idx); + uint32_t v = len - 4; // string length + dst[pos + 0] = v; + dst[pos + 1] = v >> 8; + dst[pos + 2] = v >> 16; + dst[pos + 3] = v >> 24; + if (v != 0) memcpy(dst + pos + 4, bytes, v); + } break; + case FIXED_LEN_BYTE_ARRAY: { + if (type_id == type_id::DECIMAL128) { + // When using FIXED_LEN_BYTE_ARRAY for decimals, the rep is encoded in big-endian + auto const v = s->col.leaf_column->element(val_idx).value(); + auto const v_char_ptr = reinterpret_cast(&v); + thrust::copy(thrust::seq, + thrust::make_reverse_iterator(v_char_ptr + sizeof(v)), + thrust::make_reverse_iterator(v_char_ptr), + dst + pos); + } + } break; + } + } + __syncthreads(); + } - auto const [last_day_nanos, julian_days] = [&] { - using namespace cuda::std::chrono; - switch (s->col.leaf_column->type().id()) { - case type_id::TIMESTAMP_SECONDS: - case type_id::TIMESTAMP_MILLISECONDS: { - return julian_days_with_time(v); - } break; - case type_id::TIMESTAMP_MICROSECONDS: - case type_id::TIMESTAMP_NANOSECONDS: { - return julian_days_with_time(v); - } 
break; - } - return julian_days_with_time(0); - }(); - - // the 12 bytes of fixed length data. - v = last_day_nanos.count(); - dst[pos + 0] = v; - dst[pos + 1] = v >> 8; - dst[pos + 2] = v >> 16; - dst[pos + 3] = v >> 24; - dst[pos + 4] = v >> 32; - dst[pos + 5] = v >> 40; - dst[pos + 6] = v >> 48; - dst[pos + 7] = v >> 56; - uint32_t w = julian_days.count(); - dst[pos + 8] = w; - dst[pos + 9] = w >> 8; - dst[pos + 10] = w >> 16; - dst[pos + 11] = w >> 24; - } break; + uint32_t const valid_count = block_reduce(temp_storage.reduce_storage).Sum(num_valid); - case DOUBLE: { - auto v = s->col.leaf_column->element(val_idx); - memcpy(dst + pos, &v, 8); - } break; - case BYTE_ARRAY: { - auto const bytes = [](cudf::type_id const type_id, - column_device_view const* leaf_column, - uint32_t const val_idx) -> void const* { - switch (type_id) { - case type_id::STRING: - return reinterpret_cast( - leaf_column->element(val_idx).data()); - case type_id::LIST: - return reinterpret_cast( - get_element(*(leaf_column), val_idx).data()); - default: CUDF_UNREACHABLE("invalid type id for byte array writing!"); - } - }(type_id, s->col.leaf_column, val_idx); - uint32_t v = len - 4; // string length - dst[pos + 0] = v; - dst[pos + 1] = v >> 8; - dst[pos + 2] = v >> 16; - dst[pos + 3] = v >> 24; - if (v != 0) memcpy(dst + pos + 4, bytes, v); - } break; - case FIXED_LEN_BYTE_ARRAY: { - if (type_id == type_id::DECIMAL128) { - // When using FIXED_LEN_BYTE_ARRAY for decimals, the rep is encoded in big-endian - auto const v = s->col.leaf_column->element(val_idx).value(); - auto const v_char_ptr = reinterpret_cast(&v); - thrust::copy(thrust::seq, - thrust::make_reverse_iterator(v_char_ptr + sizeof(v)), - thrust::make_reverse_iterator(v_char_ptr), - dst + pos); - } - } break; + finish_page_encode( + s, valid_count, s->cur, pages, comp_in, comp_out, comp_results, write_v2_headers); +} + +// DICTIONARY page data encoder +// blockDim(128, 1, 1) +template +__global__ void __launch_bounds__(block_size, 
8) + gpuEncodeDictPages(device_span pages, + device_span> comp_in, + device_span> comp_out, + device_span comp_results, + bool write_v2_headers) +{ + __shared__ __align__(8) rle_page_enc_state_s state_g; + using block_reduce = cub::BlockReduce; + using block_scan = cub::BlockScan; + __shared__ union { + typename block_reduce::TempStorage reduce_storage; + typename block_scan::TempStorage scan_storage; + } temp_storage; + + auto* const s = &state_g; + uint32_t t = threadIdx.x; + + if (t == 0) { + state_g = rle_page_enc_state_s{}; + s->page = pages[blockIdx.x]; + s->ck = *s->page.chunk; + s->col = *s->ck.col_desc; + s->rle_len_pos = nullptr; + // get s->cur back to where it was at the end of encoding the rep and def level data + s->cur = + s->page.page_data + s->page.max_hdr_size + s->page.def_lvl_bytes + s->page.rep_lvl_bytes; + // if V1 data page, need space for the RLE length fields + if (s->page.page_type == PageType::DATA_PAGE) { + if (s->col.num_def_level_bits() != 0) { s->cur += RLE_LENGTH_FIELD_LEN; } + if (s->col.num_rep_level_bits() != 0) { s->cur += RLE_LENGTH_FIELD_LEN; } + } + } + __syncthreads(); + + if (BitAnd(s->page.kernel_mask, encode_kernel_mask::DICTIONARY) == 0) { return; } + + // Encode data values + __syncthreads(); + auto const physical_type = s->col.physical_type; + auto const type_id = s->col.leaf_column->type().id(); + auto const dtype_len_out = physical_type_len(physical_type, type_id); + auto const dtype_len_in = [&]() -> uint32_t { + if (physical_type == INT32) { return int32_logical_len(type_id); } + if (physical_type == INT96) { return sizeof(int64_t); } + return dtype_len_out; + }(); + + // TODO assert dict_bits >= 0 + auto const dict_bits = (physical_type == BOOLEAN) ? 1 + : (s->ck.use_dictionary and s->page.page_type != PageType::DICTIONARY_PAGE) + ? 
s->ck.dict_rle_bits + : -1; + if (t == 0) { + uint8_t* dst = s->cur; + s->rle_run = 0; + s->rle_pos = 0; + s->rle_numvals = 0; + s->rle_out = dst; + s->page.encoding = + determine_encoding(s->page.page_type, physical_type, s->ck.use_dictionary, write_v2_headers); + if (dict_bits >= 0 && physical_type != BOOLEAN) { + dst[0] = dict_bits; + s->rle_out = dst + 1; + } else if (write_v2_headers && physical_type == BOOLEAN) { + // save space for RLE length. we don't know the total length yet. + s->rle_out = dst + RLE_LENGTH_FIELD_LEN; + s->rle_len_pos = dst; + } + s->page_start_val = row_to_value_idx(s->page.start_row, s->col); + s->chunk_start_val = row_to_value_idx(s->ck.start_row, s->col); + } + __syncthreads(); + + uint32_t num_valid = 0; + for (uint32_t cur_val_idx = 0; cur_val_idx < s->page.num_leaf_values;) { + uint32_t nvals = min(s->page.num_leaf_values - cur_val_idx, block_size); + + auto [is_valid, val_idx] = [&]() { + size_type const val_idx_in_block = cur_val_idx + t; + size_type const val_idx_in_leaf_col = s->page_start_val + val_idx_in_block; + + uint32_t const is_valid = (val_idx_in_leaf_col < s->col.leaf_column->size() && + val_idx_in_block < s->page.num_leaf_values) + ? s->col.leaf_column->is_valid(val_idx_in_leaf_col) + : 0; + // need to test for use_dictionary because it might be boolean + uint32_t const val_idx = + (s->ck.use_dictionary) ? 
val_idx_in_leaf_col - s->chunk_start_val : val_idx_in_leaf_col; + return std::make_tuple(is_valid, val_idx); + }(); + + if (is_valid) { num_valid++; } + cur_val_idx += nvals; + + // Dictionary encoding + if (dict_bits > 0) { + uint32_t rle_numvals; + uint32_t rle_numvals_in_block; + uint32_t pos; + block_scan(temp_storage.scan_storage).ExclusiveSum(is_valid, pos, rle_numvals_in_block); + rle_numvals = s->rle_numvals; + if (is_valid) { + uint32_t v; + if (physical_type == BOOLEAN) { + v = s->col.leaf_column->element(val_idx); + } else { + v = s->ck.dict_index[val_idx]; } + s->vals[rolling_idx(rle_numvals + pos)] = v; + } + rle_numvals += rle_numvals_in_block; + __syncthreads(); + if ((!write_v2_headers) && (physical_type == BOOLEAN)) { + PlainBoolEncode(s, rle_numvals, (cur_val_idx == s->page.num_leaf_values), t); + } else { + RleEncode(s, rle_numvals, dict_bits, (cur_val_idx == s->page.num_leaf_values), t); } __syncthreads(); } + if (t == 0) { s->cur = s->rle_out; } + __syncthreads(); } uint32_t const valid_count = block_reduce(temp_storage.reduce_storage).Sum(num_valid); @@ -1392,37 +1684,137 @@ __global__ void __launch_bounds__(128, 8) __syncwarp(); } - // V2 does not compress rep and def level data - size_t const skip_comp_size = s->page.def_lvl_bytes + s->page.rep_lvl_bytes; + finish_page_encode( + s, valid_count, s->cur, pages, comp_in, comp_out, comp_results, write_v2_headers); +} + +// DELTA_BINARY_PACKED page data encoder +// blockDim(128, 1, 1) +template +__global__ void __launch_bounds__(block_size, 8) + gpuEncodeDeltaBinaryPages(device_span pages, + device_span> comp_in, + device_span> comp_out, + device_span comp_results) +{ + // block of shared memory for value storage and bit packing + __shared__ uleb128_t delta_shared[delta::buffer_size + delta::block_size]; + __shared__ __align__(8) page_enc_state_s<0> state_g; + using block_reduce = cub::BlockReduce; + __shared__ union { + typename block_reduce::TempStorage reduce_storage; + typename 
delta::index_scan::TempStorage delta_index_tmp; + typename delta::block_reduce::TempStorage delta_reduce_tmp; + typename delta::warp_reduce::TempStorage delta_warp_red_tmp[delta::num_mini_blocks]; + } temp_storage; + + auto* const s = &state_g; + uint32_t t = threadIdx.x; if (t == 0) { - s->page.num_nulls = s->page.num_values - valid_count; - uint8_t* const base = s->page.page_data + s->page.max_hdr_size; - auto const actual_data_size = static_cast(s->cur - base); - if (actual_data_size > s->page.max_data_size) { - CUDF_UNREACHABLE("detected possible page data corruption"); - } - s->page.max_data_size = actual_data_size; - if (not comp_in.empty()) { - comp_in[blockIdx.x] = {base + skip_comp_size, actual_data_size - skip_comp_size}; - comp_out[blockIdx.x] = {s->page.compressed_data + s->page.max_hdr_size + skip_comp_size, - 0}; // size is unused - } - pages[blockIdx.x] = s->page; - if (not comp_results.empty()) { - comp_results[blockIdx.x] = {0, compression_status::FAILURE}; - pages[blockIdx.x].comp_res = &comp_results[blockIdx.x]; + state_g = page_enc_state_s<0>{}; + s->page = pages[blockIdx.x]; + s->ck = *s->page.chunk; + s->col = *s->ck.col_desc; + s->rle_len_pos = nullptr; + // get s->cur back to where it was at the end of encoding the rep and def level data + s->cur = + s->page.page_data + s->page.max_hdr_size + s->page.def_lvl_bytes + s->page.rep_lvl_bytes; + } + __syncthreads(); + + if (BitAnd(s->page.kernel_mask, encode_kernel_mask::DELTA_BINARY) == 0) { return; } + + // Encode data values + __syncthreads(); + auto const physical_type = s->col.physical_type; + auto const type_id = s->col.leaf_column->type().id(); + auto const dtype_len_out = physical_type_len(physical_type, type_id); + auto const dtype_len_in = [&]() -> uint32_t { + if (physical_type == INT32) { return int32_logical_len(type_id); } + if (physical_type == INT96) { return sizeof(int64_t); } + return dtype_len_out; + }(); + + if (t == 0) { + uint8_t* dst = s->cur; + s->rle_run = 0; + s->rle_pos 
= 0; + s->rle_numvals = 0; + s->rle_out = dst; + s->page.encoding = Encoding::DELTA_BINARY_PACKED; + s->page_start_val = row_to_value_idx(s->page.start_row, s->col); + s->chunk_start_val = row_to_value_idx(s->ck.start_row, s->col); + } + __syncthreads(); + + // need to know the number of valid values for the null values calculation and to size + // the delta binary encoder. + uint32_t valid_count = 0; + if (not s->col.leaf_column->nullable()) { + valid_count = s->page.num_leaf_values; + } else { + uint32_t num_valid = 0; + for (uint32_t cur_val_idx = 0; cur_val_idx < s->page.num_leaf_values;) { + uint32_t const nvals = min(s->page.num_leaf_values - cur_val_idx, block_size); + size_type const val_idx_in_block = cur_val_idx + t; + size_type const val_idx_in_leaf_col = s->page_start_val + val_idx_in_block; + + if (val_idx_in_leaf_col < s->col.leaf_column->size() && + val_idx_in_block < s->page.num_leaf_values && + s->col.leaf_column->is_valid(val_idx_in_leaf_col)) { + num_valid++; + } + cur_val_idx += nvals; } + valid_count = block_reduce(temp_storage.reduce_storage).Sum(num_valid); } - // copy over uncompressed data - if (skip_comp_size != 0 && not comp_in.empty()) { - uint8_t const* const src = s->page.page_data + s->page.max_hdr_size; - uint8_t* const dst = s->page.compressed_data + s->page.max_hdr_size; - for (int i = t; i < skip_comp_size; i += block_size) { - dst[i] = src[i]; + uint8_t const* delta_ptr = nullptr; // this will be the end of delta block pointer + + if (physical_type == INT32) { + switch (dtype_len_in) { + case 8: { + // only DURATIONS map to 8 bytes, so safe to just use signed here? 
+ delta_ptr = delta_encode(s, valid_count, delta_shared, &temp_storage); + break; + } + case 4: { + if (type_id == type_id::UINT32) { + delta_ptr = delta_encode(s, valid_count, delta_shared, &temp_storage); + } else { + delta_ptr = delta_encode(s, valid_count, delta_shared, &temp_storage); + } + break; + } + case 2: { + if (type_id == type_id::UINT16) { + delta_ptr = delta_encode(s, valid_count, delta_shared, &temp_storage); + } else { + delta_ptr = delta_encode(s, valid_count, delta_shared, &temp_storage); + } + break; + } + case 1: { + if (type_id == type_id::UINT8) { + delta_ptr = delta_encode(s, valid_count, delta_shared, &temp_storage); + } else { + delta_ptr = delta_encode(s, valid_count, delta_shared, &temp_storage); + } + break; + } + default: CUDF_UNREACHABLE("invalid dtype_len_in when encoding DELTA_BINARY_PACKED"); + } + } else { + if (type_id == type_id::UINT64) { + delta_ptr = delta_encode(s, valid_count, delta_shared, &temp_storage); + } else { + delta_ptr = delta_encode(s, valid_count, delta_shared, &temp_storage); } } + + finish_page_encode( + s, valid_count, delta_ptr, pages, comp_in, comp_out, comp_results, true); } constexpr int decide_compression_warps_in_block = 4; @@ -1457,7 +1849,8 @@ __global__ void __launch_bounds__(decide_compression_block_size) for (auto page_id = lane_id; page_id < num_pages; page_id += cudf::detail::warp_size) { auto const& curr_page = ck_g[warp_id].pages[page_id]; auto const page_data_size = curr_page.max_data_size; - auto const lvl_bytes = curr_page.def_lvl_bytes + curr_page.rep_lvl_bytes; + auto const is_v2 = curr_page.page_type == PageType::DATA_PAGE_V2; + auto const lvl_bytes = is_v2 ? 
curr_page.def_lvl_bytes + curr_page.rep_lvl_bytes : 0; uncompressed_data_size += page_data_size; if (auto comp_res = curr_page.comp_res; comp_res != nullptr) { compressed_data_size += comp_res->bytes_written + lvl_bytes; @@ -1920,7 +2313,8 @@ __global__ void __launch_bounds__(128) } uncompressed_page_size = page_g.max_data_size; if (ck_g.is_compressed) { - auto const lvl_bytes = page_g.def_lvl_bytes + page_g.rep_lvl_bytes; + auto const is_v2 = page_g.page_type == PageType::DATA_PAGE_V2; + auto const lvl_bytes = is_v2 ? page_g.def_lvl_bytes + page_g.rep_lvl_bytes : 0; hdr_start = page_g.compressed_data; compressed_page_size = static_cast(comp_results[blockIdx.x].bytes_written) + lvl_bytes; @@ -2155,6 +2549,10 @@ constexpr __device__ void* align8(void* ptr) return static_cast(ptr) - algn; } +struct mask_tform { + __device__ uint32_t operator()(EncPage const& p) { return static_cast(p.kernel_mask); } +}; + } // namespace // blockDim(1, 1, 1) @@ -2257,8 +2655,9 @@ void InitFragmentStatistics(device_span groups, rmm::cuda_stream_view stream) { int const num_fragments = fragments.size(); - int const dim = util::div_rounding_up_safe(num_fragments, 128 / cudf::detail::warp_size); - gpuInitFragmentStats<<>>(groups, fragments); + int const dim = + util::div_rounding_up_safe(num_fragments, encode_block_size / cudf::detail::warp_size); + gpuInitFragmentStats<<>>(groups, fragments); } void InitEncoderPages(device_2dspan chunks, @@ -2277,18 +2676,18 @@ void InitEncoderPages(device_2dspan chunks, { auto num_rowgroups = chunks.size().first; dim3 dim_grid(num_columns, num_rowgroups); // 1 threadblock per rowgroup - gpuInitPages<<>>(chunks, - pages, - page_sizes, - comp_page_sizes, - col_desc, - page_grstats, - chunk_grstats, - num_columns, - max_page_size_bytes, - max_page_size_rows, - page_align, - write_v2_headers); + gpuInitPages<<>>(chunks, + pages, + page_sizes, + comp_page_sizes, + col_desc, + page_grstats, + chunk_grstats, + num_columns, + max_page_size_bytes, + 
max_page_size_rows, + page_align, + write_v2_headers); } void EncodePages(device_span pages, @@ -2299,10 +2698,43 @@ void EncodePages(device_span pages, rmm::cuda_stream_view stream) { auto num_pages = pages.size(); + + // determine which kernels to invoke + auto mask_iter = thrust::make_transform_iterator(pages.begin(), mask_tform{}); + uint32_t kernel_mask = thrust::reduce( + rmm::exec_policy(stream), mask_iter, mask_iter + pages.size(), 0U, thrust::bit_or{}); + + // get the number of streams we need from the pool + int nkernels = std::bitset<32>(kernel_mask).count(); + auto streams = cudf::detail::fork_streams(stream, nkernels); + // A page is part of one column. This is launching 1 block per page. 1 block will exclusively // deal with one datatype. - gpuEncodePages<128><<>>( - pages, comp_in, comp_out, comp_results, write_v2_headers); + + int s_idx = 0; + if (BitAnd(kernel_mask, encode_kernel_mask::PLAIN) != 0) { + auto const strm = streams[s_idx++]; + gpuEncodePageLevels<<>>( + pages, write_v2_headers, encode_kernel_mask::PLAIN); + gpuEncodePages<<>>( + pages, comp_in, comp_out, comp_results, write_v2_headers); + } + if (BitAnd(kernel_mask, encode_kernel_mask::DELTA_BINARY) != 0) { + auto const strm = streams[s_idx++]; + gpuEncodePageLevels<<>>( + pages, write_v2_headers, encode_kernel_mask::DELTA_BINARY); + gpuEncodeDeltaBinaryPages + <<>>(pages, comp_in, comp_out, comp_results); + } + if (BitAnd(kernel_mask, encode_kernel_mask::DICTIONARY) != 0) { + auto const strm = streams[s_idx++]; + gpuEncodePageLevels<<>>( + pages, write_v2_headers, encode_kernel_mask::DICTIONARY); + gpuEncodeDictPages<<>>( + pages, comp_in, comp_out, comp_results, write_v2_headers); + } + + cudf::detail::join_streams(streams, stream); } void DecideCompression(device_span chunks, rmm::cuda_stream_view stream) @@ -2320,7 +2752,7 @@ void EncodePageHeaders(device_span pages, { // TODO: single thread task. No need for 128 threads/block. 
Earlier it used to employ rest of the // threads to coop load structs - gpuEncodePageHeaders<<>>( + gpuEncodePageHeaders<<>>( pages, comp_results, page_stats, chunk_stats); } diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 6a93fec0c46..048f1a73a9c 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -88,6 +88,37 @@ struct input_column_info { auto nesting_depth() const { return nesting.size(); } }; +// The delta encodings use ULEB128 integers, but parquet only uses max 64 bits. +using uleb128_t = uint64_t; +using zigzag128_t = int64_t; + +// this is in C++23 +#if !defined(__cpp_lib_is_scoped_enum) +template > +struct is_scoped_enum { + static const bool value = not std::is_convertible_v>; +}; + +template +struct is_scoped_enum { + static const bool value = false; +}; +#else +using std::is_scoped_enum; +#endif + +// helpers to do bit operations on scoped enums +template ::value and std::is_same_v) or + (is_scoped_enum::value and std::is_same_v) or + (is_scoped_enum::value and std::is_same_v)>* = + nullptr> +constexpr uint32_t BitAnd(T1 a, T2 b) +{ + return static_cast(a) & static_cast(b); +} + /** * @brief Enums for the flags in the page header */ @@ -371,6 +402,17 @@ constexpr uint32_t encoding_to_mask(Encoding encoding) return 1 << static_cast(encoding); } +/** + * @brief Enum of mask bits for the EncPage kernel_mask + * + * Used to control which encode kernels to run. + */ +enum class encode_kernel_mask { + PLAIN = (1 << 0), // Run plain encoding kernel + DICTIONARY = (1 << 1), // Run dictionary encoding kernel + DELTA_BINARY = (1 << 2) // Run DELTA_BINARY_PACKED encoding kernel +}; + /** * @brief Struct describing an encoder column chunk */ @@ -429,10 +471,11 @@ struct EncPage { uint32_t num_leaf_values; //!< Values in page. Different from num_rows in case of nested types uint32_t num_values; //!< Number of def/rep level values in page. 
Includes null/empty elements in //!< non-leaf levels - uint32_t def_lvl_bytes; //!< Number of bytes of encoded definition level data (V2 only) - uint32_t rep_lvl_bytes; //!< Number of bytes of encoded repetition level data (V2 only) - compression_result* comp_res; //!< Ptr to compression result - uint32_t num_nulls; //!< Number of null values (V2 only) (down here for alignment) + uint32_t def_lvl_bytes; //!< Number of bytes of encoded definition level data (V2 only) + uint32_t rep_lvl_bytes; //!< Number of bytes of encoded repetition level data (V2 only) + compression_result* comp_res; //!< Ptr to compression result + uint32_t num_nulls; //!< Number of null values (V2 only) (down here for alignment) + encode_kernel_mask kernel_mask; //!< Mask used to control which encoding kernels to run }; /** diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index fa85e3a4a1d..2a654bd7e8c 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -353,6 +353,9 @@ struct ParquetWriterSchemaTest : public ParquetWriterTest { template struct ParquetReaderSourceTest : public ParquetReaderTest {}; +template +struct ParquetWriterDeltaTest : public ParquetWriterTest {}; + // Declare typed test cases // TODO: Replace with `NumericTypes` when unsigned support is added. 
Issue #5352 using SupportedTypes = cudf::test::Types; @@ -384,7 +387,6 @@ TYPED_TEST_SUITE(ParquetChunkedWriterNumericTypeTest, SupportedTypes); class ParquetSizedTest : public ::cudf::test::BaseFixtureWithParam {}; // test the allowed bit widths for dictionary encoding -// values chosen to trigger 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, and 24 bit dictionaries INSTANTIATE_TEST_SUITE_P(ParquetDictionaryTest, ParquetSizedTest, testing::Range(1, 25), @@ -6698,7 +6700,7 @@ TEST_P(ParquetV2Test, CheckEncodings) // data should be PLAIN for v1, RLE for V2 auto col0_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 2 == 0; }); - // data should be PLAIN for both + // data should be PLAIN for v1, DELTA_BINARY_PACKED for v2 auto col1_data = random_values(num_rows); // data should be PLAIN_DICTIONARY for v1, PLAIN and RLE_DICTIONARY for v2 auto col2_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 1; }); @@ -6733,10 +6735,10 @@ TEST_P(ParquetV2Test, CheckEncodings) // col0 should have RLE for rep/def and data EXPECT_TRUE(chunk0_enc.size() == 1); EXPECT_TRUE(contains(chunk0_enc, Encoding::RLE)); - // col1 should have RLE for rep/def and PLAIN for data + // col1 should have RLE for rep/def and DELTA_BINARY_PACKED for data EXPECT_TRUE(chunk1_enc.size() == 2); EXPECT_TRUE(contains(chunk1_enc, Encoding::RLE)); - EXPECT_TRUE(contains(chunk1_enc, Encoding::PLAIN)); + EXPECT_TRUE(contains(chunk1_enc, Encoding::DELTA_BINARY_PACKED)); // col2 should have RLE for rep/def, PLAIN for dict, and RLE_DICTIONARY for data EXPECT_TRUE(chunk2_enc.size() == 3); EXPECT_TRUE(contains(chunk2_enc, Encoding::RLE)); @@ -6758,6 +6760,104 @@ TEST_P(ParquetV2Test, CheckEncodings) } } +// removing duration_D, duration_s, and timestamp_s as they don't appear to be supported properly. +// see definition of UnsupportedChronoTypes above. 
+using DeltaDecimalTypes = cudf::test::Types; +using DeltaBinaryTypes = + cudf::test::Concat; +using SupportedDeltaTestTypes = + cudf::test::RemoveIf, DeltaBinaryTypes>; +TYPED_TEST_SUITE(ParquetWriterDeltaTest, SupportedDeltaTestTypes); + +TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaTestTypes) +{ + using T = TypeParam; + auto col0 = testdata::ascending(); + auto col1 = testdata::unordered(); + + auto const expected = table_view{{col0, col1}}; + + auto const filepath = temp_env->get_temp_filepath("DeltaBinaryPacked.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .write_v2_headers(true) + .dictionary_policy(cudf::io::dictionary_policy::NEVER); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaTestTypesSliced) +{ + using T = TypeParam; + constexpr int num_rows = 4'000; + auto col0 = testdata::ascending(); + auto col1 = testdata::unordered(); + + auto const expected = table_view{{col0, col1}}; + auto expected_slice = cudf::slice(expected, {num_rows, 2 * num_rows}); + ASSERT_EQ(expected_slice[0].num_rows(), num_rows); + + auto const filepath = temp_env->get_temp_filepath("DeltaBinaryPackedSliced.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) + .write_v2_headers(true) + .dictionary_policy(cudf::io::dictionary_policy::NEVER); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, 
result.tbl->view()); +} + +TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaListSliced) +{ + using T = TypeParam; + + constexpr int num_slice = 4'000; + constexpr int num_rows = 32 * 1024; + + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); + + // list + constexpr int vals_per_row = 4; + auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [vals_per_row](cudf::size_type idx) { return idx * vals_per_row; }); + cudf::test::fixed_width_column_wrapper c1_offsets(c1_offset_iter, + c1_offset_iter + num_rows + 1); + cudf::test::fixed_width_column_wrapper c1_vals( + values, values + (num_rows * vals_per_row), valids); + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows); + + auto _c1 = cudf::make_lists_column( + num_rows, c1_offsets.release(), c1_vals.release(), null_count, std::move(null_mask)); + auto c1 = cudf::purge_nonempty_nulls(*_c1); + + auto const expected = table_view{{*c1}}; + auto expected_slice = cudf::slice(expected, {num_slice, 2 * num_slice}); + ASSERT_EQ(expected_slice[0].num_rows(), num_slice); + + auto const filepath = temp_env->get_temp_filepath("DeltaBinaryPackedListSliced.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) + .write_v2_headers(true) + .dictionary_policy(cudf::io::dictionary_policy::NEVER); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); +} + TEST_F(ParquetWriterTest, EmptyMinStringStatistics) { char const* const min_val = ""; From 0341bb7cebfab1fb45d4a53cfc495265bb96ee3a Mon Sep 17 
00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 20 Oct 2023 10:46:02 -0700 Subject: [PATCH 71/76] Expose streams in public null mask APIs (#14263) Contributes to #925 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/14263 --- cpp/include/cudf/lists/detail/scatter.cuh | 6 +- cpp/include/cudf/null_mask.hpp | 24 +++++- cpp/src/binaryop/binaryop.cpp | 2 +- cpp/src/bitmask/null_mask.cu | 38 +++++++--- cpp/src/copying/concatenate.cu | 2 +- cpp/src/copying/scatter.cu | 5 +- cpp/src/groupby/hash/groupby.cu | 3 +- cpp/src/lists/contains.cu | 16 ++-- cpp/src/merge/merge.cu | 2 +- cpp/src/round/round.cu | 16 +++- cpp/src/search/contains_column.cu | 2 +- cpp/src/strings/replace/multi.cu | 2 +- cpp/src/strings/split/split_re.cu | 2 +- cpp/src/strings/split/split_record.cu | 6 +- cpp/src/unary/cast_ops.cu | 8 +- cpp/src/unary/math_ops.cu | 8 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/null_mask_test.cpp | 92 +++++++++++++++++++++++ 18 files changed, 191 insertions(+), 44 deletions(-) create mode 100644 cpp/tests/streams/null_mask_test.cpp diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index f04b2fda2bf..ff148c59a23 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -20,9 +20,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -130,8 +130,8 @@ std::unique_ptr scatter_impl(rmm::device_uvector cons std::vector> children; children.emplace_back(std::move(offsets_column)); children.emplace_back(std::move(child_column)); - auto null_mask = - target.has_nulls() ? copy_bitmask(target, stream, mr) : rmm::device_buffer{0, stream, mr}; + auto null_mask = target.has_nulls() ? 
cudf::detail::copy_bitmask(target, stream, mr) + : rmm::device_buffer{0, stream, mr}; // The output column from this function only has null masks copied from the target columns. // That is still not a correct final null mask for the scatter result. diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 672f479ad53..524296e60ca 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -80,6 +81,7 @@ size_type num_bitmask_words(size_type number_of_bits); * * @param size The number of elements to be represented by the mask * @param state The desired state of the mask + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @return A `device_buffer` for use as a null bitmask * satisfying the desired size and state @@ -87,6 +89,7 @@ size_type num_bitmask_words(size_type number_of_bits); rmm::device_buffer create_null_mask( size_type size, mask_state state, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -100,8 +103,13 @@ rmm::device_buffer create_null_mask( * @param begin_bit Index of the first bit to set (inclusive) * @param end_bit Index of the last bit to set (exclusive) * @param valid If true set all entries to valid; otherwise, set all to null + * @param stream CUDA stream used for device memory operations and kernel launches */ -void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid); +void set_null_mask(bitmask_type* bitmask, + size_type begin_bit, + size_type end_bit, + bool valid, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Creates a `device_buffer` from a slice of bitmask defined by a range @@ -115,6 +123,7 @@ void set_null_mask(bitmask_type* bitmask, size_type 
begin_bit, size_type end_bit * @param mask Bitmask residing in device memory whose bits will be copied * @param begin_bit Index of the first bit to be copied (inclusive) * @param end_bit Index of the last bit to be copied (exclusive) + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @return A `device_buffer` containing the bits * `[begin_bit, end_bit)` from `mask`. @@ -123,6 +132,7 @@ rmm::device_buffer copy_bitmask( bitmask_type const* mask, size_type begin_bit, size_type end_bit, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -132,12 +142,14 @@ rmm::device_buffer copy_bitmask( * Returns empty `device_buffer` if the column is not nullable * * @param view Column view whose bitmask needs to be copied + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @return A `device_buffer` containing the bits * `[view.offset(), view.offset() + view.size())` from `view`'s bitmask. */ rmm::device_buffer copy_bitmask( column_view const& view, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -148,11 +160,13 @@ rmm::device_buffer copy_bitmask( * If no column in the table is nullable, an empty bitmask is returned. 
* * @param view The table of columns + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @return A pair of resulting bitmask and count of unset bits */ std::pair bitmask_and( table_view const& view, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -163,11 +177,13 @@ std::pair bitmask_and( * If no column in the table is nullable, an empty bitmask is returned. * * @param view The table of columns + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @return A pair of resulting bitmask and count of unset bits */ std::pair bitmask_or( table_view const& view, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -183,8 +199,12 @@ std::pair bitmask_or( * @param bitmask Validity bitmask residing in device memory. * @param start Index of the first bit to count (inclusive). * @param stop Index of the last bit to count (exclusive). + * @param stream CUDA stream used for device memory operations and kernel launches * @return The number of null elements in the specified range. 
*/ -cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop); +cudf::size_type null_count(bitmask_type const* bitmask, + size_type start, + size_type stop, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 6b413ab2be4..53b04c4ca80 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -366,7 +366,7 @@ std::unique_ptr binary_operation(column_view const& lhs, CUDF_EXPECTS((lhs.size() == rhs.size()), "Column sizes don't match"); - auto [new_mask, null_count] = bitmask_and(table_view({lhs, rhs}), stream, mr); + auto [new_mask, null_count] = cudf::detail::bitmask_and(table_view({lhs, rhs}), stream, mr); auto out = make_fixed_width_column(output_type, lhs.size(), std::move(new_mask), null_count, stream, mr); diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 5a0d3e4f120..3ff56eabe1e 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -157,16 +157,21 @@ void set_null_mask(bitmask_type* bitmask, // Create a device_buffer for a null mask rmm::device_buffer create_null_mask(size_type size, mask_state state, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return detail::create_null_mask(size, state, cudf::get_default_stream(), mr); + return detail::create_null_mask(size, state, stream, mr); } // Set pre-allocated null mask of given bit range [begin_bit, end_bit) to valid, if valid==true, // or null, otherwise; -void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid) +void set_null_mask(bitmask_type* bitmask, + size_type begin_bit, + size_type end_bit, + bool valid, + rmm::cuda_stream_view stream) { - return detail::set_null_mask(bitmask, begin_bit, end_bit, valid, cudf::get_default_stream()); + return detail::set_null_mask(bitmask, begin_bit, end_bit, valid, 
stream); } namespace detail { @@ -511,33 +516,46 @@ std::pair bitmask_or(table_view const& view, rmm::device_buffer copy_bitmask(bitmask_type const* mask, size_type begin_bit, size_type end_bit, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return detail::copy_bitmask(mask, begin_bit, end_bit, cudf::get_default_stream(), mr); + CUDF_FUNC_RANGE(); + return detail::copy_bitmask(mask, begin_bit, end_bit, stream, mr); } // Create a bitmask from a column view -rmm::device_buffer copy_bitmask(column_view const& view, rmm::mr::device_memory_resource* mr) +rmm::device_buffer copy_bitmask(column_view const& view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - return detail::copy_bitmask(view, cudf::get_default_stream(), mr); + CUDF_FUNC_RANGE(); + return detail::copy_bitmask(view, stream, mr); } std::pair bitmask_and(table_view const& view, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return detail::bitmask_and(view, cudf::get_default_stream(), mr); + CUDF_FUNC_RANGE(); + return detail::bitmask_and(view, stream, mr); } std::pair bitmask_or(table_view const& view, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return detail::bitmask_or(view, cudf::get_default_stream(), mr); + CUDF_FUNC_RANGE(); + return detail::bitmask_or(view, stream, mr); } // Count non-zero bits in the specified range -cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop) +cudf::size_type null_count(bitmask_type const* bitmask, + size_type start, + size_type stop, + rmm::cuda_stream_view stream) { - return detail::null_count(bitmask, start, stop, cudf::get_default_stream()); + CUDF_FUNC_RANGE(); + return detail::null_count(bitmask, start, stop, stream); } } // namespace cudf diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index d08c3025553..9b9e780965a 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ 
-563,7 +563,7 @@ rmm::device_buffer concatenate_masks(host_span views, }); rmm::device_buffer null_mask = - create_null_mask(total_element_count, mask_state::UNINITIALIZED, mr); + cudf::detail::create_null_mask(total_element_count, mask_state::UNINITIALIZED, stream, mr); detail::concatenate_masks(views, static_cast(null_mask.data()), stream); diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 11c27fc86e3..879ddb5048e 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -268,8 +268,9 @@ struct column_scalar_scatterer_impl { // Compute null mask rmm::device_buffer null_mask = - target.nullable() ? copy_bitmask(target, stream, mr) - : create_null_mask(target.size(), mask_state::UNALLOCATED, stream, mr); + target.nullable() + ? detail::copy_bitmask(target, stream, mr) + : detail::create_null_mask(target.size(), mask_state::UNALLOCATED, stream, mr); column null_mask_stub(data_type{type_id::STRUCT}, target.size(), rmm::device_buffer{}, diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 506832881a9..195c8924c9a 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -410,7 +410,8 @@ void sparse_to_dense_results(table_view const& keys, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto row_bitmask = bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first; + auto row_bitmask = + cudf::detail::bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first; bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; bitmask_type const* row_bitmask_ptr = skip_key_rows_with_nulls ? 
static_cast(row_bitmask.data()) : nullptr; diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 4733a5d63a8..cd2bc493bc7 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -274,12 +275,13 @@ std::unique_ptr index_of(lists_column_view const& lists, rmm::mr::device_memory_resource* mr) { if (!search_key.is_valid(stream)) { - return make_numeric_column(data_type{cudf::type_to_id()}, - lists.size(), - cudf::create_null_mask(lists.size(), mask_state::ALL_NULL, mr), - lists.size(), - stream, - mr); + return make_numeric_column( + data_type{cudf::type_to_id()}, + lists.size(), + cudf::detail::create_null_mask(lists.size(), mask_state::ALL_NULL, stream, mr), + lists.size(), + stream, + mr); } if (lists.size() == 0) { return make_numeric_column( @@ -337,7 +339,7 @@ std::unique_ptr contains_nulls(lists_column_view const& lists, auto const lists_cv = lists.parent(); auto output = make_numeric_column(data_type{type_to_id()}, lists.size(), - copy_bitmask(lists_cv, stream, mr), + cudf::detail::copy_bitmask(lists_cv, stream, mr), lists_cv.null_count(), stream, mr); diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index c0765b48205..00a2f0bee8f 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -381,7 +381,7 @@ std::unique_ptr column_merger::operator()( // materialize the output buffer rmm::device_buffer validity = lcol.has_nulls() || rcol.has_nulls() - ? create_null_mask(merged_size, mask_state::UNINITIALIZED, stream, mr) + ? 
detail::create_null_mask(merged_size, mask_state::UNINITIALIZED, stream, mr) : rmm::device_buffer{}; if (lcol.has_nulls() || rcol.has_nulls()) { materialize_bitmask(lcol, diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 41cce57d55b..8a6367a1f87 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -219,8 +219,12 @@ std::unique_ptr round_with(column_view const& input, if (decimal_places >= 0 && std::is_integral_v) return std::make_unique(input, stream, mr); - auto result = cudf::make_fixed_width_column( - input.type(), input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr); + auto result = cudf::make_fixed_width_column(input.type(), + input.size(), + detail::copy_bitmask(input, stream, mr), + input.null_count(), + stream, + mr); auto out_view = result->mutable_view(); T const n = std::pow(10, std::abs(decimal_places)); @@ -256,8 +260,12 @@ std::unique_ptr round_with(column_view const& input, if (input.type().scale() > -decimal_places) return cudf::detail::cast(input, result_type, stream, mr); - auto result = cudf::make_fixed_width_column( - result_type, input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr); + auto result = cudf::make_fixed_width_column(result_type, + input.size(), + detail::copy_bitmask(input, stream, mr), + input.null_count(), + stream, + mr); auto out_view = result->mutable_view(); diff --git a/cpp/src/search/contains_column.cu b/cpp/src/search/contains_column.cu index 85971647434..b8c7d058535 100644 --- a/cpp/src/search/contains_column.cu +++ b/cpp/src/search/contains_column.cu @@ -42,7 +42,7 @@ struct contains_column_dispatch { stream, mr); return std::make_unique( - std::move(result_v), copy_bitmask(needles, stream, mr), needles.null_count()); + std::move(result_v), detail::copy_bitmask(needles, stream, mr), needles.null_count()); } }; diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index ee47932100a..f80ace57c69 100644 --- 
a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -383,7 +383,7 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in std::move(offsets), std::move(chars->release().children.back()), input.null_count(), - copy_bitmask(input.parent(), stream, mr)); + cudf::detail::copy_bitmask(input.parent(), stream, mr)); } /** diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index 913aec79758..045aac279e6 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -290,7 +290,7 @@ std::unique_ptr split_record_re(strings_column_view const& input, std::move(offsets), std::move(strings_output), input.null_count(), - copy_bitmask(input.parent(), stream, mr), + cudf::detail::copy_bitmask(input.parent(), stream, mr), stream, mr); } diff --git a/cpp/src/strings/split/split_record.cu b/cpp/src/strings/split/split_record.cu index 52f27c68111..7a0cfb9ef41 100644 --- a/cpp/src/strings/split/split_record.cu +++ b/cpp/src/strings/split/split_record.cu @@ -57,7 +57,7 @@ std::unique_ptr split_record_fn(strings_column_view const& input, std::move(offsets), std::move(results), input.null_count(), - copy_bitmask(input.parent(), stream, mr), + cudf::detail::copy_bitmask(input.parent(), stream, mr), stream, mr); } @@ -72,7 +72,7 @@ std::unique_ptr split_record_fn(strings_column_view const& input, std::move(offsets), std::move(strings_child), input.null_count(), - copy_bitmask(input.parent(), stream, mr), + cudf::detail::copy_bitmask(input.parent(), stream, mr), stream, mr); } @@ -160,7 +160,7 @@ std::unique_ptr whitespace_split_record_fn(strings_column_view const& in std::move(offsets), std::move(strings_output), input.null_count(), - copy_bitmask(input.parent(), stream, mr), + cudf::detail::copy_bitmask(input.parent(), stream, mr), stream, mr); } diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 1c81f266200..6fa87b1f709 100644 --- a/cpp/src/unary/cast_ops.cu +++ 
b/cpp/src/unary/cast_ops.cu @@ -194,7 +194,7 @@ std::unique_ptr rescale(column_view input, auto const scalar = make_fixed_point_scalar(0, scale_type{scale}, stream); auto output_column = make_column_from_scalar(*scalar, input.size(), stream, mr); if (input.nullable()) { - auto const null_mask = copy_bitmask(input, stream, mr); + auto const null_mask = detail::copy_bitmask(input, stream, mr); output_column->set_null_mask(std::move(null_mask), input.null_count()); } return output_column; @@ -255,7 +255,7 @@ struct dispatch_unary_cast_to { std::make_unique(type, size, rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - copy_bitmask(input, stream, mr), + detail::copy_bitmask(input, stream, mr), input.null_count()); mutable_column_view output_mutable = *output; @@ -285,7 +285,7 @@ struct dispatch_unary_cast_to { std::make_unique(type, size, rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - copy_bitmask(input, stream, mr), + detail::copy_bitmask(input, stream, mr), input.null_count()); mutable_column_view output_mutable = *output; @@ -334,7 +334,7 @@ struct dispatch_unary_cast_to { auto output = std::make_unique(cudf::data_type{type.id(), input.type().scale()}, size, rmm::device_buffer{size * cudf::size_of(type), stream}, - copy_bitmask(input, stream, mr), + detail::copy_bitmask(input, stream, mr), input.null_count()); mutable_column_view output_mutable = *output; diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index d0cae81a9c8..d84e0171b49 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -291,8 +291,12 @@ std::unique_ptr unary_op_with(column_view const& input, std::is_same_v>)) return std::make_unique(input, stream, mr); - auto result = cudf::make_fixed_width_column( - input.type(), input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr); + auto result = cudf::make_fixed_width_column(input.type(), + input.size(), + detail::copy_bitmask(input, stream, mr), + input.null_count(), + 
stream, + mr); auto out_view = result->mutable_view(); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3e30db7abcb..16e7239ebd8 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -629,6 +629,7 @@ ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_NULL_MASK_TEST streams/null_mask_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/null_mask_test.cpp b/cpp/tests/streams/null_mask_test.cpp new file mode 100644 index 00000000000..7e59201c8cf --- /dev/null +++ b/cpp/tests/streams/null_mask_test.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +#include +#include +#include + +class NullMaskTest : public cudf::test::BaseFixture {}; + +TEST_F(NullMaskTest, CreateNullMask) +{ + cudf::create_null_mask(10, cudf::mask_state::ALL_VALID, cudf::test::get_default_stream()); +} + +TEST_F(NullMaskTest, SetNullMask) +{ + cudf::test::fixed_width_column_wrapper col({0, 1, 0, 1, 1}, + {true, false, true, false, false}); + + cudf::set_null_mask(static_cast(col).null_mask(), + 0, + 3, + false, + cudf::test::get_default_stream()); +} + +TEST_F(NullMaskTest, CopyBitmask) +{ + cudf::test::fixed_width_column_wrapper const col({0, 1, 0, 1, 1}, + {true, false, true, false, false}); + + cudf::copy_bitmask( + static_cast(col).null_mask(), 0, 3, cudf::test::get_default_stream()); +} + +TEST_F(NullMaskTest, CopyBitmaskFromColumn) +{ + cudf::test::fixed_width_column_wrapper const col({0, 1, 0, 1, 1}, + {true, false, true, false, false}); + + cudf::copy_bitmask(col, cudf::test::get_default_stream()); +} + +TEST_F(NullMaskTest, BitMaskAnd) +{ + cudf::test::fixed_width_column_wrapper const col1({0, 1, 0, 1, 1}, + {true, false, true, false, false}); + cudf::test::fixed_width_column_wrapper const col2({0, 1, 0, 1, 1}, + {true, true, false, false, true}); + + auto tbl = cudf::table_view{{col1, col2}}; + cudf::bitmask_and(tbl, cudf::test::get_default_stream()); +} + +TEST_F(NullMaskTest, BitMaskOr) +{ + cudf::test::fixed_width_column_wrapper const col1({0, 1, 0, 1, 1}, + {true, false, true, false, false}); + cudf::test::fixed_width_column_wrapper const col2({0, 1, 0, 1, 1}, + {true, true, false, false, true}); + + auto tbl = cudf::table_view{{col1, col2}}; + cudf::bitmask_or(tbl, cudf::test::get_default_stream()); +} + +TEST_F(NullMaskTest, NullCount) +{ + cudf::test::fixed_width_column_wrapper const col({0, 1, 0, 1, 1}, + {true, true, false, false, true}); + + cudf::null_count( + static_cast(col).null_mask(), 0, 4, cudf::test::get_default_stream()); +} From 
e7c6365a4976881dc3cf0bcbfa254eb664cfe877 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Fri, 20 Oct 2023 11:32:38 -0700 Subject: [PATCH 72/76] Detect and report errors in Parquet header parsing (#14237) Fixes #13656. Uses the error reporting introduced in #14167 to report errors in header parsing. Authors: - Ed Seidl (https://github.com/etseidl) - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/14237 --- cpp/src/io/parquet/error.hpp | 77 ++++++++++++++++++ cpp/src/io/parquet/page_data.cu | 5 +- cpp/src/io/parquet/page_delta_decode.cu | 5 +- cpp/src/io/parquet/page_hdr.cu | 58 ++++++------- cpp/src/io/parquet/page_string_decode.cu | 5 +- cpp/src/io/parquet/parquet_gpu.hpp | 32 +++++++- cpp/src/io/parquet/reader_impl.cpp | 11 ++- cpp/src/io/parquet/reader_impl_preprocess.cu | 44 ++++------ .../cudf/tests/data/parquet/bad_dict.parquet | Bin 0 -> 2850 bytes python/cudf/cudf/tests/test_parquet.py | 8 ++ 10 files changed, 170 insertions(+), 75 deletions(-) create mode 100644 cpp/src/io/parquet/error.hpp create mode 100644 python/cudf/cudf/tests/data/parquet/bad_dict.parquet diff --git a/cpp/src/io/parquet/error.hpp b/cpp/src/io/parquet/error.hpp new file mode 100644 index 00000000000..92b5eebe9fd --- /dev/null +++ b/cpp/src/io/parquet/error.hpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +namespace cudf::io::parquet { + +/** + * @brief Wrapper around a `rmm::device_scalar` for use in reporting errors that occur in + * kernel calls. + * + * The `kernel_error` object is created with a `rmm::cuda_stream_view` which is used throughout + * the object's lifetime. + */ +class kernel_error { + private: + rmm::device_scalar _error_code; + + public: + /** + * @brief Construct a new `kernel_error` with an initial value of 0. + * + * Note: the initial value is set asynchronously. + * + * @throws `rmm::bad_alloc` if allocating the device memory for `initial_value` fails. + * @throws `rmm::cuda_error` if copying `initial_value` to device memory fails. + * + * @param CUDA stream to use + */ + kernel_error(rmm::cuda_stream_view stream) : _error_code{0, stream} {} + + /** + * @brief Return a pointer to the device memory for the error + */ + [[nodiscard]] auto data() { return _error_code.data(); } + + /** + * @brief Return the current value of the error + * + * This uses the stream used to create this instance. This does a synchronize on the stream + * this object was instantiated with. + */ + [[nodiscard]] auto value() const { return _error_code.value(_error_code.stream()); } + + /** + * @brief Return a hexadecimal string representation of the current error code + * + * Returned string will have "0x" prepended. 
+ */ + [[nodiscard]] std::string str() const + { + std::stringstream sstream; + sstream << std::hex << value(); + return "0x" + sstream.str(); + } +}; + +} // namespace cudf::io::parquet diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index cce3659b902..a783b489c02 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -599,10 +599,7 @@ __global__ void __launch_bounds__(decode_block_size) } __syncthreads(); } - if (t == 0 and s->error != 0) { - cuda::atomic_ref ref{*error_code}; - ref.fetch_or(s->error, cuda::std::memory_order_relaxed); - } + if (t == 0 and s->error != 0) { set_error(s->error, error_code); } } struct mask_tform { diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index d25684a59f3..bb5e5066b69 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -151,10 +151,7 @@ __global__ void __launch_bounds__(96) __syncthreads(); } - if (t == 0 and s->error != 0) { - cuda::atomic_ref ref{*error_code}; - ref.fetch_or(s->error, cuda::std::memory_order_relaxed); - } + if (t == 0 and s->error != 0) { set_error(s->error, error_code); } } } // anonymous namespace diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index eae8e05e61e..22add2fffc6 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -16,6 +16,9 @@ #include "parquet_gpu.hpp" #include + +#include + #include #include @@ -25,23 +28,6 @@ namespace cudf::io::parquet::detail { // Minimal thrift implementation for parsing page headers // https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md -static const __device__ __constant__ uint8_t g_list2struct[16] = {0, - 1, - 2, - ST_FLD_BYTE, - ST_FLD_DOUBLE, - 5, - ST_FLD_I16, - 7, - ST_FLD_I32, - 9, - ST_FLD_I64, - ST_FLD_BINARY, - ST_FLD_STRUCT, - ST_FLD_MAP, - ST_FLD_SET, - ST_FLD_LIST}; - struct byte_stream_s { uint8_t const* 
cur{}; uint8_t const* end{}; @@ -140,12 +126,13 @@ __device__ void skip_struct_field(byte_stream_s* bs, int field_type) case ST_FLD_SET: { // NOTE: skipping a list of lists is not handled auto const c = getb(bs); int n = c >> 4; - if (n == 0xf) n = get_u32(bs); - field_type = g_list2struct[c & 0xf]; - if (field_type == ST_FLD_STRUCT) + if (n == 0xf) { n = get_u32(bs); } + field_type = c & 0xf; + if (field_type == ST_FLD_STRUCT) { struct_depth += n; - else + } else { rep_cnt = n; + } } break; case ST_FLD_STRUCT: struct_depth++; break; } @@ -356,16 +343,20 @@ struct gpuParsePageHeader { */ // blockDim {128,1,1} __global__ void __launch_bounds__(128) - gpuDecodePageHeaders(ColumnChunkDesc* chunks, int32_t num_chunks) + gpuDecodePageHeaders(ColumnChunkDesc* chunks, int32_t num_chunks, int32_t* error_code) { + using cudf::detail::warp_size; gpuParsePageHeader parse_page_header; __shared__ byte_stream_s bs_g[4]; - int lane_id = threadIdx.x % 32; - int chunk = (blockIdx.x * 4) + (threadIdx.x / 32); - byte_stream_s* const bs = &bs_g[threadIdx.x / 32]; + int32_t error[4] = {0}; + auto const lane_id = threadIdx.x % warp_size; + auto const warp_id = threadIdx.x / warp_size; + auto const chunk = (blockIdx.x * 4) + warp_id; + auto const bs = &bs_g[warp_id]; - if (chunk < num_chunks and lane_id == 0) bs->ck = chunks[chunk]; + if (chunk < num_chunks and lane_id == 0) { bs->ck = chunks[chunk]; } + if (lane_id == 0) { error[warp_id] = 0; } __syncthreads(); if (chunk < num_chunks) { @@ -376,7 +367,7 @@ __global__ void __launch_bounds__(128) int32_t num_dict_pages = bs->ck.num_dict_pages; PageInfo* page_info; - if (!lane_id) { + if (lane_id == 0) { bs->base = bs->cur = bs->ck.compressed_data; bs->end = bs->base + bs->ck.compressed_size; bs->page.chunk_idx = chunk; @@ -412,6 +403,9 @@ __global__ void __launch_bounds__(128) bs->page.lvl_bytes[level_type::DEFINITION] = 0; bs->page.lvl_bytes[level_type::REPETITION] = 0; if (parse_page_header(bs) && bs->page.compressed_page_size >= 0) { + 
if (not is_supported_encoding(bs->page.encoding)) { + error[warp_id] |= static_cast(decode_error::UNSUPPORTED_ENCODING); + } switch (bs->page_type) { case PageType::DATA_PAGE: index_out = num_dict_pages + data_page_count; @@ -440,20 +434,25 @@ __global__ void __launch_bounds__(128) } bs->page.page_data = const_cast(bs->cur); bs->cur += bs->page.compressed_page_size; + if (bs->cur > bs->end) { + error[warp_id] |= static_cast(decode_error::DATA_STREAM_OVERRUN); + } bs->page.kernel_mask = kernel_mask_for_page(bs->page, bs->ck); } else { bs->cur = bs->end; } } index_out = shuffle(index_out); - if (index_out >= 0 && index_out < max_num_pages && lane_id == 0) + if (index_out >= 0 && index_out < max_num_pages && lane_id == 0) { page_info[index_out] = bs->page; + } num_values = shuffle(num_values); __syncwarp(); } if (lane_id == 0) { chunks[chunk].num_data_pages = data_page_count; chunks[chunk].num_dict_pages = dictionary_page_count; + if (error[warp_id] != 0) { set_error(error[warp_id], error_code); } } } } @@ -509,11 +508,12 @@ __global__ void __launch_bounds__(128) void __host__ DecodePageHeaders(ColumnChunkDesc* chunks, int32_t num_chunks, + int32_t* error_code, rmm::cuda_stream_view stream) { dim3 dim_block(128, 1); dim3 dim_grid((num_chunks + 3) >> 2, 1); // 1 chunk per warp, 4 warps per block - gpuDecodePageHeaders<<>>(chunks, num_chunks); + gpuDecodePageHeaders<<>>(chunks, num_chunks, error_code); } void __host__ BuildStringDictionaryIndex(ColumnChunkDesc* chunks, diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu index 4d79770ec34..4c7d8e3c20a 100644 --- a/cpp/src/io/parquet/page_string_decode.cu +++ b/cpp/src/io/parquet/page_string_decode.cu @@ -745,10 +745,7 @@ __global__ void __launch_bounds__(decode_block_size) auto const offptr = reinterpret_cast(nesting_info_base[leaf_level_index].data_out); block_excl_sum(offptr, value_count, s->page.str_offset); - if (t == 0 and s->error != 0) { - cuda::atomic_ref 
ref{*error_code}; - ref.fetch_or(s->error, cuda::std::memory_order_relaxed); - } + if (t == 0 and s->error != 0) { set_error(s->error, error_code); } } } // anonymous namespace diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 048f1a73a9c..164e2cea2ed 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -31,6 +31,8 @@ #include #include +#include + #include #include @@ -54,6 +56,30 @@ constexpr int rolling_index(int index) return index % rolling_size; } +// see setupLocalPageInfo() in page_decode.cuh for supported page encodings +constexpr bool is_supported_encoding(Encoding enc) +{ + switch (enc) { + case Encoding::PLAIN: + case Encoding::PLAIN_DICTIONARY: + case Encoding::RLE: + case Encoding::RLE_DICTIONARY: + case Encoding::DELTA_BINARY_PACKED: return true; + default: return false; + } +} + +/** + * @brief Atomically OR `error` into `error_code`. + */ +constexpr void set_error(int32_t error, int32_t* error_code) +{ + if (error != 0) { + cuda::atomic_ref ref{*error_code}; + ref.fetch_or(error, cuda::std::memory_order_relaxed); + } +} + /** * @brief Enum for the different types of errors that can occur during decoding. 
* @@ -495,9 +521,13 @@ constexpr bool is_string_col(ColumnChunkDesc const& chunk) * * @param[in] chunks List of column chunks * @param[in] num_chunks Number of column chunks + * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ -void DecodePageHeaders(ColumnChunkDesc* chunks, int32_t num_chunks, rmm::cuda_stream_view stream); +void DecodePageHeaders(ColumnChunkDesc* chunks, + int32_t num_chunks, + int32_t* error_code, + rmm::cuda_stream_view stream); /** * @brief Launches kernel for building the dictionary index for the column diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index db81222157a..11c20d0e540 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -15,6 +15,7 @@ */ #include "reader_impl.hpp" +#include "error.hpp" #include #include @@ -163,7 +164,8 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) chunk_nested_valids.host_to_device_async(_stream); chunk_nested_data.host_to_device_async(_stream); - rmm::device_scalar error_code(0, _stream); + // create this before we fork streams + kernel_error error_code(_stream); // get the number of streams we need from the pool and tell them to wait on the H2D copies int const nkernels = std::bitset<32>(kernel_mask).count(); @@ -199,11 +201,8 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) page_nesting.device_to_host_async(_stream); page_nesting_decode.device_to_host_async(_stream); - auto const decode_error = error_code.value(_stream); - if (decode_error != 0) { - std::stringstream stream; - stream << std::hex << decode_error; - CUDF_FAIL("Parquet data decode failed with code(s) 0x" + stream.str()); + if (error_code.value() != 0) { + CUDF_FAIL("Parquet data decode failed with code(s) " + error_code.str()); } // for list columns, add the final offset to every offset buffer. 
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index ce45f709ee1..8494dc72a1d 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "error.hpp" #include "reader_impl.hpp" #include @@ -263,10 +264,15 @@ void generate_depth_remappings(std::map, std::ve { size_t total_pages = 0; + kernel_error error_code(stream); chunks.host_to_device_async(stream); - DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); + DecodePageHeaders(chunks.device_ptr(), chunks.size(), error_code.data(), stream); chunks.device_to_host_sync(stream); + if (error_code.value() != 0) { + CUDF_FAIL("Parquet header parsing failed with code(s) " + error_code.str()); + } + for (size_t c = 0; c < chunks.size(); c++) { total_pages += chunks[c].num_data_pages + chunks[c].num_dict_pages; } @@ -274,19 +280,6 @@ void generate_depth_remappings(std::map, std::ve return total_pages; } -// see setupLocalPageInfo() in page_data.cu for supported page encodings -constexpr bool is_supported_encoding(Encoding enc) -{ - switch (enc) { - case Encoding::PLAIN: - case Encoding::PLAIN_DICTIONARY: - case Encoding::RLE: - case Encoding::RLE_DICTIONARY: - case Encoding::DELTA_BINARY_PACKED: return true; - default: return false; - } -} - /** * @brief Decode the page information from the given column chunks. 
* @@ -307,8 +300,14 @@ int decode_page_headers(cudf::detail::hostdevice_vector& chunks page_count += chunks[c].max_num_pages; } + kernel_error error_code(stream); chunks.host_to_device_async(stream); - DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); + DecodePageHeaders(chunks.device_ptr(), chunks.size(), error_code.data(), stream); + + if (error_code.value() != 0) { + // TODO(ets): if an unsupported encoding was detected, do extra work to figure out which one + CUDF_FAIL("Parquet header parsing failed with code(s)" + error_code.str()); + } // compute max bytes needed for level data auto level_bit_size = @@ -318,22 +317,13 @@ int decode_page_headers(cudf::detail::hostdevice_vector& chunks max(c.level_bits[level_type::REPETITION], c.level_bits[level_type::DEFINITION])); }); // max level data bit size. - int const max_level_bits = thrust::reduce(rmm::exec_policy(stream), + int const max_level_bits = thrust::reduce(rmm::exec_policy(stream), level_bit_size, level_bit_size + chunks.size(), 0, thrust::maximum()); - auto const level_type_size = std::max(1, cudf::util::div_rounding_up_safe(max_level_bits, 8)); - - pages.device_to_host_sync(stream); - // validate page encodings - CUDF_EXPECTS(std::all_of(pages.begin(), - pages.end(), - [](auto const& page) { return is_supported_encoding(page.encoding); }), - "Unsupported page encoding detected"); - - return level_type_size; + return std::max(1, cudf::util::div_rounding_up_safe(max_level_bits, 8)); } /** @@ -771,6 +761,7 @@ void reader::impl::load_and_decompress_data() // decoding of column/page information _pass_itm_data->level_type_size = decode_page_headers(chunks, pages, _stream); + pages.device_to_host_sync(_stream); if (has_compressed_data) { decomp_page_data = decompress_page_data(chunks, pages, _stream); // Free compressed data @@ -795,7 +786,6 @@ void reader::impl::load_and_decompress_data() // std::vector output_info = build_output_column_info(); // the following two allocate functions modify the page 
data - pages.device_to_host_sync(_stream); { // nesting information (sizes, etc) stored -per page- // note : even for flat schemas, we allocate 1 level of "nesting" info diff --git a/python/cudf/cudf/tests/data/parquet/bad_dict.parquet b/python/cudf/cudf/tests/data/parquet/bad_dict.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5008ac0b22b622cbcf37d5e286c749324fc26535 GIT binary patch literal 2850 zcmb7G&2Af26n0F~Hfi|Brja6Y7Z7#fSP&MJN~pStO)_mfb{z*P0aRU&ubpYebEosu z#$t!MKs*8qR@ng0z>-bZJOz)yito(KnX$tHVfi>WbIxG@;>Aa{l8rMmA*#^4*%p<9v!s%mL$)zf6%rEmMtmdUSFPBN3`0PwrojCrG0SP z8&DqY!J*adS*hZeeQEEv$rT)RqI~1aSa*QmfStqtNXt%^8%RXpzJJiDb$?-rA+Q&fUUhN*ivpf&a%BuK5 z{`mRbU;g;%yN_R4zx_`AM1%hWO+u7}GN%tAOF}4<&~p-AV_AHzvw@jG_OyotUwwdY zj^?L$B+k`j;_0SvRH(;^=gq{}^_3?^IxvwNM_9z#iAChiu%{-FJxy?N7zR*W?u2S~U*sh;78E3`k*L0Ok_ezU>Z$QcY7|9qH=8BN zg3v|#RS1%VaFRo&FN09Sp^9eHAU0{6I3cD*PTtgucrh%vi+`@qD4k5uHvcPxApWaG#x`#T;Ba@bX|{ns~7rcrKXUEJ(Ca ztFIKieadgbAGMu=s0tm2k!s9T~s%| zIb53J9kEvZsCsA3SFvmSCpZ8rwRJpL_SID9d+5u;-fhQBrvT@PW4uazcVlzoX#3V? zt-7(YG4H5`z53rv_-^CHZu|S{W^J8RQrGF`%a!`e6>MPl>LxoVqs=+E!rR#P&#SqI k`2

tF8Lg7WV0tw7y%pyB4SrY>0QJVdXCX(Zk#EKmWZ<*#H0l literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index b892cc62ac4..d2c08246518 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -2830,6 +2830,14 @@ def test_parquet_reader_unsupported_page_encoding(datadir): cudf.read_parquet(fname) +def test_parquet_reader_detect_bad_dictionary(datadir): + fname = datadir / "bad_dict.parquet" + + # expect a failure when reading the whole file + with pytest.raises(RuntimeError): + cudf.read_parquet(fname) + + @pytest.mark.parametrize("data", [{"a": [1, 2, 3, 4]}, {"b": [1, None, 2, 3]}]) @pytest.mark.parametrize("force_nullable_schema", [True, False]) def test_parquet_writer_schema_nullability(data, force_nullable_schema): From 253f6a6d5b19387c05368e073954ff773b3d6a39 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Fri, 20 Oct 2023 14:05:53 -0700 Subject: [PATCH 73/76] Refactor LogicalType for Parquet (#14264) Continuation of #14097, this PR refactors the LogicalType struct to use the new way of treating unions defined in the parquet thrift (more enum like than struct like). 
Authors: - Ed Seidl (https://github.com/etseidl) - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14264 --- .../io/parquet/compact_protocol_reader.cpp | 95 +++-------- .../io/parquet/compact_protocol_writer.cpp | 81 +++++---- cpp/src/io/parquet/page_decode.cuh | 3 +- cpp/src/io/parquet/parquet.hpp | 156 +++++++++++------- cpp/src/io/parquet/parquet_gpu.hpp | 30 ++-- cpp/src/io/parquet/reader_impl_chunking.cu | 13 +- cpp/src/io/parquet/reader_impl_helpers.cpp | 104 ++++++------ cpp/src/io/parquet/writer_impl.cu | 107 +++++++----- cpp/tests/io/parquet_test.cpp | 7 +- 9 files changed, 293 insertions(+), 303 deletions(-) diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 1a345ee0750..5a2b8aa8f2a 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -339,61 +339,6 @@ struct parquet_field_struct_list : public parquet_field_list { } }; -// TODO(ets): replace current union handling (which mirrors thrift) to use std::optional fields -// in a struct -/** - * @brief Functor to read a union member from CompactProtocolReader - * - * @tparam is_empty True if tparam `T` type is empty type, else false. 
- * - * @return True if field types mismatch or if the process of reading a - * union member fails - */ -template -class ParquetFieldUnionFunctor : public parquet_field { - bool& is_set; - T& val; - - public: - ParquetFieldUnionFunctor(int f, bool& b, T& v) : parquet_field(f), is_set(b), val(v) {} - - inline bool operator()(CompactProtocolReader* cpr, int field_type) - { - if (field_type != ST_FLD_STRUCT) { - return true; - } else { - is_set = true; - return !cpr->read(&val); - } - } -}; - -template -class ParquetFieldUnionFunctor : public parquet_field { - bool& is_set; - T& val; - - public: - ParquetFieldUnionFunctor(int f, bool& b, T& v) : parquet_field(f), is_set(b), val(v) {} - - inline bool operator()(CompactProtocolReader* cpr, int field_type) - { - if (field_type != ST_FLD_STRUCT) { - return true; - } else { - is_set = true; - cpr->skip_struct_field(field_type); - return false; - } - } -}; - -template -ParquetFieldUnionFunctor> ParquetFieldUnion(int f, bool& b, T& v) -{ - return ParquetFieldUnionFunctor>(f, b, v); -} - /** * @brief Functor to read a binary from CompactProtocolReader * @@ -595,34 +540,38 @@ bool CompactProtocolReader::read(FileMetaData* f) bool CompactProtocolReader::read(SchemaElement* s) { + using optional_converted_type = + parquet_field_optional>; + using optional_logical_type = + parquet_field_optional>; auto op = std::make_tuple(parquet_field_enum(1, s->type), parquet_field_int32(2, s->type_length), parquet_field_enum(3, s->repetition_type), parquet_field_string(4, s->name), parquet_field_int32(5, s->num_children), - parquet_field_enum(6, s->converted_type), + optional_converted_type(6, s->converted_type), parquet_field_int32(7, s->decimal_scale), parquet_field_int32(8, s->decimal_precision), parquet_field_optional(9, s->field_id), - parquet_field_struct(10, s->logical_type)); + optional_logical_type(10, s->logical_type)); return function_builder(this, op); } bool CompactProtocolReader::read(LogicalType* l) { - auto op = - 
std::make_tuple(ParquetFieldUnion(1, l->isset.STRING, l->STRING), - ParquetFieldUnion(2, l->isset.MAP, l->MAP), - ParquetFieldUnion(3, l->isset.LIST, l->LIST), - ParquetFieldUnion(4, l->isset.ENUM, l->ENUM), - ParquetFieldUnion(5, l->isset.DECIMAL, l->DECIMAL), // read the struct - ParquetFieldUnion(6, l->isset.DATE, l->DATE), - ParquetFieldUnion(7, l->isset.TIME, l->TIME), // read the struct - ParquetFieldUnion(8, l->isset.TIMESTAMP, l->TIMESTAMP), // read the struct - ParquetFieldUnion(10, l->isset.INTEGER, l->INTEGER), // read the struct - ParquetFieldUnion(11, l->isset.UNKNOWN, l->UNKNOWN), - ParquetFieldUnion(12, l->isset.JSON, l->JSON), - ParquetFieldUnion(13, l->isset.BSON, l->BSON)); + auto op = std::make_tuple( + parquet_field_union_enumerator(1, l->type), + parquet_field_union_enumerator(2, l->type), + parquet_field_union_enumerator(3, l->type), + parquet_field_union_enumerator(4, l->type), + parquet_field_union_struct(5, l->type, l->decimal_type), + parquet_field_union_enumerator(6, l->type), + parquet_field_union_struct(7, l->type, l->time_type), + parquet_field_union_struct(8, l->type, l->timestamp_type), + parquet_field_union_struct(10, l->type, l->int_type), + parquet_field_union_enumerator(11, l->type), + parquet_field_union_enumerator(12, l->type), + parquet_field_union_enumerator(13, l->type)); return function_builder(this, op); } @@ -648,9 +597,9 @@ bool CompactProtocolReader::read(TimestampType* t) bool CompactProtocolReader::read(TimeUnit* u) { - auto op = std::make_tuple(ParquetFieldUnion(1, u->isset.MILLIS, u->MILLIS), - ParquetFieldUnion(2, u->isset.MICROS, u->MICROS), - ParquetFieldUnion(3, u->isset.NANOS, u->NANOS)); + auto op = std::make_tuple(parquet_field_union_enumerator(1, u->type), + parquet_field_union_enumerator(2, u->type), + parquet_field_union_enumerator(3, u->type)); return function_builder(this, op); } diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index 
00810269d3c..fbeda7f1099 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -16,6 +16,8 @@ #include "compact_protocol_writer.hpp" +#include + namespace cudf::io::parquet::detail { /** @@ -46,13 +48,11 @@ size_t CompactProtocolWriter::write(DecimalType const& decimal) size_t CompactProtocolWriter::write(TimeUnit const& time_unit) { CompactProtocolFieldWriter c(*this); - auto const isset = time_unit.isset; - if (isset.MILLIS) { - c.field_struct(1, time_unit.MILLIS); - } else if (isset.MICROS) { - c.field_struct(2, time_unit.MICROS); - } else if (isset.NANOS) { - c.field_struct(3, time_unit.NANOS); + switch (time_unit.type) { + case TimeUnit::MILLIS: + case TimeUnit::MICROS: + case TimeUnit::NANOS: c.field_empty_struct(time_unit.type); break; + default: CUDF_FAIL("Trying to write an invalid TimeUnit " + std::to_string(time_unit.type)); } return c.value(); } @@ -84,31 +84,29 @@ size_t CompactProtocolWriter::write(IntType const& integer) size_t CompactProtocolWriter::write(LogicalType const& logical_type) { CompactProtocolFieldWriter c(*this); - auto const isset = logical_type.isset; - if (isset.STRING) { - c.field_struct(1, logical_type.STRING); - } else if (isset.MAP) { - c.field_struct(2, logical_type.MAP); - } else if (isset.LIST) { - c.field_struct(3, logical_type.LIST); - } else if (isset.ENUM) { - c.field_struct(4, logical_type.ENUM); - } else if (isset.DECIMAL) { - c.field_struct(5, logical_type.DECIMAL); - } else if (isset.DATE) { - c.field_struct(6, logical_type.DATE); - } else if (isset.TIME) { - c.field_struct(7, logical_type.TIME); - } else if (isset.TIMESTAMP) { - c.field_struct(8, logical_type.TIMESTAMP); - } else if (isset.INTEGER) { - c.field_struct(10, logical_type.INTEGER); - } else if (isset.UNKNOWN) { - c.field_struct(11, logical_type.UNKNOWN); - } else if (isset.JSON) { - c.field_struct(12, logical_type.JSON); - } else if (isset.BSON) { - c.field_struct(13, logical_type.BSON); + 
switch (logical_type.type) { + case LogicalType::STRING: + case LogicalType::MAP: + case LogicalType::LIST: + case LogicalType::ENUM: + case LogicalType::DATE: + case LogicalType::UNKNOWN: + case LogicalType::JSON: + case LogicalType::BSON: c.field_empty_struct(logical_type.type); break; + case LogicalType::DECIMAL: + c.field_struct(LogicalType::DECIMAL, logical_type.decimal_type.value()); + break; + case LogicalType::TIME: + c.field_struct(LogicalType::TIME, logical_type.time_type.value()); + break; + case LogicalType::TIMESTAMP: + c.field_struct(LogicalType::TIMESTAMP, logical_type.timestamp_type.value()); + break; + case LogicalType::INTEGER: + c.field_struct(LogicalType::INTEGER, logical_type.int_type.value()); + break; + default: + CUDF_FAIL("Trying to write an invalid LogicalType " + std::to_string(logical_type.type)); } return c.value(); } @@ -124,20 +122,15 @@ size_t CompactProtocolWriter::write(SchemaElement const& s) c.field_string(4, s.name); if (s.type == UNDEFINED_TYPE) { c.field_int(5, s.num_children); } - if (s.converted_type != UNKNOWN) { - c.field_int(6, s.converted_type); + if (s.converted_type.has_value()) { + c.field_int(6, s.converted_type.value()); if (s.converted_type == DECIMAL) { c.field_int(7, s.decimal_scale); c.field_int(8, s.decimal_precision); } } - if (s.field_id) { c.field_int(9, s.field_id.value()); } - auto const isset = s.logical_type.isset; - // TODO: add handling for all logical types - // if (isset.STRING or isset.MAP or isset.LIST or isset.ENUM or isset.DECIMAL or isset.DATE or - // isset.TIME or isset.TIMESTAMP or isset.INTEGER or isset.UNKNOWN or isset.JSON or isset.BSON) - // { - if (isset.TIMESTAMP or isset.TIME) { c.field_struct(10, s.logical_type); } + if (s.field_id.has_value()) { c.field_int(9, s.field_id.value()); } + if (s.logical_type.has_value()) { c.field_struct(10, s.logical_type.value()); } return c.value(); } @@ -223,9 +216,9 @@ size_t CompactProtocolWriter::write(OffsetIndex const& s) size_t 
CompactProtocolWriter::write(ColumnOrder const& co) { CompactProtocolFieldWriter c(*this); - switch (co) { - case ColumnOrder::TYPE_ORDER: c.field_empty_struct(1); break; - default: break; + switch (co.type) { + case ColumnOrder::TYPE_ORDER: c.field_empty_struct(co.type); break; + default: CUDF_FAIL("Trying to write an invalid ColumnOrder " + std::to_string(co.type)); } return c.value(); } diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 7c866fd8b9e..ab1cc68923d 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -1143,7 +1143,8 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s, units = cudf::timestamp_ms::period::den; } else if (s->col.converted_type == TIMESTAMP_MICROS) { units = cudf::timestamp_us::period::den; - } else if (s->col.logical_type.TIMESTAMP.unit.isset.NANOS) { + } else if (s->col.logical_type.has_value() and + s->col.logical_type->is_timestamp_nanos()) { units = cudf::timestamp_ns::period::den; } if (units and units != s->col.ts_clock_rate) { diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index 1cd16ac6102..699cad89703 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -46,79 +46,98 @@ struct file_ender_s { uint32_t magic; }; -// thrift generated code simplified. -struct StringType {}; -struct MapType {}; -struct ListType {}; -struct EnumType {}; +// thrift inspired code simplified. 
struct DecimalType { int32_t scale = 0; int32_t precision = 0; }; -struct DateType {}; - -struct MilliSeconds {}; -struct MicroSeconds {}; -struct NanoSeconds {}; -using TimeUnit_isset = struct TimeUnit_isset { - bool MILLIS{false}; - bool MICROS{false}; - bool NANOS{false}; -}; struct TimeUnit { - TimeUnit_isset isset; - MilliSeconds MILLIS; - MicroSeconds MICROS; - NanoSeconds NANOS; + enum Type { UNDEFINED, MILLIS, MICROS, NANOS }; + Type type; }; struct TimeType { bool isAdjustedToUTC = false; TimeUnit unit; }; + struct TimestampType { bool isAdjustedToUTC = false; TimeUnit unit; }; + struct IntType { int8_t bitWidth = 0; bool isSigned = false; }; -struct NullType {}; -struct JsonType {}; -struct BsonType {}; - -// thrift generated code simplified. -using LogicalType_isset = struct LogicalType_isset { - bool STRING{false}; - bool MAP{false}; - bool LIST{false}; - bool ENUM{false}; - bool DECIMAL{false}; - bool DATE{false}; - bool TIME{false}; - bool TIMESTAMP{false}; - bool INTEGER{false}; - bool UNKNOWN{false}; - bool JSON{false}; - bool BSON{false}; -}; struct LogicalType { - LogicalType_isset isset; - StringType STRING; - MapType MAP; - ListType LIST; - EnumType ENUM; - DecimalType DECIMAL; - DateType DATE; - TimeType TIME; - TimestampType TIMESTAMP; - IntType INTEGER; - NullType UNKNOWN; - JsonType JSON; - BsonType BSON; + enum Type { + UNDEFINED, + STRING, + MAP, + LIST, + ENUM, + DECIMAL, + DATE, + TIME, + TIMESTAMP, + // 9 is reserved + INTEGER = 10, + UNKNOWN, + JSON, + BSON + }; + Type type; + thrust::optional decimal_type; + thrust::optional time_type; + thrust::optional timestamp_type; + thrust::optional int_type; + + LogicalType(Type tp = UNDEFINED) : type(tp) {} + LogicalType(DecimalType&& dt) : type(DECIMAL), decimal_type(dt) {} + LogicalType(TimeType&& tt) : type(TIME), time_type(tt) {} + LogicalType(TimestampType&& tst) : type(TIMESTAMP), timestamp_type(tst) {} + LogicalType(IntType&& it) : type(INTEGER), int_type(it) {} + + constexpr bool 
is_time_millis() const + { + return type == TIME and time_type->unit.type == TimeUnit::MILLIS; + } + + constexpr bool is_time_micros() const + { + return type == TIME and time_type->unit.type == TimeUnit::MICROS; + } + + constexpr bool is_time_nanos() const + { + return type == TIME and time_type->unit.type == TimeUnit::NANOS; + } + + constexpr bool is_timestamp_millis() const + { + return type == TIMESTAMP and timestamp_type->unit.type == TimeUnit::MILLIS; + } + + constexpr bool is_timestamp_micros() const + { + return type == TIMESTAMP and timestamp_type->unit.type == TimeUnit::MICROS; + } + + constexpr bool is_timestamp_nanos() const + { + return type == TIMESTAMP and timestamp_type->unit.type == TimeUnit::NANOS; + } + + constexpr int8_t bit_width() const { return type == INTEGER ? int_type->bitWidth : -1; } + + constexpr bool is_signed() const { return type == INTEGER and int_type->isSigned; } + + constexpr int32_t scale() const { return type == DECIMAL ? decimal_type->scale : -1; } + + constexpr int32_t precision() const { return type == DECIMAL ? decimal_type->precision : -1; } }; /** @@ -127,8 +146,6 @@ struct LogicalType { struct ColumnOrder { enum Type { UNDEFINED, TYPE_ORDER }; Type type; - - operator Type() const { return type; } }; /** @@ -138,18 +155,29 @@ struct ColumnOrder { * as a schema tree. 
*/ struct SchemaElement { - Type type = UNDEFINED_TYPE; - ConvertedType converted_type = UNKNOWN; - LogicalType logical_type; - int32_t type_length = - 0; // Byte length of FIXED_LENGTH_BYTE_ARRAY elements, or maximum bit length for other types + // 1: parquet physical type for output + Type type = UNDEFINED_TYPE; + // 2: byte length of FIXED_LENGTH_BYTE_ARRAY elements, or maximum bit length for other types + int32_t type_length = 0; + // 3: repetition of the field FieldRepetitionType repetition_type = REQUIRED; - std::string name = ""; - int32_t num_children = 0; - int32_t decimal_scale = 0; - int32_t decimal_precision = 0; - thrust::optional field_id = thrust::nullopt; - bool output_as_byte_array = false; + // 4: name of the field + std::string name = ""; + // 5: nested fields + int32_t num_children = 0; + // 6: DEPRECATED: record the original type before conversion to parquet type + thrust::optional converted_type; + // 7: DEPRECATED: record the scale for DECIMAL converted type + int32_t decimal_scale = 0; + // 8: DEPRECATED: record the precision for DECIMAL converted type + int32_t decimal_precision = 0; + // 9: save field_id from original schema + thrust::optional field_id; + // 10: replaces converted type + thrust::optional logical_type; + + // extra cudf specific fields + bool output_as_byte_array = false; // The following fields are filled in later during schema initialization int max_definition_level = 0; diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 164e2cea2ed..68851e72663 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -313,7 +313,7 @@ struct ColumnChunkDesc { uint8_t rep_level_bits_, int8_t codec_, int8_t converted_type_, - LogicalType logical_type_, + thrust::optional logical_type_, int8_t decimal_precision_, int32_t ts_clock_rate_, int32_t src_col_index_, @@ -355,20 +355,20 @@ struct ColumnChunkDesc { uint16_t data_type{}; // basic column data type, ((type_length 
<< 3) | // parquet::Type) uint8_t - level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels - int32_t num_data_pages{}; // number of data pages - int32_t num_dict_pages{}; // number of dictionary pages - int32_t max_num_pages{}; // size of page_info array - PageInfo* page_info{}; // output page info for up to num_dict_pages + - // num_data_pages (dictionary pages first) - string_index_pair* str_dict_index{}; // index for string dictionary - bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column - void** column_data_base{}; // base pointers of column data - void** column_string_base{}; // base pointers of column string data - int8_t codec{}; // compressed codec enum - int8_t converted_type{}; // converted type enum - LogicalType logical_type{}; // logical type - int8_t decimal_precision{}; // Decimal precision + level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels + int32_t num_data_pages{}; // number of data pages + int32_t num_dict_pages{}; // number of dictionary pages + int32_t max_num_pages{}; // size of page_info array + PageInfo* page_info{}; // output page info for up to num_dict_pages + + // num_data_pages (dictionary pages first) + string_index_pair* str_dict_index{}; // index for string dictionary + bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column + void** column_data_base{}; // base pointers of column data + void** column_string_base{}; // base pointers of column string data + int8_t codec{}; // compressed codec enum + int8_t converted_type{}; // converted type enum + thrust::optional logical_type{}; // logical type + int8_t decimal_precision{}; // Decimal precision int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) int32_t src_col_index{}; // my input column index diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index 
ad52a7dfcc1..213fc380a34 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -304,11 +304,12 @@ std::vector find_splits(std::vector const& * * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. */ -[[nodiscard]] std::tuple conversion_info(type_id column_type_id, - type_id timestamp_type_id, - Type physical, - int8_t converted, - int32_t length) +[[nodiscard]] std::tuple conversion_info( + type_id column_type_id, + type_id timestamp_type_id, + Type physical, + thrust::optional converted, + int32_t length) { int32_t type_width = (physical == FIXED_LEN_BYTE_ARRAY) ? length : 0; int32_t clock_rate = 0; @@ -322,7 +323,7 @@ std::vector find_splits(std::vector const& clock_rate = to_clockrate(timestamp_type_id); } - int8_t converted_type = converted; + int8_t converted_type = converted.value_or(UNKNOWN); if (converted_type == DECIMAL && column_type_id != type_id::FLOAT64 && not cudf::is_fixed_point(data_type{column_type_id})) { converted_type = UNKNOWN; // Not converting to float64 or decimal diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 040c6403f57..a9c84143e1a 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -25,44 +25,42 @@ namespace cudf::io::parquet::detail { namespace { -ConvertedType logical_type_to_converted_type(LogicalType const& logical) +ConvertedType logical_type_to_converted_type(thrust::optional const& logical) { - if (logical.isset.STRING) { - return UTF8; - } else if (logical.isset.MAP) { - return MAP; - } else if (logical.isset.LIST) { - return LIST; - } else if (logical.isset.ENUM) { - return ENUM; - } else if (logical.isset.DECIMAL) { - return DECIMAL; // TODO set decimal values - } else if (logical.isset.DATE) { - return DATE; - } else if (logical.isset.TIME) { - if (logical.TIME.unit.isset.MILLIS) - return TIME_MILLIS; - else if 
(logical.TIME.unit.isset.MICROS) - return TIME_MICROS; - } else if (logical.isset.TIMESTAMP) { - if (logical.TIMESTAMP.unit.isset.MILLIS) - return TIMESTAMP_MILLIS; - else if (logical.TIMESTAMP.unit.isset.MICROS) - return TIMESTAMP_MICROS; - } else if (logical.isset.INTEGER) { - switch (logical.INTEGER.bitWidth) { - case 8: return logical.INTEGER.isSigned ? INT_8 : UINT_8; - case 16: return logical.INTEGER.isSigned ? INT_16 : UINT_16; - case 32: return logical.INTEGER.isSigned ? INT_32 : UINT_32; - case 64: return logical.INTEGER.isSigned ? INT_64 : UINT_64; - default: break; - } - } else if (logical.isset.UNKNOWN) { - return NA; - } else if (logical.isset.JSON) { - return JSON; - } else if (logical.isset.BSON) { - return BSON; + if (not logical.has_value()) { return UNKNOWN; } + switch (logical->type) { + case LogicalType::STRING: return UTF8; + case LogicalType::MAP: return MAP; + case LogicalType::LIST: return LIST; + case LogicalType::ENUM: return ENUM; + case LogicalType::DECIMAL: return DECIMAL; // TODO use decimal scale/precision + case LogicalType::DATE: return DATE; + case LogicalType::TIME: + if (logical->is_time_millis()) { + return TIME_MILLIS; + } else if (logical->is_time_micros()) { + return TIME_MICROS; + } + break; + case LogicalType::TIMESTAMP: + if (logical->is_timestamp_millis()) { + return TIMESTAMP_MILLIS; + } else if (logical->is_timestamp_micros()) { + return TIMESTAMP_MICROS; + } + break; + case LogicalType::INTEGER: + switch (logical->bit_width()) { + case 8: return logical->is_signed() ? INT_8 : UINT_8; + case 16: return logical->is_signed() ? INT_16 : UINT_16; + case 32: return logical->is_signed() ? INT_32 : UINT_32; + case 64: return logical->is_signed() ? 
INT_64 : UINT_64; + default: break; + } + case LogicalType::UNKNOWN: return NA; + case LogicalType::JSON: return JSON; + case LogicalType::BSON: return BSON; + default: break; } return UNKNOWN; } @@ -76,20 +74,20 @@ type_id to_type_id(SchemaElement const& schema, bool strings_to_categorical, type_id timestamp_type_id) { - Type const physical = schema.type; - LogicalType const logical_type = schema.logical_type; - ConvertedType converted_type = schema.converted_type; - int32_t decimal_precision = schema.decimal_precision; + auto const physical = schema.type; + auto const logical_type = schema.logical_type; + auto converted_type = schema.converted_type; + int32_t decimal_precision = schema.decimal_precision; + // FIXME(ets): this should just use logical type to deduce the type_id. then fall back to + // converted_type if logical_type isn't set // Logical type used for actual data interpretation; the legacy converted type // is superseded by 'logical' type whenever available. auto const inferred_converted_type = logical_type_to_converted_type(logical_type); if (inferred_converted_type != UNKNOWN) { converted_type = inferred_converted_type; } - if (inferred_converted_type == DECIMAL) { - decimal_precision = schema.logical_type.DECIMAL.precision; - } + if (inferred_converted_type == DECIMAL) { decimal_precision = schema.logical_type->precision(); } - switch (converted_type) { + switch (converted_type.value_or(UNKNOWN)) { case UINT_8: return type_id::UINT8; case INT_8: return type_id::INT8; case UINT_16: return type_id::UINT16; @@ -140,15 +138,13 @@ type_id to_type_id(SchemaElement const& schema, default: break; } - if (inferred_converted_type == UNKNOWN and physical == INT64 and - logical_type.TIMESTAMP.unit.isset.NANOS) { - return (timestamp_type_id != type_id::EMPTY) ? 
timestamp_type_id - : type_id::TIMESTAMP_NANOSECONDS; - } - - if (inferred_converted_type == UNKNOWN and physical == INT64 and - logical_type.TIME.unit.isset.NANOS) { - return type_id::DURATION_NANOSECONDS; + if (inferred_converted_type == UNKNOWN and physical == INT64 and logical_type.has_value()) { + if (logical_type->is_timestamp_nanos()) { + return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id + : type_id::TIMESTAMP_NANOSECONDS; + } else if (logical_type->is_time_nanos()) { + return type_id::DURATION_NANOSECONDS; + } } // is it simply a struct? diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 50589f23626..c06acc1690b 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -284,6 +284,7 @@ struct leaf_schema_fn { { col_schema.type = Type::BOOLEAN; col_schema.stats_dtype = statistics_dtype::dtype_bool; + // BOOLEAN needs no converted or logical type } template @@ -292,6 +293,7 @@ struct leaf_schema_fn { col_schema.type = Type::INT32; col_schema.converted_type = ConvertedType::INT_8; col_schema.stats_dtype = statistics_dtype::dtype_int8; + col_schema.logical_type = LogicalType{IntType{8, true}}; } template @@ -300,6 +302,7 @@ struct leaf_schema_fn { col_schema.type = Type::INT32; col_schema.converted_type = ConvertedType::INT_16; col_schema.stats_dtype = statistics_dtype::dtype_int16; + col_schema.logical_type = LogicalType{IntType{16, true}}; } template @@ -307,6 +310,7 @@ struct leaf_schema_fn { { col_schema.type = Type::INT32; col_schema.stats_dtype = statistics_dtype::dtype_int32; + // INT32 needs no converted or logical type } template @@ -314,6 +318,7 @@ struct leaf_schema_fn { { col_schema.type = Type::INT64; col_schema.stats_dtype = statistics_dtype::dtype_int64; + // INT64 needs no converted or logical type } template @@ -322,6 +327,7 @@ struct leaf_schema_fn { col_schema.type = Type::INT32; col_schema.converted_type = ConvertedType::UINT_8; col_schema.stats_dtype = 
statistics_dtype::dtype_int8; + col_schema.logical_type = LogicalType{IntType{8, false}}; } template @@ -330,6 +336,7 @@ struct leaf_schema_fn { col_schema.type = Type::INT32; col_schema.converted_type = ConvertedType::UINT_16; col_schema.stats_dtype = statistics_dtype::dtype_int16; + col_schema.logical_type = LogicalType{IntType{16, false}}; } template @@ -338,6 +345,7 @@ struct leaf_schema_fn { col_schema.type = Type::INT32; col_schema.converted_type = ConvertedType::UINT_32; col_schema.stats_dtype = statistics_dtype::dtype_int32; + col_schema.logical_type = LogicalType{IntType{32, false}}; } template @@ -346,6 +354,7 @@ struct leaf_schema_fn { col_schema.type = Type::INT64; col_schema.converted_type = ConvertedType::UINT_64; col_schema.stats_dtype = statistics_dtype::dtype_int64; + col_schema.logical_type = LogicalType{IntType{64, false}}; } template @@ -353,6 +362,7 @@ struct leaf_schema_fn { { col_schema.type = Type::FLOAT; col_schema.stats_dtype = statistics_dtype::dtype_float32; + // FLOAT needs no converted or logical type } template @@ -360,6 +370,7 @@ struct leaf_schema_fn { { col_schema.type = Type::DOUBLE; col_schema.stats_dtype = statistics_dtype::dtype_float64; + // DOUBLE needs no converted or logical type } template @@ -367,11 +378,12 @@ struct leaf_schema_fn { { col_schema.type = Type::BYTE_ARRAY; if (col_meta.is_enabled_output_as_binary()) { - col_schema.converted_type = ConvertedType::UNKNOWN; - col_schema.stats_dtype = statistics_dtype::dtype_byte_array; + col_schema.stats_dtype = statistics_dtype::dtype_byte_array; + // BYTE_ARRAY needs no converted or logical type } else { col_schema.converted_type = ConvertedType::UTF8; col_schema.stats_dtype = statistics_dtype::dtype_string; + col_schema.logical_type = LogicalType{LogicalType::STRING}; } } @@ -381,49 +393,55 @@ struct leaf_schema_fn { col_schema.type = Type::INT32; col_schema.converted_type = ConvertedType::DATE; col_schema.stats_dtype = statistics_dtype::dtype_int32; + 
col_schema.logical_type = LogicalType{LogicalType::DATE}; } template std::enable_if_t, void> operator()() { - col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64; - col_schema.converted_type = - (timestamp_is_int96) ? ConvertedType::UNKNOWN : ConvertedType::TIMESTAMP_MILLIS; + col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64; col_schema.stats_dtype = statistics_dtype::dtype_timestamp64; col_schema.ts_scale = 1000; + if (not timestamp_is_int96) { + col_schema.converted_type = ConvertedType::TIMESTAMP_MILLIS; + col_schema.logical_type = LogicalType{TimestampType{false, TimeUnit::MILLIS}}; + } } template std::enable_if_t, void> operator()() { - col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64; - col_schema.converted_type = - (timestamp_is_int96) ? ConvertedType::UNKNOWN : ConvertedType::TIMESTAMP_MILLIS; + col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64; col_schema.stats_dtype = statistics_dtype::dtype_timestamp64; + if (not timestamp_is_int96) { + col_schema.converted_type = ConvertedType::TIMESTAMP_MILLIS; + col_schema.logical_type = LogicalType{TimestampType{false, TimeUnit::MILLIS}}; + } } template std::enable_if_t, void> operator()() { - col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64; - col_schema.converted_type = - (timestamp_is_int96) ? ConvertedType::UNKNOWN : ConvertedType::TIMESTAMP_MICROS; + col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64; col_schema.stats_dtype = statistics_dtype::dtype_timestamp64; + if (not timestamp_is_int96) { + col_schema.converted_type = ConvertedType::TIMESTAMP_MICROS; + col_schema.logical_type = LogicalType{TimestampType{false, TimeUnit::MICROS}}; + } } template std::enable_if_t, void> operator()() { col_schema.type = (timestamp_is_int96) ? 
Type::INT96 : Type::INT64; - col_schema.converted_type = ConvertedType::UNKNOWN; + col_schema.converted_type = thrust::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_timestamp64; if (timestamp_is_int96) { col_schema.ts_scale = -1000; // negative value indicates division by absolute value } // set logical type if it's not int96 else { - col_schema.logical_type.isset.TIMESTAMP = true; - col_schema.logical_type.TIMESTAMP.unit.isset.NANOS = true; + col_schema.logical_type = LogicalType{TimestampType{false, TimeUnit::NANOS}}; } } @@ -431,53 +449,48 @@ struct leaf_schema_fn { template std::enable_if_t, void> operator()() { - col_schema.type = Type::INT32; - col_schema.converted_type = ConvertedType::TIME_MILLIS; - col_schema.stats_dtype = statistics_dtype::dtype_int32; - col_schema.ts_scale = 24 * 60 * 60 * 1000; - col_schema.logical_type.isset.TIME = true; - col_schema.logical_type.TIME.unit.isset.MILLIS = true; + col_schema.type = Type::INT32; + col_schema.converted_type = ConvertedType::TIME_MILLIS; + col_schema.stats_dtype = statistics_dtype::dtype_int32; + col_schema.ts_scale = 24 * 60 * 60 * 1000; + col_schema.logical_type = LogicalType{TimeType{false, TimeUnit::MILLIS}}; } template std::enable_if_t, void> operator()() { - col_schema.type = Type::INT32; - col_schema.converted_type = ConvertedType::TIME_MILLIS; - col_schema.stats_dtype = statistics_dtype::dtype_int32; - col_schema.ts_scale = 1000; - col_schema.logical_type.isset.TIME = true; - col_schema.logical_type.TIME.unit.isset.MILLIS = true; + col_schema.type = Type::INT32; + col_schema.converted_type = ConvertedType::TIME_MILLIS; + col_schema.stats_dtype = statistics_dtype::dtype_int32; + col_schema.ts_scale = 1000; + col_schema.logical_type = LogicalType{TimeType{false, TimeUnit::MILLIS}}; } template std::enable_if_t, void> operator()() { - col_schema.type = Type::INT32; - col_schema.converted_type = ConvertedType::TIME_MILLIS; - col_schema.stats_dtype = statistics_dtype::dtype_int32; - 
col_schema.logical_type.isset.TIME = true; - col_schema.logical_type.TIME.unit.isset.MILLIS = true; + col_schema.type = Type::INT32; + col_schema.converted_type = ConvertedType::TIME_MILLIS; + col_schema.stats_dtype = statistics_dtype::dtype_int32; + col_schema.logical_type = LogicalType{TimeType{false, TimeUnit::MILLIS}}; } template std::enable_if_t, void> operator()() { - col_schema.type = Type::INT64; - col_schema.converted_type = ConvertedType::TIME_MICROS; - col_schema.stats_dtype = statistics_dtype::dtype_int64; - col_schema.logical_type.isset.TIME = true; - col_schema.logical_type.TIME.unit.isset.MICROS = true; + col_schema.type = Type::INT64; + col_schema.converted_type = ConvertedType::TIME_MICROS; + col_schema.stats_dtype = statistics_dtype::dtype_int64; + col_schema.logical_type = LogicalType{TimeType{false, TimeUnit::MICROS}}; } // unsupported outside cudf for parquet 1.0. template std::enable_if_t, void> operator()() { - col_schema.type = Type::INT64; - col_schema.stats_dtype = statistics_dtype::dtype_int64; - col_schema.logical_type.isset.TIME = true; - col_schema.logical_type.TIME.unit.isset.NANOS = true; + col_schema.type = Type::INT64; + col_schema.stats_dtype = statistics_dtype::dtype_int64; + col_schema.logical_type = LogicalType{TimeType{false, TimeUnit::NANOS}}; } template @@ -487,27 +500,32 @@ struct leaf_schema_fn { col_schema.type = Type::INT32; col_schema.stats_dtype = statistics_dtype::dtype_int32; col_schema.decimal_precision = MAX_DECIMAL32_PRECISION; + col_schema.logical_type = LogicalType{DecimalType{0, MAX_DECIMAL32_PRECISION}}; } else if (std::is_same_v) { col_schema.type = Type::INT64; col_schema.stats_dtype = statistics_dtype::dtype_decimal64; col_schema.decimal_precision = MAX_DECIMAL64_PRECISION; + col_schema.logical_type = LogicalType{DecimalType{0, MAX_DECIMAL64_PRECISION}}; } else if (std::is_same_v) { col_schema.type = Type::FIXED_LEN_BYTE_ARRAY; col_schema.type_length = sizeof(__int128_t); col_schema.stats_dtype = 
statistics_dtype::dtype_decimal128; col_schema.decimal_precision = MAX_DECIMAL128_PRECISION; + col_schema.logical_type = LogicalType{DecimalType{0, MAX_DECIMAL128_PRECISION}}; } else { CUDF_FAIL("Unsupported fixed point type for parquet writer"); } col_schema.converted_type = ConvertedType::DECIMAL; col_schema.decimal_scale = -col->type().scale(); // parquet and cudf disagree about scale signs + col_schema.logical_type->decimal_type->scale = -col->type().scale(); if (col_meta.is_decimal_precision_set()) { CUDF_EXPECTS(col_meta.get_decimal_precision() >= col_schema.decimal_scale, "Precision must be equal to or greater than scale!"); if (col_schema.type == Type::INT64 and col_meta.get_decimal_precision() < 10) { CUDF_LOG_WARN("Parquet writer: writing a decimal column with precision < 10 as int64"); } - col_schema.decimal_precision = col_meta.get_decimal_precision(); + col_schema.decimal_precision = col_meta.get_decimal_precision(); + col_schema.logical_type->decimal_type->precision = col_meta.get_decimal_precision(); } } @@ -593,7 +611,7 @@ std::vector construct_schema_tree( schema_tree_node col_schema{}; col_schema.type = Type::BYTE_ARRAY; - col_schema.converted_type = ConvertedType::UNKNOWN; + col_schema.converted_type = thrust::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_byte_array; col_schema.repetition_type = col_nullable ? OPTIONAL : REQUIRED; col_schema.name = (schema[parent_idx].name == "list") ? 
"element" : col_meta.get_name(); @@ -762,7 +780,10 @@ struct parquet_column_view { [[nodiscard]] column_view cudf_column_view() const { return cudf_col; } [[nodiscard]] Type physical_type() const { return schema_node.type; } - [[nodiscard]] ConvertedType converted_type() const { return schema_node.converted_type; } + [[nodiscard]] ConvertedType converted_type() const + { + return schema_node.converted_type.value_or(UNKNOWN); + } std::vector const& get_path_in_schema() { return path_in_schema; } diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 2a654bd7e8c..fece83f891b 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -4075,11 +4075,12 @@ int32_t compare(T& v1, T& v2) int32_t compare_binary(std::vector const& v1, std::vector const& v2, cudf::io::parquet::detail::Type ptype, - cudf::io::parquet::detail::ConvertedType ctype) + thrust::optional const& ctype) { + auto ctype_val = ctype.value_or(cudf::io::parquet::detail::UNKNOWN); switch (ptype) { case cudf::io::parquet::detail::INT32: - switch (ctype) { + switch (ctype_val) { case cudf::io::parquet::detail::UINT_8: case cudf::io::parquet::detail::UINT_16: case cudf::io::parquet::detail::UINT_32: @@ -4091,7 +4092,7 @@ int32_t compare_binary(std::vector const& v1, } case cudf::io::parquet::detail::INT64: - if (ctype == cudf::io::parquet::detail::UINT_64) { + if (ctype_val == cudf::io::parquet::detail::UINT_64) { return compare(*(reinterpret_cast(v1.data())), *(reinterpret_cast(v2.data()))); } From 8ae3aab79e14f8af733879e1e4b62b75b0f62368 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Mon, 23 Oct 2023 10:43:33 -0700 Subject: [PATCH 74/76] Extract `debug_utilities.hpp/cu` from `column_utilities.hpp/cu` (#13720) This PR extracts the implementation of the debug utility function `cudf::test::print()` from `column_utilities.hpp/cu` into its separate header/source files (`debug_utilities.hpp/cu`) for better organizing 
the relevant code. The new header name is also more expressive and more relevant to its purpose. The changes in this PR are only moving code around. Not any new functionality or implementation was added. Closes https://github.com/rapidsai/cudf/issues/13450 (although this is not to address that issue). Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Robert Maynard (https://github.com/robertmaynard) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/13720 --- cpp/CMakeLists.txt | 1 + cpp/include/cudf_test/column_utilities.hpp | 33 -- cpp/include/cudf_test/debug_utilities.hpp | 47 ++ .../cudf_test/detail/column_utilities.hpp | 85 ---- cpp/tests/CMakeLists.txt | 1 + cpp/tests/groupby/structs_tests.cpp | 1 + cpp/tests/utilities/column_utilities.cu | 408 +-------------- cpp/tests/utilities/debug_utilities.cu | 480 ++++++++++++++++++ .../utilities_tests/column_debug_tests.cpp | 137 +++++ .../column_utilities_tests.cpp | 100 ---- 10 files changed, 674 insertions(+), 619 deletions(-) create mode 100644 cpp/include/cudf_test/debug_utilities.hpp delete mode 100644 cpp/include/cudf_test/detail/column_utilities.hpp create mode 100644 cpp/tests/utilities/debug_utilities.cu create mode 100644 cpp/tests/utilities_tests/column_debug_tests.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f8b9762f1d4..472ee9d9fd4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -835,6 +835,7 @@ if(CUDF_BUILD_TESTUTIL) tests/io/metadata_utilities.cpp tests/utilities/base_fixture.cpp tests/utilities/column_utilities.cu + tests/utilities/debug_utilities.cu tests/utilities/table_utilities.cu tests/utilities/tdigest_utilities.cu ) diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index 059bd10eae1..f6872fcdd6d 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -140,39 +140,6 
@@ void expect_equal_buffers(void const* lhs, void const* rhs, std::size_t size_byt */ void expect_column_empty(cudf::column_view const& col); -/** - * @brief Formats a column view as a string - * - * @param col The column view - * @param delimiter The delimiter to put between strings - */ -std::string to_string(cudf::column_view const& col, std::string const& delimiter); - -/** - * @brief Formats a null mask as a string - * - * @param null_mask The null mask buffer - * @param null_mask_size Size of the null mask (in rows) - */ -std::string to_string(std::vector const& null_mask, size_type null_mask_size); - -/** - * @brief Convert column values to a host vector of strings - * - * @param col The column view - */ -std::vector to_strings(cudf::column_view const& col); - -/** - * @brief Print a column view to an ostream - * - * @param os The output stream - * @param col The column view - */ -void print(cudf::column_view const& col, - std::ostream& os = std::cout, - std::string const& delimiter = ","); - /** * @brief Copy the null bitmask from a column view to a host vector * diff --git a/cpp/include/cudf_test/debug_utilities.hpp b/cpp/include/cudf_test/debug_utilities.hpp new file mode 100644 index 00000000000..a0881490b82 --- /dev/null +++ b/cpp/include/cudf_test/debug_utilities.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace cudf::test { + +/** + * @brief Formats a column view as a string + * + * @param col The input column view + * @param delimiter The delimiter to put between strings + */ +std::string to_string(cudf::column_view const& col, std::string const& delimiter); + +/** + * @brief Convert column values to a host vector of strings + * + * @param col The input column view + */ +std::vector to_strings(cudf::column_view const& col); + +/** + * @brief Print a column view to an ostream + * + * @param col The input column view + * @param os The output stream + */ +void print(cudf::column_view const& col, std::ostream& os = std::cout); + +} // namespace cudf::test diff --git a/cpp/include/cudf_test/detail/column_utilities.hpp b/cpp/include/cudf_test/detail/column_utilities.hpp deleted file mode 100644 index f8270f61f10..00000000000 --- a/cpp/include/cudf_test/detail/column_utilities.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace cudf { -namespace test { -namespace detail { - -/** - * @brief Formats a column view as a string - * - * @param col The column view - * @param delimiter The delimiter to put between strings - * @param indent Indentation for all output. See detail::to_strings for detailed - * explanation. 
- */ -std::string to_string(cudf::column_view const& col, - std::string const& delimiter, - std::string const& indent = ""); - -/** - * @brief Formats a null mask as a string - * - * @param null_mask The null mask buffer - * @param null_mask_size Size of the null mask (in rows) - * @param indent Indentation for all output. See detail::to_strings for detailed - * explanation. - */ -std::string to_string(std::vector const& null_mask, - size_type null_mask_size, - std::string const& indent = ""); - -/** - * @brief Convert column values to a host vector of strings - * - * Supports indentation of all output. For example, if the displayed output of your column - * would be - * - * @code{.pseudo} - * "1,2,3,4,5" - * @endcode - * and the `indent` parameter was " ", that indentation would be prepended to - * result in the output - * @code{.pseudo} - * " 1,2,3,4,5" - * @endcode - * - * The can be useful for displaying complex types. An example use case would be for - * displaying the nesting of a LIST type column (via recursion). 
- * - * List>: - * Length : 3 - * Offsets : 0, 2, 5, 6 - * Children : - * List: - * Length : 6 - * Offsets : 0, 2, 4, 7, 8, 9, 11 - * Children : - * 1, 2, 3, 4, 5, 6, 7, 0, 8, 9, 10 - * - * @param col The column view - * @param indent Indentation for all output - */ -std::vector to_strings(cudf::column_view const& col, std::string const& indent = ""); - -} // namespace detail -} // namespace test -} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 16e7239ebd8..eb0585d8f3e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -357,6 +357,7 @@ ConfigureTest( ConfigureTest( UTILITIES_TEST utilities_tests/type_list_tests.cpp + utilities_tests/column_debug_tests.cpp utilities_tests/column_utilities_tests.cpp utilities_tests/column_wrapper_tests.cpp utilities_tests/lists_column_wrapper_tests.cpp diff --git a/cpp/tests/groupby/structs_tests.cpp b/cpp/tests/groupby/structs_tests.cpp index f85fc6335f6..af6f613d344 100644 --- a/cpp/tests/groupby/structs_tests.cpp +++ b/cpp/tests/groupby/structs_tests.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 620e0bfe8de..f54ea28d9b2 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -14,28 +14,24 @@ * limitations under the License. 
*/ +#include +#include +#include +#include +#include + #include #include #include #include #include -#include -#include -#include +#include #include -#include #include #include -#include #include #include -#include -#include -#include -#include -#include - #include #include @@ -928,396 +924,6 @@ std::vector bitmask_to_host(cudf::column_view const& c) } } -namespace { - -template >* = nullptr> -static auto numeric_to_string_precise(T value) -{ - return std::to_string(value); -} - -template >* = nullptr> -static auto numeric_to_string_precise(T value) -{ - std::ostringstream o; - o << std::setprecision(std::numeric_limits::max_digits10) << value; - return o.str(); -} - -static auto duration_suffix(cudf::duration_D) { return " days"; } - -static auto duration_suffix(cudf::duration_s) { return " seconds"; } - -static auto duration_suffix(cudf::duration_ms) { return " milliseconds"; } - -static auto duration_suffix(cudf::duration_us) { return " microseconds"; } - -static auto duration_suffix(cudf::duration_ns) { return " nanoseconds"; } - -std::string get_nested_type_str(cudf::column_view const& view) -{ - if (view.type().id() == cudf::type_id::LIST) { - lists_column_view lcv(view); - return cudf::type_to_name(view.type()) + "<" + (get_nested_type_str(lcv.child())) + ">"; - } - - if (view.type().id() == cudf::type_id::STRUCT) { - std::ostringstream out; - - out << cudf::type_to_name(view.type()) + "<"; - std::transform(view.child_begin(), - view.child_end(), - std::ostream_iterator(out, ","), - [&out](auto const col) { return get_nested_type_str(col); }); - out << ">"; - return out.str(); - } - - return cudf::type_to_name(view.type()); -} - -template -std::string nested_offsets_to_string(NestedColumnView const& c, std::string const& delimiter = ", ") -{ - column_view offsets = (c.parent()).child(NestedColumnView::offsets_column_index); - CUDF_EXPECTS(offsets.type().id() == type_id::INT32, - "Column does not appear to be an offsets column"); - 
CUDF_EXPECTS(offsets.offset() == 0, "Offsets column has an internal offset!"); - size_type output_size = c.size() + 1; - - // the first offset value to normalize everything against - size_type first = - cudf::detail::get_value(offsets, c.offset(), cudf::test::get_default_stream()); - rmm::device_uvector shifted_offsets(output_size, cudf::test::get_default_stream()); - - // normalize the offset values for the column offset - size_type const* d_offsets = offsets.head() + c.offset(); - thrust::transform( - rmm::exec_policy(cudf::test::get_default_stream()), - d_offsets, - d_offsets + output_size, - shifted_offsets.begin(), - [first] __device__(int32_t offset) { return static_cast(offset - first); }); - - auto const h_shifted_offsets = - cudf::detail::make_host_vector_sync(shifted_offsets, cudf::test::get_default_stream()); - std::ostringstream buffer; - for (size_t idx = 0; idx < h_shifted_offsets.size(); idx++) { - buffer << h_shifted_offsets[idx]; - if (idx < h_shifted_offsets.size() - 1) { buffer << delimiter; } - } - return buffer.str(); -} - -struct column_view_printer { - template ()>* = nullptr> - void operator()(cudf::column_view const& col, std::vector& out, std::string const&) - { - auto h_data = cudf::test::to_host(col); - - out.resize(col.size()); - - if (col.nullable()) { - std::transform(thrust::make_counting_iterator(size_type{0}), - thrust::make_counting_iterator(col.size()), - out.begin(), - [&h_data](auto idx) { - return bit_is_set(h_data.second.data(), idx) - ? 
numeric_to_string_precise(h_data.first[idx]) - : std::string("NULL"); - }); - - } else { - std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [](Element el) { - return numeric_to_string_precise(el); - }); - } - } - - template ()>* = nullptr> - void operator()(cudf::column_view const& col, - std::vector& out, - std::string const& indent) - { - // For timestamps, convert timestamp column to column of strings, then - // call string version - std::string format = [&]() { - if constexpr (std::is_same_v) { - return std::string{"%Y-%m-%dT%H:%M:%SZ"}; - } else if constexpr (std::is_same_v) { - return std::string{"%Y-%m-%dT%H:%M:%S.%3fZ"}; - } else if constexpr (std::is_same_v) { - return std::string{"%Y-%m-%dT%H:%M:%S.%6fZ"}; - } else if constexpr (std::is_same_v) { - return std::string{"%Y-%m-%dT%H:%M:%S.%9fZ"}; - } - return std::string{"%Y-%m-%d"}; - }(); - - auto col_as_strings = cudf::strings::from_timestamps(col, format); - if (col_as_strings->size() == 0) { return; } - - this->template operator()(*col_as_strings, out, indent); - } - - template ()>* = nullptr> - void operator()(cudf::column_view const& col, std::vector& out, std::string const&) - { - auto const h_data = cudf::test::to_host(col); - if (col.nullable()) { - std::transform(thrust::make_counting_iterator(size_type{0}), - thrust::make_counting_iterator(col.size()), - std::back_inserter(out), - [&h_data](auto idx) { - return h_data.second.empty() || bit_is_set(h_data.second.data(), idx) - ? 
static_cast(h_data.first[idx]) - : std::string("NULL"); - }); - } else { - std::transform(std::cbegin(h_data.first), - std::cend(h_data.first), - std::back_inserter(out), - [col](auto const& fp) { return static_cast(fp); }); - } - } - - template >* = nullptr> - void operator()(cudf::column_view const& col, std::vector& out, std::string const&) - { - // - // Implementation for strings, call special to_host variant - // - if (col.is_empty()) return; - auto h_data = cudf::test::to_host(col); - - // explicitly replace some special whitespace characters with their literal equivalents - auto cleaned = [](std::string_view in) { - std::string out(in); - auto replace_char = [](std::string& out, char c, std::string_view repl) { - for (std::string::size_type pos{}; out.npos != (pos = out.find(c, pos)); pos++) { - out.replace(pos, 1, repl); - } - }; - replace_char(out, '\a', "\\a"); - replace_char(out, '\b', "\\b"); - replace_char(out, '\f', "\\f"); - replace_char(out, '\r', "\\r"); - replace_char(out, '\t', "\\t"); - replace_char(out, '\n', "\\n"); - replace_char(out, '\v', "\\v"); - return out; - }; - - out.resize(col.size()); - std::transform(thrust::make_counting_iterator(size_type{0}), - thrust::make_counting_iterator(col.size()), - out.begin(), - [&](auto idx) { - return h_data.second.empty() || bit_is_set(h_data.second.data(), idx) - ? 
cleaned(h_data.first[idx]) - : std::string("NULL"); - }); - } - - template >* = nullptr> - void operator()(cudf::column_view const& col, std::vector& out, std::string const&) - { - cudf::dictionary_column_view dictionary(col); - if (col.is_empty()) return; - std::vector keys = to_strings(dictionary.keys()); - std::vector indices = to_strings({dictionary.indices().type(), - dictionary.size(), - dictionary.indices().head(), - dictionary.null_mask(), - dictionary.null_count(), - dictionary.offset()}); - out.insert(out.end(), keys.begin(), keys.end()); - if (!indices.empty()) { - std::string first = "\x08 : " + indices.front(); // use : as delimiter - out.push_back(first); // between keys and indices - out.insert(out.end(), indices.begin() + 1, indices.end()); - } - } - - // Print the tick counts with the units - template ()>* = nullptr> - void operator()(cudf::column_view const& col, std::vector& out, std::string const&) - { - auto h_data = cudf::test::to_host(col); - - out.resize(col.size()); - - if (col.nullable()) { - std::transform(thrust::make_counting_iterator(size_type{0}), - thrust::make_counting_iterator(col.size()), - out.begin(), - [&h_data](auto idx) { - return bit_is_set(h_data.second.data(), idx) - ? numeric_to_string_precise(h_data.first[idx].count()) + - duration_suffix(h_data.first[idx]) - : std::string("NULL"); - }); - - } else { - std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [](Element el) { - return numeric_to_string_precise(el.count()) + duration_suffix(el); - }); - } - } - - template >* = nullptr> - void operator()(cudf::column_view const& col, - std::vector& out, - std::string const& indent) - { - lists_column_view lcv(col); - - // propagate slicing to the child if necessary - column_view child = lcv.get_sliced_child(cudf::test::get_default_stream()); - bool const is_sliced = lcv.offset() > 0 || child.offset() > 0; - - std::string tmp = - get_nested_type_str(col) + (is_sliced ? 
"(sliced)" : "") + ":\n" + indent + - "Length : " + std::to_string(lcv.size()) + "\n" + indent + - "Offsets : " + (lcv.size() > 0 ? nested_offsets_to_string(lcv) : "") + "\n" + - (lcv.parent().nullable() - ? indent + "Null count: " + std::to_string(lcv.null_count()) + "\n" + - detail::to_string(bitmask_to_host(col), col.size(), indent) + "\n" - : "") + - // non-nested types don't typically display their null masks, so do it here for convenience. - (!is_nested(child.type()) && child.nullable() - ? " " + detail::to_string(bitmask_to_host(child), child.size(), indent) + "\n" - : "") + - (detail::to_string(child, ", ", indent + " ")) + "\n"; - - out.push_back(tmp); - } - - template >* = nullptr> - void operator()(cudf::column_view const& col, - std::vector& out, - std::string const& indent) - { - structs_column_view view{col}; - - std::ostringstream out_stream; - - out_stream << get_nested_type_str(col) << ":\n" - << indent << "Length : " << view.size() << ":\n"; - if (view.nullable()) { - out_stream << indent << "Null count: " << view.null_count() << "\n" - << detail::to_string(bitmask_to_host(col), col.size(), indent) << "\n"; - } - - auto iter = thrust::make_counting_iterator(0); - std::transform( - iter, - iter + view.num_children(), - std::ostream_iterator(out_stream, "\n"), - [&](size_type index) { - auto child = view.get_sliced_child(index, cudf::test::get_default_stream()); - - // non-nested types don't typically display their null masks, so do it here for convenience. - return (!is_nested(child.type()) && child.nullable() - ? 
" " + detail::to_string(bitmask_to_host(child), child.size(), indent) + "\n" - : "") + - detail::to_string(child, ", ", indent + " "); - }); - - out.push_back(out_stream.str()); - } -}; - -} // namespace - -namespace detail { - -/** - * @copydoc cudf::test::detail::to_strings - */ -std::vector to_strings(cudf::column_view const& col, std::string const& indent) -{ - std::vector reply; - cudf::type_dispatcher(col.type(), column_view_printer{}, col, reply, indent); - return reply; -} - -/** - * @copydoc cudf::test::detail::to_string(cudf::column_view, std::string, std::string) - * - * @param indent Indentation for all output - */ -std::string to_string(cudf::column_view const& col, - std::string const& delimiter, - std::string const& indent) -{ - std::ostringstream buffer; - std::vector h_data = to_strings(col, indent); - - buffer << indent; - std::copy(h_data.begin(), - h_data.end() - (!h_data.empty()), - std::ostream_iterator(buffer, delimiter.c_str())); - if (!h_data.empty()) buffer << h_data.back(); - - return buffer.str(); -} - -/** - * @copydoc cudf::test::detail::to_string(std::vector, size_type, std::string) - * - * @param indent Indentation for all output. See comment in `to_strings` for - * a detailed description. - */ -std::string to_string(std::vector const& null_mask, - size_type null_mask_size, - std::string const& indent) -{ - std::ostringstream buffer; - buffer << indent; - for (int idx = null_mask_size - 1; idx >= 0; idx--) { - buffer << (cudf::bit_is_set(null_mask.data(), idx) ? 
"1" : "0"); - } - return buffer.str(); -} - -} // namespace detail - -/** - * @copydoc cudf::test::to_strings - */ -std::vector to_strings(cudf::column_view const& col) -{ - return detail::to_strings(col); -} - -/** - * @copydoc cudf::test::to_string(cudf::column_view, std::string) - */ -std::string to_string(cudf::column_view const& col, std::string const& delimiter) -{ - return detail::to_string(col, delimiter); -} - -/** - * @copydoc cudf::test::to_string(std::vector, size_type) - */ -std::string to_string(std::vector const& null_mask, size_type null_mask_size) -{ - return detail::to_string(null_mask, null_mask_size); -} - -/** - * @copydoc cudf::test::print - */ -void print(cudf::column_view const& col, std::ostream& os, std::string const& delimiter) -{ - os << to_string(col, delimiter) << std::endl; -} - /** * @copydoc cudf::test::validate_host_masks */ diff --git a/cpp/tests/utilities/debug_utilities.cu b/cpp/tests/utilities/debug_utilities.cu new file mode 100644 index 00000000000..a8a43ffb4ca --- /dev/null +++ b/cpp/tests/utilities/debug_utilities.cu @@ -0,0 +1,480 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include + +namespace cudf::test { + +// Forward declaration. 
+namespace detail { + +/** + * @brief Formats a column view as a string + * + * @param col The column view + * @param delimiter The delimiter to put between strings + * @param indent Indentation for all output + */ +std::string to_string(cudf::column_view const& col, + std::string const& delimiter, + std::string const& indent = ""); + +/** + * @brief Formats a null mask as a string + * + * @param null_mask The null mask buffer + * @param null_mask_size Size of the null mask (in rows) + * @param indent Indentation for all output + */ +std::string to_string(std::vector const& null_mask, + size_type null_mask_size, + std::string const& indent = ""); + +/** + * @brief Convert column values to a host vector of strings + * + * Supports indentation of all output. For example, if the displayed output of your column + * would be + * + * @code{.pseudo} + * "1,2,3,4,5" + * @endcode + * and the `indent` parameter was " ", that indentation would be prepended to + * result in the output + * @code{.pseudo} + * " 1,2,3,4,5" + * @endcode + * + * The can be useful for displaying complex types. An example use case would be for + * displaying the nesting of a LIST type column (via recursion). 
+ * + * List>: + * Length : 3 + * Offsets : 0, 2, 5, 6 + * Children : + * List: + * Length : 6 + * Offsets : 0, 2, 4, 7, 8, 9, 11 + * Children : + * 1, 2, 3, 4, 5, 6, 7, 0, 8, 9, 10 + * + * @param col The column view + * @param indent Indentation for all output + */ +std::vector to_strings(cudf::column_view const& col, std::string const& indent = ""); + +} // namespace detail + +namespace { + +template >* = nullptr> +static auto numeric_to_string_precise(T value) +{ + return std::to_string(value); +} + +template >* = nullptr> +static auto numeric_to_string_precise(T value) +{ + std::ostringstream o; + o << std::setprecision(std::numeric_limits::max_digits10) << value; + return o.str(); +} + +static auto duration_suffix(cudf::duration_D) { return " days"; } + +static auto duration_suffix(cudf::duration_s) { return " seconds"; } + +static auto duration_suffix(cudf::duration_ms) { return " milliseconds"; } + +static auto duration_suffix(cudf::duration_us) { return " microseconds"; } + +static auto duration_suffix(cudf::duration_ns) { return " nanoseconds"; } + +std::string get_nested_type_str(cudf::column_view const& view) +{ + if (view.type().id() == cudf::type_id::LIST) { + lists_column_view lcv(view); + return cudf::type_to_name(view.type()) + "<" + (get_nested_type_str(lcv.child())) + ">"; + } + + if (view.type().id() == cudf::type_id::STRUCT) { + std::ostringstream out; + + out << cudf::type_to_name(view.type()) + "<"; + std::transform(view.child_begin(), + view.child_end(), + std::ostream_iterator(out, ","), + [&out](auto const col) { return get_nested_type_str(col); }); + out << ">"; + return out.str(); + } + + return cudf::type_to_name(view.type()); +} + +template +std::string nested_offsets_to_string(NestedColumnView const& c, std::string const& delimiter = ", ") +{ + column_view offsets = (c.parent()).child(NestedColumnView::offsets_column_index); + CUDF_EXPECTS(offsets.type().id() == type_id::INT32, + "Column does not appear to be an offsets column"); + 
CUDF_EXPECTS(offsets.offset() == 0, "Offsets column has an internal offset!"); + size_type output_size = c.size() + 1; + + // the first offset value to normalize everything against + size_type first = + cudf::detail::get_value(offsets, c.offset(), cudf::get_default_stream()); + rmm::device_uvector shifted_offsets(output_size, cudf::get_default_stream()); + + // normalize the offset values for the column offset + size_type const* d_offsets = offsets.head() + c.offset(); + thrust::transform( + rmm::exec_policy(cudf::get_default_stream()), + d_offsets, + d_offsets + output_size, + shifted_offsets.begin(), + [first] __device__(int32_t offset) { return static_cast(offset - first); }); + + auto const h_shifted_offsets = + cudf::detail::make_host_vector_sync(shifted_offsets, cudf::get_default_stream()); + std::ostringstream buffer; + for (size_t idx = 0; idx < h_shifted_offsets.size(); idx++) { + buffer << h_shifted_offsets[idx]; + if (idx < h_shifted_offsets.size() - 1) { buffer << delimiter; } + } + return buffer.str(); +} + +struct column_view_printer { + template ()>* = nullptr> + void operator()(cudf::column_view const& col, std::vector& out, std::string const&) + { + auto h_data = cudf::test::to_host(col); + + out.resize(col.size()); + + if (col.nullable()) { + std::transform(thrust::make_counting_iterator(size_type{0}), + thrust::make_counting_iterator(col.size()), + out.begin(), + [&h_data](auto idx) { + return bit_is_set(h_data.second.data(), idx) + ? 
numeric_to_string_precise(h_data.first[idx]) + : std::string("NULL"); + }); + + } else { + std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [](Element el) { + return numeric_to_string_precise(el); + }); + } + } + + template ()>* = nullptr> + void operator()(cudf::column_view const& col, + std::vector& out, + std::string const& indent) + { + // For timestamps, convert timestamp column to column of strings, then + // call string version + std::string format = [&]() { + if constexpr (std::is_same_v) { + return std::string{"%Y-%m-%dT%H:%M:%SZ"}; + } else if constexpr (std::is_same_v) { + return std::string{"%Y-%m-%dT%H:%M:%S.%3fZ"}; + } else if constexpr (std::is_same_v) { + return std::string{"%Y-%m-%dT%H:%M:%S.%6fZ"}; + } else if constexpr (std::is_same_v) { + return std::string{"%Y-%m-%dT%H:%M:%S.%9fZ"}; + } + return std::string{"%Y-%m-%d"}; + }(); + + auto col_as_strings = cudf::strings::from_timestamps(col, format); + if (col_as_strings->size() == 0) { return; } + + this->template operator()(*col_as_strings, out, indent); + } + + template ()>* = nullptr> + void operator()(cudf::column_view const& col, std::vector& out, std::string const&) + { + auto const h_data = cudf::test::to_host(col); + if (col.nullable()) { + std::transform(thrust::make_counting_iterator(size_type{0}), + thrust::make_counting_iterator(col.size()), + std::back_inserter(out), + [&h_data](auto idx) { + return h_data.second.empty() || bit_is_set(h_data.second.data(), idx) + ? 
static_cast(h_data.first[idx]) + : std::string("NULL"); + }); + } else { + std::transform(std::cbegin(h_data.first), + std::cend(h_data.first), + std::back_inserter(out), + [col](auto const& fp) { return static_cast(fp); }); + } + } + + template >* = nullptr> + void operator()(cudf::column_view const& col, std::vector& out, std::string const&) + { + // + // Implementation for strings, call special to_host variant + // + if (col.is_empty()) return; + auto h_data = cudf::test::to_host(col); + + // explicitly replace some special whitespace characters with their literal equivalents + auto cleaned = [](std::string_view in) { + std::string out(in); + auto replace_char = [](std::string& out, char c, std::string_view repl) { + for (std::string::size_type pos{}; out.npos != (pos = out.find(c, pos)); pos++) { + out.replace(pos, 1, repl); + } + }; + replace_char(out, '\a', "\\a"); + replace_char(out, '\b', "\\b"); + replace_char(out, '\f', "\\f"); + replace_char(out, '\r', "\\r"); + replace_char(out, '\t', "\\t"); + replace_char(out, '\n', "\\n"); + replace_char(out, '\v', "\\v"); + return out; + }; + + out.resize(col.size()); + std::transform(thrust::make_counting_iterator(size_type{0}), + thrust::make_counting_iterator(col.size()), + out.begin(), + [&](auto idx) { + return h_data.second.empty() || bit_is_set(h_data.second.data(), idx) + ? 
cleaned(h_data.first[idx]) + : std::string("NULL"); + }); + } + + template >* = nullptr> + void operator()(cudf::column_view const& col, std::vector& out, std::string const&) + { + cudf::dictionary_column_view dictionary(col); + if (col.is_empty()) return; + std::vector keys = to_strings(dictionary.keys()); + std::vector indices = to_strings({dictionary.indices().type(), + dictionary.size(), + dictionary.indices().head(), + dictionary.null_mask(), + dictionary.null_count(), + dictionary.offset()}); + out.insert(out.end(), keys.begin(), keys.end()); + if (!indices.empty()) { + std::string first = "\x08 : " + indices.front(); // use : as delimiter + out.push_back(first); // between keys and indices + out.insert(out.end(), indices.begin() + 1, indices.end()); + } + } + + // Print the tick counts with the units + template ()>* = nullptr> + void operator()(cudf::column_view const& col, std::vector& out, std::string const&) + { + auto h_data = cudf::test::to_host(col); + + out.resize(col.size()); + + if (col.nullable()) { + std::transform(thrust::make_counting_iterator(size_type{0}), + thrust::make_counting_iterator(col.size()), + out.begin(), + [&h_data](auto idx) { + return bit_is_set(h_data.second.data(), idx) + ? numeric_to_string_precise(h_data.first[idx].count()) + + duration_suffix(h_data.first[idx]) + : std::string("NULL"); + }); + + } else { + std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [](Element el) { + return numeric_to_string_precise(el.count()) + duration_suffix(el); + }); + } + } + + template >* = nullptr> + void operator()(cudf::column_view const& col, + std::vector& out, + std::string const& indent) + { + lists_column_view lcv(col); + + // propagate slicing to the child if necessary + column_view child = lcv.get_sliced_child(cudf::get_default_stream()); + bool const is_sliced = lcv.offset() > 0 || child.offset() > 0; + + std::string tmp = + get_nested_type_str(col) + (is_sliced ? 
"(sliced)" : "") + ":\n" + indent + + "Length : " + std::to_string(lcv.size()) + "\n" + indent + + "Offsets : " + (lcv.size() > 0 ? nested_offsets_to_string(lcv) : "") + "\n" + + (lcv.parent().nullable() + ? indent + "Null count: " + std::to_string(lcv.null_count()) + "\n" + + detail::to_string(cudf::test::bitmask_to_host(col), col.size(), indent) + "\n" + : "") + + // non-nested types don't typically display their null masks, so do it here for convenience. + (!is_nested(child.type()) && child.nullable() + ? " " + detail::to_string(cudf::test::bitmask_to_host(child), child.size(), indent) + + "\n" + : "") + + (detail::to_string(child, ", ", indent + " ")) + "\n"; + + out.push_back(tmp); + } + + template >* = nullptr> + void operator()(cudf::column_view const& col, + std::vector& out, + std::string const& indent) + { + structs_column_view view{col}; + + std::ostringstream out_stream; + + out_stream << get_nested_type_str(col) << ":\n" + << indent << "Length : " << view.size() << ":\n"; + if (view.nullable()) { + out_stream << indent << "Null count: " << view.null_count() << "\n" + << detail::to_string(cudf::test::bitmask_to_host(col), col.size(), indent) << "\n"; + } + + auto iter = thrust::make_counting_iterator(0); + std::transform( + iter, + iter + view.num_children(), + std::ostream_iterator(out_stream, "\n"), + [&](size_type index) { + auto child = view.get_sliced_child(index, cudf::get_default_stream()); + + // non-nested types don't typically display their null masks, so do it here for convenience. + return (!is_nested(child.type()) && child.nullable() + ? 
" " + + detail::to_string(cudf::test::bitmask_to_host(child), child.size(), indent) + + "\n" + : "") + + detail::to_string(child, ", ", indent + " "); + }); + + out.push_back(out_stream.str()); + } +}; + +} // namespace + +namespace detail { + +/** + * @copydoc cudf::test::detail::to_strings + */ +std::vector to_strings(cudf::column_view const& col, std::string const& indent) +{ + std::vector reply; + cudf::type_dispatcher(col.type(), column_view_printer{}, col, reply, indent); + return reply; +} + +/** + * @copydoc cudf::test::detail::to_string(cudf::column_view, std::string, std::string) + * + * @param indent Indentation for all output + */ +std::string to_string(cudf::column_view const& col, + std::string const& delimiter, + std::string const& indent) +{ + std::ostringstream buffer; + std::vector h_data = to_strings(col, indent); + + buffer << indent; + std::copy(h_data.begin(), + h_data.end() - (!h_data.empty()), + std::ostream_iterator(buffer, delimiter.c_str())); + if (!h_data.empty()) buffer << h_data.back(); + + return buffer.str(); +} + +/** + * @copydoc cudf::test::detail::to_string(std::vector, size_type, std::string) + * + * @param indent Indentation for all output. See comment in `to_strings` for + * a detailed description. + */ +std::string to_string(std::vector const& null_mask, + size_type null_mask_size, + std::string const& indent) +{ + std::ostringstream buffer; + buffer << indent; + for (int idx = null_mask_size - 1; idx >= 0; idx--) { + buffer << (cudf::bit_is_set(null_mask.data(), idx) ? 
"1" : "0"); + } + return buffer.str(); +} + +} // namespace detail + +std::vector to_strings(cudf::column_view const& col) +{ + return detail::to_strings(col); +} + +std::string to_string(cudf::column_view const& col, std::string const& delimiter) +{ + return detail::to_string(col, delimiter); +} + +std::string to_string(std::vector const& null_mask, size_type null_mask_size) +{ + return detail::to_string(null_mask, null_mask_size); +} + +void print(cudf::column_view const& col, std::ostream& os) +{ + os << to_string(col, ",") << std::endl; +} + +} // namespace cudf::test diff --git a/cpp/tests/utilities_tests/column_debug_tests.cpp b/cpp/tests/utilities_tests/column_debug_tests.cpp new file mode 100644 index 00000000000..0dae407ad21 --- /dev/null +++ b/cpp/tests/utilities_tests/column_debug_tests.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +template +struct ColumnDebugTestIntegral : public cudf::test::BaseFixture {}; +template +struct ColumnDebugTestFloatingPoint : public cudf::test::BaseFixture {}; + +TYPED_TEST_SUITE(ColumnDebugTestIntegral, cudf::test::IntegralTypes); +TYPED_TEST_SUITE(ColumnDebugTestFloatingPoint, cudf::test::FloatingPointTypes); + +TYPED_TEST(ColumnDebugTestIntegral, PrintColumnNumeric) +{ + char const* delimiter = ","; + + cudf::test::fixed_width_column_wrapper cudf_col({1, 2, 3, 4, 5}); + auto std_col = cudf::test::make_type_param_vector({1, 2, 3, 4, 5}); + + std::stringstream tmp; + auto string_iter = + thrust::make_transform_iterator(std::begin(std_col), [](auto e) { return std::to_string(e); }); + + std::copy(string_iter, + string_iter + std_col.size() - 1, + std::ostream_iterator(tmp, delimiter)); + + tmp << std::to_string(std_col.back()); + + EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), tmp.str()); +} + +TYPED_TEST(ColumnDebugTestIntegral, PrintColumnWithInvalids) +{ + char const* delimiter = ","; + + cudf::test::fixed_width_column_wrapper cudf_col{{1, 2, 3, 4, 5}, {1, 0, 1, 0, 1}}; + auto std_col = cudf::test::make_type_param_vector({1, 2, 3, 4, 5}); + + std::ostringstream tmp; + tmp << std::to_string(std_col[0]) << delimiter << "NULL" << delimiter + << std::to_string(std_col[2]) << delimiter << "NULL" << delimiter + << std::to_string(std_col[4]); + + EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), tmp.str()); +} + +TYPED_TEST(ColumnDebugTestFloatingPoint, PrintColumnNumeric) +{ + char const* delimiter = ","; + + cudf::test::fixed_width_column_wrapper cudf_col( + {10001523.25, 2.0, 3.75, 0.000000034, 5.3}); + + auto expected = std::is_same_v + ? 
"10001523.25,2,3.75,3.4e-08,5.2999999999999998" + : "10001523,2,3.75,3.39999993e-08,5.30000019"; + + EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), expected); +} + +TYPED_TEST(ColumnDebugTestFloatingPoint, PrintColumnWithInvalids) +{ + char const* delimiter = ","; + + cudf::test::fixed_width_column_wrapper cudf_col( + {10001523.25, 2.0, 3.75, 0.000000034, 5.3}, {1, 0, 1, 0, 1}); + + auto expected = std::is_same_v + ? "10001523.25,NULL,3.75,NULL,5.2999999999999998" + : "10001523,NULL,3.75,NULL,5.30000019"; + + EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), expected); +} + +struct ColumnDebugStringsTest : public cudf::test::BaseFixture {}; + +TEST_F(ColumnDebugStringsTest, PrintColumnDuration) +{ + char const* delimiter = ","; + + cudf::test::fixed_width_column_wrapper cudf_col({100, 0, 7, 140000}); + + auto expected = "100 seconds,0 seconds,7 seconds,140000 seconds"; + + EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), expected); +} + +TEST_F(ColumnDebugStringsTest, StringsToString) +{ + char const* delimiter = ","; + + std::vector h_strings{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"}; + cudf::test::strings_column_wrapper strings( + h_strings.begin(), + h_strings.end(), + thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); + + std::ostringstream tmp; + tmp << h_strings[0] << delimiter << h_strings[1] << delimiter << "NULL" << delimiter + << h_strings[3] << delimiter << h_strings[4] << delimiter << h_strings[5] << delimiter + << h_strings[6]; + + EXPECT_EQ(cudf::test::to_string(strings, delimiter), tmp.str()); +} + +TEST_F(ColumnDebugStringsTest, PrintEscapeStrings) +{ + char const* delimiter = ","; + cudf::test::strings_column_wrapper input({"e\te\ne", "é\bé\ré", "e\vé\fé\abell"}); + std::string expected{"e\\te\\ne,é\\bé\\ré,e\\vé\\fé\\abell"}; + EXPECT_EQ(cudf::test::to_string(input, delimiter), expected); +} diff --git a/cpp/tests/utilities_tests/column_utilities_tests.cpp 
b/cpp/tests/utilities_tests/column_utilities_tests.cpp index 90a7270cb29..07d2bea2b28 100644 --- a/cpp/tests/utilities_tests/column_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/column_utilities_tests.cpp @@ -182,106 +182,6 @@ TEST_F(ColumnUtilitiesStringsTest, StringsToHostAllNulls) EXPECT_TRUE(std::all_of(results.begin(), results.end(), [](auto s) { return s.empty(); })); } -TEST_F(ColumnUtilitiesStringsTest, PrintColumnDuration) -{ - char const* delimiter = ","; - - cudf::test::fixed_width_column_wrapper cudf_col({100, 0, 7, 140000}); - - auto expected = "100 seconds,0 seconds,7 seconds,140000 seconds"; - - EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), expected); -} - -TYPED_TEST(ColumnUtilitiesTestIntegral, PrintColumnNumeric) -{ - char const* delimiter = ","; - - cudf::test::fixed_width_column_wrapper cudf_col({1, 2, 3, 4, 5}); - auto std_col = cudf::test::make_type_param_vector({1, 2, 3, 4, 5}); - - std::stringstream tmp; - auto string_iter = - thrust::make_transform_iterator(std::begin(std_col), [](auto e) { return std::to_string(e); }); - - std::copy(string_iter, - string_iter + std_col.size() - 1, - std::ostream_iterator(tmp, delimiter)); - - tmp << std::to_string(std_col.back()); - - EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), tmp.str()); -} - -TYPED_TEST(ColumnUtilitiesTestIntegral, PrintColumnWithInvalids) -{ - char const* delimiter = ","; - - cudf::test::fixed_width_column_wrapper cudf_col{{1, 2, 3, 4, 5}, {1, 0, 1, 0, 1}}; - auto std_col = cudf::test::make_type_param_vector({1, 2, 3, 4, 5}); - - std::ostringstream tmp; - tmp << std::to_string(std_col[0]) << delimiter << "NULL" << delimiter - << std::to_string(std_col[2]) << delimiter << "NULL" << delimiter - << std::to_string(std_col[4]); - - EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), tmp.str()); -} - -TYPED_TEST(ColumnUtilitiesTestFloatingPoint, PrintColumnNumeric) -{ - char const* delimiter = ","; - - cudf::test::fixed_width_column_wrapper cudf_col( - {10001523.25, 
2.0, 3.75, 0.000000034, 5.3}); - - auto expected = std::is_same_v - ? "10001523.25,2,3.75,3.4e-08,5.2999999999999998" - : "10001523,2,3.75,3.39999993e-08,5.30000019"; - - EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), expected); -} - -TYPED_TEST(ColumnUtilitiesTestFloatingPoint, PrintColumnWithInvalids) -{ - char const* delimiter = ","; - - cudf::test::fixed_width_column_wrapper cudf_col( - {10001523.25, 2.0, 3.75, 0.000000034, 5.3}, {1, 0, 1, 0, 1}); - - auto expected = std::is_same_v - ? "10001523.25,NULL,3.75,NULL,5.2999999999999998" - : "10001523,NULL,3.75,NULL,5.30000019"; - - EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), expected); -} - -TEST_F(ColumnUtilitiesStringsTest, StringsToString) -{ - char const* delimiter = ","; - - std::vector h_strings{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"}; - cudf::test::strings_column_wrapper strings( - h_strings.begin(), - h_strings.end(), - thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - - std::ostringstream tmp; - tmp << h_strings[0] << delimiter << h_strings[1] << delimiter << "NULL" << delimiter - << h_strings[3] << delimiter << h_strings[4] << delimiter << h_strings[5] << delimiter - << h_strings[6]; - - EXPECT_EQ(cudf::test::to_string(strings, delimiter), tmp.str()); -} - -TEST_F(ColumnUtilitiesStringsTest, PrintEscapeStrings) -{ - char const* delimiter = ","; - cudf::test::strings_column_wrapper input({"e\te\ne", "é\bé\ré", "e\vé\fé\abell"}); - std::string expected{"e\\te\\ne,é\\bé\\ré,e\\vé\\fé\\abell"}; - EXPECT_EQ(cudf::test::to_string(input, delimiter), expected); -} - TYPED_TEST(ColumnUtilitiesTestFixedPoint, NonNullableToHost) { using namespace numeric; From e8cf0ebd517a9c81e294771a40c74ff5c4fb42da Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 23 Oct 2023 15:05:26 -0400 Subject: [PATCH 75/76] Expose stream parameter in public strings combine APIs (#14281) Add stream parameter to public APIs: 
- `cudf::strings::concatenate()` (x2) - `cudf::strings::join_strings()` - `cudf::strings::join_list_elements()` (x2) - `cudf::strings::repeat_string()` - `cudf::strings::repeat_strings()` (x2) Also added stream gtests and fixed up some doxygen comments. Reference #13744 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/14281 --- cpp/include/cudf/strings/combine.hpp | 88 ++++++++++-------- cpp/include/cudf/strings/repeat_strings.hpp | 8 +- cpp/src/strings/combine/concatenate.cu | 14 +-- cpp/src/strings/combine/join.cu | 3 +- cpp/src/strings/combine/join_list_elements.cu | 13 +-- cpp/src/strings/repeat_strings.cu | 11 ++- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/strings/combine_test.cpp | 93 +++++++++++++++++++ 8 files changed, 168 insertions(+), 63 deletions(-) create mode 100644 cpp/tests/streams/strings/combine_test.cpp diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 71f65ac9080..568e8ac50ec 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -66,18 +66,20 @@ enum class output_if_empty_list { * * @throw cudf::logic_error if separator is not valid. * - * @param strings Strings for this operation. + * @param input Strings for this operation * @param separator String that should inserted between each string. * Default is an empty string. - * @param narep String that should represent any null strings found. + * @param narep String to replace any null strings found. * Default of invalid-scalar will ignore any null entries. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column containing one string. 
*/ std::unique_ptr join_strings( - strings_column_view const& strings, + strings_column_view const& input, string_scalar const& separator = string_scalar(""), string_scalar const& narep = string_scalar("", false), + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -127,18 +129,17 @@ std::unique_ptr join_strings( * @throw cudf::logic_error if the number of rows from @p separators and @p strings_columns * do not match * - * @param strings_columns List of strings columns to concatenate. + * @param strings_columns List of strings columns to concatenate * @param separators Strings column that provides the separator for a given row - * @param separator_narep String that should be used in place of a null separator for a given - * row. Default of invalid-scalar means no row separator value replacements. - * Default is an invalid string. - * @param col_narep String that should be used in place of any null strings - * found in any column. Default of invalid-scalar means no null column value replacements. - * Default is an invalid string. + * @param separator_narep String to replace a null separator for a given row. + * Default of invalid-scalar means no row separator value replacements. + * @param col_narep String that should be used in place of any null strings found in any column. + * Default of invalid-scalar means no null column value replacements. * @param separate_nulls If YES, then the separator is included for null rows * if `col_narep` is valid. - * @param mr Resource for allocating device memory. - * @return New column with concatenated results. 
+ * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Resource for allocating device memory + * @return New column with concatenated results */ std::unique_ptr concatenate( table_view const& strings_columns, @@ -146,6 +147,7 @@ std::unique_ptr concatenate( string_scalar const& separator_narep = string_scalar("", false), string_scalar const& col_narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -184,21 +186,23 @@ std::unique_ptr concatenate( * @throw cudf::logic_error if separator is not valid. * @throw cudf::logic_error if only one column is specified * - * @param strings_columns List of string columns to concatenate. + * @param strings_columns List of string columns to concatenate * @param separator String that should inserted between each string from each row. * Default is an empty string. - * @param narep String that should be used in place of any null strings - * found in any column. Default of invalid-scalar means any null entry in any column will + * @param narep String to replace any null strings found in any column. + * Default of invalid-scalar means any null entry in any column will * produces a null result for that row. - * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column with concatenated results. 
+ * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column with concatenated results */ std::unique_ptr concatenate( table_view const& strings_columns, string_scalar const& separator = string_scalar(""), string_scalar const& narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -243,19 +247,20 @@ std::unique_ptr concatenate( * @throw cudf::logic_error if the number of rows from `separators` and `lists_strings_column` do * not match * - * @param lists_strings_column Column containing lists of strings to concatenate. - * @param separators Strings column that provides separators for concatenation. - * @param separator_narep String that should be used to replace null separator, default is an - * invalid-scalar denoting that rows containing null separator will result in null string in - * the corresponding output rows. - * @param string_narep String that should be used to replace null strings in any non-null list row, - * default is an invalid-scalar denoting that list rows containing null strings will result - * in null string in the corresponding output rows. - * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. - * @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will + * @param lists_strings_column Column containing lists of strings to concatenate + * @param separators Strings column that provides separators for concatenation + * @param separator_narep String that should be used to replace a null separator. 
+ * Default is an invalid-scalar denoting that rows containing null separator will result in + * a null string in the corresponding output rows. + * @param string_narep String to replace null strings in any non-null list row. + * Default is an invalid-scalar denoting that list rows containing null strings will result + * in a null string in the corresponding output rows. + * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid + * @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will * result in an empty string. Otherwise, it will result in a null. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with concatenated results. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with concatenated results */ std::unique_ptr join_list_elements( lists_column_view const& lists_strings_column, @@ -264,6 +269,7 @@ std::unique_ptr join_list_elements( string_scalar const& string_narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -303,17 +309,18 @@ std::unique_ptr join_list_elements( * @throw cudf::logic_error if input column is not lists of strings column. * @throw cudf::logic_error if separator is not valid. * - * @param lists_strings_column Column containing lists of strings to concatenate. - * @param separator String that should inserted between strings of each list row, default is an - * empty string. 
- * @param narep String that should be used to replace null strings in any non-null list row, default - * is an invalid-scalar denoting that list rows containing null strings will result in null - * string in the corresponding output rows. - * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. - * @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will result + * @param lists_strings_column Column containing lists of strings to concatenate + * @param separator String to insert between strings of each list row. + * Default is an empty string. + * @param narep String to replace null strings in any non-null list row. + * Default is an invalid-scalar denoting that list rows containing null strings will result + * in a null string in the corresponding output rows. + * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid + * @param empty_list_policy If set to EMPTY_STRING, any input row that is an empty list will result * in an empty string. Otherwise, it will result in a null. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column with concatenated results. 
+ * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column with concatenated results */ std::unique_ptr join_list_elements( lists_column_view const& lists_strings_column, @@ -321,6 +328,7 @@ std::unique_ptr join_list_elements( string_scalar const& narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index 2b6575f80d0..7dc9c33f579 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -52,12 +52,14 @@ namespace strings { * * @param input The scalar containing the string to repeat * @param repeat_times The number of times the input string is repeated + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned string scalar * @return New string scalar in which the input string is repeated */ std::unique_ptr repeat_string( string_scalar const& input, size_type repeat_times, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -81,12 +83,14 @@ std::unique_ptr repeat_string( * * @param input The column containing strings to repeat * @param repeat_times The number of times each input string is repeated + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned strings column * @return New column containing the repeated 
strings */ std::unique_ptr repeat_strings( strings_column_view const& input, size_type repeat_times, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -115,13 +119,15 @@ std::unique_ptr repeat_strings( * * @param input The column containing strings to repeat * @param repeat_times The column containing numbers of times that the corresponding input strings - * are repeated + * for each row are repeated + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned strings column * @return New column containing the repeated strings. */ std::unique_ptr repeat_strings( strings_column_view const& input, column_view const& repeat_times, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/src/strings/combine/concatenate.cu b/cpp/src/strings/combine/concatenate.cu index ba8acd23467..0a11b6dc460 100644 --- a/cpp/src/strings/combine/concatenate.cu +++ b/cpp/src/strings/combine/concatenate.cu @@ -267,11 +267,11 @@ std::unique_ptr concatenate(table_view const& strings_columns, string_scalar const& separator, string_scalar const& narep, separator_on_nulls separate_nulls, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate( - strings_columns, separator, narep, separate_nulls, cudf::get_default_stream(), mr); + return detail::concatenate(strings_columns, separator, narep, separate_nulls, stream, mr); } std::unique_ptr concatenate(table_view const& strings_columns, @@ -279,16 +279,12 @@ std::unique_ptr concatenate(table_view const& strings_columns, string_scalar const& separator_narep, string_scalar const& col_narep, separator_on_nulls separate_nulls, + rmm::cuda_stream_view stream, 
rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate(strings_columns, - separators, - separator_narep, - col_narep, - separate_nulls, - cudf::get_default_stream(), - mr); + return detail::concatenate( + strings_columns, separators, separator_narep, col_narep, separate_nulls, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/combine/join.cu b/cpp/src/strings/combine/join.cu index faf1be6a26f..9ab527feaf8 100644 --- a/cpp/src/strings/combine/join.cu +++ b/cpp/src/strings/combine/join.cu @@ -180,10 +180,11 @@ std::unique_ptr join_strings(strings_column_view const& input, std::unique_ptr join_strings(strings_column_view const& strings, string_scalar const& separator, string_scalar const& narep, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::join_strings(strings, separator, narep, cudf::get_default_stream(), mr); + return detail::join_strings(strings, separator, narep, stream, mr); } } // namespace strings diff --git a/cpp/src/strings/combine/join_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu index eee59e37478..372b49fb0ee 100644 --- a/cpp/src/strings/combine/join_list_elements.cu +++ b/cpp/src/strings/combine/join_list_elements.cu @@ -301,16 +301,12 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string string_scalar const& narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::join_list_elements(lists_strings_column, - separator, - narep, - separate_nulls, - empty_list_policy, - cudf::get_default_stream(), - mr); + return detail::join_list_elements( + lists_strings_column, separator, narep, separate_nulls, empty_list_policy, stream, mr); } std::unique_ptr join_list_elements(lists_column_view const& lists_strings_column, @@ -319,6 +315,7 @@ std::unique_ptr 
join_list_elements(lists_column_view const& lists_string string_scalar const& string_narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); @@ -328,7 +325,7 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string string_narep, separate_nulls, empty_list_policy, - cudf::get_default_stream(), + stream, mr); } diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index 396e1e6a2ac..847a64f5602 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -67,7 +67,7 @@ std::unique_ptr repeat_string(string_scalar const& input, return in_ptr[idx % str_size]; }); - return std::make_unique(std::move(buff)); + return std::make_unique(std::move(buff), true, stream, mr); } namespace { @@ -260,26 +260,29 @@ std::unique_ptr repeat_strings(strings_column_view const& input, std::unique_ptr repeat_string(string_scalar const& input, size_type repeat_times, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_string(input, repeat_times, cudf::get_default_stream(), mr); + return detail::repeat_string(input, repeat_times, stream, mr); } std::unique_ptr repeat_strings(strings_column_view const& input, size_type repeat_times, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr); + return detail::repeat_strings(input, repeat_times, stream, mr); } std::unique_ptr repeat_strings(strings_column_view const& input, column_view const& repeat_times, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr); + return detail::repeat_strings(input, repeat_times, stream, mr); } } // namespace strings diff --git 
a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index eb0585d8f3e..e7f4914fe05 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -637,6 +637,7 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp + streams/strings/combine_test.cpp streams/strings/convert_test.cpp streams/strings/find_test.cpp streams/strings/replace_test.cpp diff --git a/cpp/tests/streams/strings/combine_test.cpp b/cpp/tests/streams/strings/combine_test.cpp new file mode 100644 index 00000000000..9562634957a --- /dev/null +++ b/cpp/tests/streams/strings/combine_test.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include + +class StringsCombineTest : public cudf::test::BaseFixture {}; + +TEST_F(StringsCombineTest, Concatenate) +{ + auto input = cudf::test::strings_column_wrapper({"Héllo", "thesé", "tést"}); + auto view = cudf::table_view({input, input}); + + auto separators = cudf::test::strings_column_wrapper({"_", ".", " "}); + auto separators_view = cudf::strings_column_view(separators); + auto sep_on_null = cudf::strings::separator_on_nulls::YES; + + auto const separator = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + auto const narep = cudf::string_scalar("n/a", true, cudf::test::get_default_stream()); + cudf::strings::concatenate(view, separator, narep, sep_on_null, cudf::test::get_default_stream()); + cudf::strings::concatenate( + view, separators_view, narep, narep, sep_on_null, cudf::test::get_default_stream()); +} + +TEST_F(StringsCombineTest, Join) +{ + auto input = cudf::test::strings_column_wrapper({"Héllo", "thesé", "tést"}); + auto view = cudf::strings_column_view(input); + + auto const separator = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + auto const narep = cudf::string_scalar("n/a", true, cudf::test::get_default_stream()); + cudf::strings::join_strings(view, separator, narep, cudf::test::get_default_stream()); +} + +TEST_F(StringsCombineTest, JoinLists) +{ + using STR_LISTS = cudf::test::lists_column_wrapper; + auto const input = STR_LISTS{ + STR_LISTS{"a", "bb", "ccc"}, STR_LISTS{"ddd", "efgh", "ijk"}, STR_LISTS{"zzz", "xxxxx"}}; + auto view = cudf::lists_column_view(input); + + auto separators = cudf::test::strings_column_wrapper({"_", ".", " "}); + auto separators_view = cudf::strings_column_view(separators); + auto sep_on_null = cudf::strings::separator_on_nulls::YES; + auto if_empty = cudf::strings::output_if_empty_list::EMPTY_STRING; + + auto const separator = cudf::string_scalar(" ", true, cudf::test::get_default_stream()); + auto const 
narep = cudf::string_scalar("n/a", true, cudf::test::get_default_stream()); + cudf::strings::join_list_elements( + view, separator, narep, sep_on_null, if_empty, cudf::test::get_default_stream()); + cudf::strings::join_list_elements( + view, separators_view, narep, narep, sep_on_null, if_empty, cudf::test::get_default_stream()); +} + +TEST_F(StringsCombineTest, Repeat) +{ + auto input = cudf::test::strings_column_wrapper({"Héllo", "thesé", "tést"}); + auto view = cudf::strings_column_view(input); + cudf::strings::repeat_strings(view, 0, cudf::test::get_default_stream()); + cudf::strings::repeat_strings(view, 1, cudf::test::get_default_stream()); + cudf::strings::repeat_strings(view, 10, cudf::test::get_default_stream()); + + auto counts = cudf::test::fixed_width_column_wrapper({9, 8, 7}); + cudf::strings::repeat_strings(view, counts, cudf::test::get_default_stream()); + cudf::strings::repeat_strings(view, counts, cudf::test::get_default_stream()); + + auto const str = cudf::string_scalar("X", true, cudf::test::get_default_stream()); + cudf::strings::repeat_string(str, 0, cudf::test::get_default_stream()); + cudf::strings::repeat_string(str, 1, cudf::test::get_default_stream()); + cudf::strings::repeat_string(str, 10, cudf::test::get_default_stream()); + + auto const invalid = cudf::string_scalar("", false, cudf::test::get_default_stream()); + cudf::strings::repeat_string(invalid, 10, cudf::test::get_default_stream()); +} From 630982a370185112b2ea9a6f47284d98e12a36e8 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Mon, 23 Oct 2023 16:45:02 -0700 Subject: [PATCH 76/76] test is_valid before reading column data (#14318) Fixes #14310. 
Authors: - Ed Seidl (https://github.com/etseidl) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/14318 --- cpp/src/io/parquet/page_enc.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 1e4f061d2e0..9acafd50585 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -283,7 +283,7 @@ __device__ uint8_t const* delta_encode(page_enc_state_s<0>* s, cur_val_idx += nvals; - output_type v = s->col.leaf_column->element(val_idx); + output_type v = is_valid ? s->col.leaf_column->element(val_idx) : 0; if (scale < 0) { v /= -scale; } else {