From ee3659cec06869eb99ab465ca6a5ad26fb10efff Mon Sep 17 00:00:00 2001 From: Ray Douglass <ray@raydouglass.com> Date: Thu, 18 Jan 2024 15:06:16 -0500 Subject: [PATCH 01/10] DOC v24.04 Updates [skip ci] --- .github/workflows/build.yaml | 12 ++++++------ .github/workflows/pr.yaml | 18 +++++++++--------- .github/workflows/test.yaml | 6 +++--- VERSION | 2 +- ci/build_docs.sh | 2 +- .../environments/all_cuda-118_arch-x86_64.yaml | 4 ++-- .../environments/all_cuda-120_arch-x86_64.yaml | 4 ++-- cpp/CMakeLists.txt | 2 +- cpp/Doxyfile | 2 +- dependencies.yaml | 4 ++-- fetch_rapids.cmake | 2 +- python/pylibwholegraph/CMakeLists.txt | 2 +- 12 files changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 6840ae21f..ff7b600bc 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -49,7 +49,7 @@ jobs: if: github.ref_type == 'branch' needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -62,7 +62,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -70,7 +70,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibwholegraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -80,7 +80,7 @@ jobs: wheel-publish-pylibwholegraph: needs: wheel-build-pylibwholegraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 3b6655c78..2c2578e04 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -21,41 +21,41 @@ jobs: - wheel-build-pylibwholegraph - wheel-test-pylibwholegraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: build_type: pull-request arch: "amd64" @@ -64,14 +64,14 @@ jobs: wheel-build-pylibwholegraph: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: pull-request script: ci/build_wheel.sh wheel-test-pylibwholegraph: needs: wheel-build-pylibwholegraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: pull-request script: ci/test_wheel.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 52319b3fd..489348971 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-pytorch-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibwholegraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/VERSION b/VERSION index 3c6c5e2b7..4a2fe8aa5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.02.00 +24.04.00 diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 4f3af9c18..698754b28 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -22,7 +22,7 @@ rapids-print-env rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -export RAPIDS_VERSION_NUMBER="24.02" +export RAPIDS_VERSION_NUMBER="24.04" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-mamba-retry install \ diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 825ec1f7d..85e063967 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -24,8 +24,8 @@ dependencies: - graphviz - ipykernel - ipython -- libraft-headers==24.2.* -- librmm==24.2.* +- libraft-headers==24.4.* +- librmm==24.4.* - nanobind>=0.2.0 - nbsphinx - nccl diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 568c312b4..fd90a0b32 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ 
b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -25,8 +25,8 @@ dependencies: - graphviz - ipykernel - ipython -- libraft-headers==24.2.* -- librmm==24.2.* +- libraft-headers==24.4.* +- librmm==24.4.* - nanobind>=0.2.0 - nbsphinx - nccl diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9ac286b79..4968c28a4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -14,7 +14,7 @@ # limitations under the License. #============================================================================= -set(RAPIDS_VERSION "24.02") +set(RAPIDS_VERSION "24.04") set(WHOLEGRAPH_VERSION "${RAPIDS_VERSION}.00") cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) diff --git a/cpp/Doxyfile b/cpp/Doxyfile index 357e85685..e480d8ef4 100644 --- a/cpp/Doxyfile +++ b/cpp/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "WholeGraph C API" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 24.02 +PROJECT_NUMBER = 24.04 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/dependencies.yaml b/dependencies.yaml index 17ed61598..72b76aa1a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -74,8 +74,8 @@ dependencies: - cxx-compiler - cython>=3.0.0 - &doxygen doxygen==1.9.1 - - libraft-headers==24.2.* - - librmm==24.2.* + - libraft-headers==24.4.* + - librmm==24.4.* - nanobind>=0.2.0 - nccl - scikit-build diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 1f099e8f8..3f9023810 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUGRAPH_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/CUGRAPH_RAPIDS.cmake ) endif() diff --git a/python/pylibwholegraph/CMakeLists.txt b/python/pylibwholegraph/CMakeLists.txt index 758fe3713..97576c5f3 100644 --- a/python/pylibwholegraph/CMakeLists.txt +++ b/python/pylibwholegraph/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) -set(RAPIDS_VERSION "24.02") +set(RAPIDS_VERSION "24.04") set(WHOLEGRAPH_VERSION "${RAPIDS_VERSION}.00") include(FetchContent) From dca9fd62d30f0e42b8294a09b615d9ba9e1bed94 Mon Sep 17 00:00:00 2001 From: James Lamb <jlamb@nvidia.com> Date: Mon, 5 Feb 2024 10:24:12 -0600 Subject: [PATCH 02/10] handle more RAPIDS version formats in update-version.sh (#122) Contributes to https://github.com/rapidsai/build-planning/issues/13. Updates `update-version.sh` to correctly handle RAPIDS dependencies like `cudf-cu12==24.2.*`. This project doesn't appear to have any of those right now, but might in the future. Also removes some references to Sphinx `conf.py` files in `update-version.sh`... this repo doesn't have those. ### How I tested this The portability of this updated `sed` command was tested here: https://github.com/rapidsai/cudf/pull/14825#issuecomment-1904735849. In this repo, I ran the following: ```shell ./ci/release/update-version.sh '23.10.00' git diff ./ci/release/update-version.sh '24.04.00' git diff ``` Confirmed that that first `git diff` changed all the things I expected, and that second one showed 0 changes. 
Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/wholegraph/pull/122 --- ci/release/update-version.sh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 0f64b213e..da9a19e9f 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -53,10 +53,6 @@ sed_runner 's/'"RAPIDS_VERSION \".*\")"'/'"RAPIDS_VERSION \"${NEXT_SHORT_TAG}\") # Python CMakeLists updates sed_runner '/set(RAPIDS_VERSION/ s/".*"/'\"${NEXT_SHORT_TAG}\"'/g' python/pylibwholegraph/CMakeLists.txt -# RTD update -sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/wholegraph/source/conf.py -sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/wholegraph/source/conf.py - # Centralized version file update echo "${NEXT_FULL_TAG}" > VERSION @@ -72,10 +68,10 @@ DEPENDENCIES=( ) for DEP in "${DEPENDENCIES[@]}"; do for FILE in dependencies.yaml conda/environments/*.yaml; do - sed_runner "/-.* ${DEP}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*/g" ${FILE} + sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*/g" ${FILE} done for FILE in python/**/pyproject.toml; do - sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE} + sed_runner "/\"${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE} done done From 17f58ec7fd7e18c789a02f980cff5ea18e6ce85c Mon Sep 17 00:00:00 2001 From: James Lamb <jlamb@nvidia.com> Date: Mon, 12 Feb 2024 09:12:38 -0600 Subject: [PATCH 03/10] Support CUDA 12.2 (#116) * switches to CUDA 12.2.2 for building conda packages and wheels * adds new tests running against CUDA 12.2.2 ### Notes for Reviewers This is part of ongoing work to build and test packages against CUDA 12.2.2 across all of RAPIDS. 
For more details see: * https://github.com/rapidsai/build-planning/issues/7 * https://github.com/rapidsai/shared-workflows/pull/166 Planning a second round of PRs to revert these references back to a proper `branch-24.{nn}` release branch of `shared-workflows` once https://github.com/rapidsai/shared-workflows/pull/166 is merged. *(created with `rapids-reviser`)* Authors: - James Lamb (https://github.com/jameslamb) - Bradley Dice (https://github.com/bdice) - Ray Douglass (https://github.com/raydouglass) Approvers: - Jake Awe (https://github.com/AyodeAwe) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/wholegraph/pull/116 --- .github/workflows/build.yaml | 12 ++++++------ .github/workflows/pr.yaml | 18 +++++++++--------- .github/workflows/test.yaml | 6 +++--- ...6_64.yaml => all_cuda-122_arch-x86_64.yaml} | 4 ++-- conda/recipes/libwholegraph/meta.yaml | 11 ++++++++++- conda/recipes/pylibwholegraph/meta.yaml | 4 ++++ dependencies.yaml | 6 +++++- 7 files changed, 39 insertions(+), 22 deletions(-) rename conda/environments/{all_cuda-120_arch-x86_64.yaml => all_cuda-122_arch-x86_64.yaml} (94%) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index ff7b600bc..3432405f9 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -49,7 +49,7 @@ jobs: if: 
github.ref_type == 'branch' needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -62,7 +62,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -70,7 +70,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibwholegraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -80,7 +80,7 @@ jobs: wheel-publish-pylibwholegraph: needs: wheel-build-pylibwholegraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2c2578e04..6e431826f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -21,41 +21,41 @@ jobs: - wheel-build-pylibwholegraph - wheel-test-pylibwholegraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@test-cuda-12.2 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@test-cuda-12.2 with: enable_check_generated_files: false 
conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2 with: build_type: pull-request arch: "amd64" @@ -64,14 +64,14 @@ jobs: wheel-build-pylibwholegraph: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/build_wheel.sh wheel-test-pylibwholegraph: needs: wheel-build-pylibwholegraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/test_wheel.sh diff --git a/.github/workflows/test.yaml 
b/.github/workflows/test.yaml index 489348971..e276f09c6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-pytorch-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibwholegraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml similarity index 94% rename from conda/environments/all_cuda-120_arch-x86_64.yaml rename to conda/environments/all_cuda-122_arch-x86_64.yaml index fd90a0b32..ad012959a 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cuda-cudart-dev - cuda-nvcc - cuda-nvtx -- cuda-version=12.0 +- cuda-version=12.2 - cudnn=8.8 - cxx-compiler - cython>=3.0.0 @@ -46,4 +46,4 @@ dependencies: - sphinx<6 - sphinxcontrib-websupport - sysroot_linux-64=2.17 -name: all_cuda-120_arch-x86_64 +name: all_cuda-122_arch-x86_64 diff --git a/conda/recipes/libwholegraph/meta.yaml b/conda/recipes/libwholegraph/meta.yaml index cb6522aaa..fd1b3dfa9 100644 --- a/conda/recipes/libwholegraph/meta.yaml +++ b/conda/recipes/libwholegraph/meta.yaml @@ -4,7 +4,6 
@@ {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} {% set cuda_major = cuda_version.split('.')[0] %} -{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0 {% set date_string = environ['RAPIDS_DATE_STRING'] %} package: @@ -78,6 +77,9 @@ outputs: ignore_run_exports_from: {% if cuda_major == "11" %} - {{ compiler('cuda11') }} + {% else %} + - {{ compiler('cuda') }} + - cuda-cudart-dev {% endif %} requirements: build: @@ -92,6 +94,8 @@ outputs: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit + {% else %} + - cuda-cudart {% endif %} - libraft ={{ minor_version }} - nccl {{ nccl_version }} @@ -109,6 +113,9 @@ outputs: ignore_run_exports_from: {% if cuda_major == "11" %} - {{ compiler('cuda11') }} + {% else %} + - {{ compiler('cuda') }} + - cuda-cudart-dev {% endif %} requirements: build: @@ -124,6 +131,8 @@ outputs: - {{ pin_subpackage('libwholegraph', exact=True) }} {% if cuda_major == "11" %} - cudatoolkit + {% else %} + - cuda-cudart {% endif %} - gmock {{ gtest_version }} - gtest {{ gtest_version }} diff --git a/conda/recipes/pylibwholegraph/meta.yaml b/conda/recipes/pylibwholegraph/meta.yaml index 0c7d57dc3..c67f8e131 100644 --- a/conda/recipes/pylibwholegraph/meta.yaml +++ b/conda/recipes/pylibwholegraph/meta.yaml @@ -35,7 +35,11 @@ build: - SCCACHE_S3_USE_SSL - SCCACHE_S3_NO_CREDENTIALS ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% else %} - {{ compiler('cuda') }} + {% endif %} requirements: build: diff --git a/dependencies.yaml b/dependencies.yaml index c3bfbd892..af610b98a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 +3,7 @@ files: all: output: [conda] matrix: - cuda: ["11.8", "12.0"] + cuda: ["11.8", "12.2"] arch: [x86_64] includes: - checks @@ -137,6 +137,10 @@ dependencies: cuda: 
"12.0" packages: - cuda-version=12.0 + - matrix: + cuda: "12.2" + packages: + - cuda-version=12.2 cuda: specific: - output_types: conda From 41728c7e131605955db2a197886772c8aec0bddf Mon Sep 17 00:00:00 2001 From: James Lamb <jlamb@nvidia.com> Date: Tue, 20 Feb 2024 12:18:37 -0600 Subject: [PATCH 04/10] target branch-24.04 for GitHub Actions workflows (#135) Follow-up to #116 For all GitHub Actions configs, replaces uses of the `test-cuda-12.2` branch on `shared-workflows` with `branch-24.04`, now that https://github.com/rapidsai/shared-workflows/pull/166 has been merged. ### Notes for Reviewers This is part of ongoing work to build and test packages against CUDA 12.2 across all of RAPIDS. For more details see: * https://github.com/rapidsai/build-planning/issues/7 *(created with `rapids-reviser`)* Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/wholegraph/pull/135 --- .github/workflows/build.yaml | 12 ++++++------ .github/workflows/pr.yaml | 18 +++++++++--------- .github/workflows/test.yaml | 6 +++--- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3432405f9..ff7b600bc 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch 
}} @@ -49,7 +49,7 @@ jobs: if: github.ref_type == 'branch' needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -62,7 +62,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -70,7 +70,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibwholegraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -80,7 +80,7 @@ jobs: wheel-publish-pylibwholegraph: needs: wheel-build-pylibwholegraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 6e431826f..2c2578e04 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -21,41 +21,41 @@ jobs: - wheel-build-pylibwholegraph - wheel-test-pylibwholegraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 with: 
enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: build_type: pull-request arch: "amd64" @@ -64,14 +64,14 @@ jobs: wheel-build-pylibwholegraph: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: pull-request script: ci/build_wheel.sh wheel-test-pylibwholegraph: needs: wheel-build-pylibwholegraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: pull-request script: ci/test_wheel.sh diff 
--git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e276f09c6..489348971 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-pytorch-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibwholegraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} From cc633b92c9cd19d8de0f2f363d386149762a85ed Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Wed, 21 Feb 2024 09:07:17 -0600 Subject: [PATCH 05/10] updating ops-bot.yaml (#126) --- .github/ops-bot.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml index 9a0b41550..2ed5231ae 100644 --- a/.github/ops-bot.yaml +++ b/.github/ops-bot.yaml @@ -6,3 +6,4 @@ branch_checker: true label_checker: true release_drafter: true recently_updated: true +forward_merger: true From aafd5be421c95d909aa37b2c6f6df3b67c292a4c Mon Sep 17 00:00:00 2001 From: James Lamb <jlamb@nvidia.com> Date: Thu, 29 Feb 2024 11:53:33 -0600 Subject: [PATCH 06/10] Add support for Python 3.11, require NumPy 1.23+ (#139) Contributes to https://github.com/rapidsai/build-planning/issues/3 This PR adds support for Python 3.11. 
It also bumps uses of `NumPy` to `numpy>=1.23`, see https://github.com/rapidsai/build-planning/issues/3#issuecomment-1967952280. ## Notes for Reviewers This is part of ongoing work to add Python 3.11 support across RAPIDS. The Python 3.11 CI workflows introduced in https://github.com/rapidsai/shared-workflows/pull/176 are *optional*... they are not yet required to run successfully for PRs to be merged. This PR can be merged once all jobs are running successfully (including the non-required jobs for Python 3.11). The CI logs should be checked to verify that the jobs are building and testing with Python 3.11. See https://github.com/rapidsai/shared-workflows/pull/176 for more details. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Jake Awe (https://github.com/AyodeAwe) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/wholegraph/pull/139 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 4 ++-- conda/environments/all_cuda-122_arch-x86_64.yaml | 4 ++-- dependencies.yaml | 8 ++++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 85e063967..126a1ae02 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -30,7 +30,7 @@ dependencies: - nbsphinx - nccl - ninja -- numpy>=1.17 +- numpy>=1.23 - numpydoc - nvcc_linux-64=11.8 - pre-commit @@ -38,7 +38,7 @@ dependencies: - pytest - pytest-forked - pytest-xdist -- python>=3.9,<3.11 +- python>=3.9,<3.12 - pytorch-cuda=11.8 - pytorch=2.0.0 - recommonmark diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index ad012959a..5f765c652 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -31,14 +31,14 @@ dependencies: - nbsphinx - nccl - ninja -- numpy>=1.17 +- numpy>=1.23 - numpydoc - 
pre-commit - pydata-sphinx-theme - pytest - pytest-forked - pytest-xdist -- python>=3.9,<3.11 +- python>=3.9,<3.12 - recommonmark - scikit-build - sphinx-copybutton diff --git a/dependencies.yaml b/dependencies.yaml index af610b98a..2425d2e37 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -188,8 +188,12 @@ dependencies: packages: - python=3.10 - matrix: + py: "3.11" packages: - - python>=3.9,<3.11 + - python=3.11 + - matrix: + packages: + - python>=3.9,<3.12 run: common: - output_types: [conda, requirements] @@ -209,7 +213,7 @@ dependencies: - output_types: [conda, requirements] packages: - ninja - - numpy>=1.17 + - numpy>=1.23 - pytest - pytest-forked - pytest-xdist From 0668299685a69bd857bc8fd1b5c953cde0706561 Mon Sep 17 00:00:00 2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:09:08 -0800 Subject: [PATCH 07/10] Add environment-agnostic scripts for running ctests and pytests (#128) This PR adds environment-agnostic `run_{ctests,pytests}.sh` scripts, and updates `test_{cpp,python}.sh` to call them. The `test_{cpp,python}.sh` scripts assume they're running in our CI environment, and they do more than just run the tests. This PR allows devs and downstream consumers to only run the tests, and skip the unrelated logic in `test_{cpp,python}.sh`. 
Authors: - Paul Taylor (https://github.com/trxcllnt) - Ray Douglass (https://github.com/raydouglass) Approvers: - Jake Awe (https://github.com/AyodeAwe) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/wholegraph/pull/128 --- ci/run_ctests.sh | 9 ++++++ ci/run_pytests.sh | 9 ++++++ ci/test_cpp.sh | 28 ++++++------------- ci/test_python.sh | 14 ++++------ ci/test_wheel.sh | 6 ++-- .../pylibwholegraph/tests/__init__.py | 0 .../tests/pylibwholegraph/__init__.py | 0 .../tests/wholegraph_torch/__init__.py | 0 .../tests/wholegraph_torch/ops/__init__.py | 0 9 files changed, 34 insertions(+), 32 deletions(-) create mode 100755 ci/run_ctests.sh create mode 100755 ci/run_pytests.sh delete mode 100644 python/pylibwholegraph/pylibwholegraph/tests/__init__.py delete mode 100644 python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/__init__.py delete mode 100644 python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/__init__.py delete mode 100644 python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/__init__.py diff --git a/ci/run_ctests.sh b/ci/run_ctests.sh new file mode 100755 index 000000000..1ebbf4c55 --- /dev/null +++ b/ci/run_ctests.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +# Support customizing the ctests' install location +cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/libwholegraph/" + +find . -type f -executable -print0 | xargs -0 -r -t -n1 -P1 sh -c 'exec "$0"'; diff --git a/ci/run_pytests.sh b/ci/run_pytests.sh new file mode 100755 index 000000000..b21cbf913 --- /dev/null +++ b/ci/run_pytests.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +# Support invoking run_pytests.sh outside the script directory +cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibwholegraph/pylibwholegraph/ + +pytest --cache-clear --forked --import-mode=append "$@" tests diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 281318047..0dd5626bc 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -1,8 +1,11 @@ #!/bin/bash -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. set -euo pipefail +# Support invoking test_cpp.sh outside the script directory +cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ + . /opt/conda/etc/profile.d/conda.sh rapids-logger "Generate C++ testing dependencies" @@ -32,25 +35,10 @@ rapids-mamba-retry install \ rapids-logger "Check GPU usage" nvidia-smi -EXITCODE=0 -trap "EXITCODE=1" ERR -set +e # Run libwholegraph tests from libwholegraph-tests package rapids-logger "Run tests" -INSTALLED_TEST_PATH=${CONDA_PREFIX}/bin/gtests/libwholegraph - -for file in "${INSTALLED_TEST_PATH}"/*; do - if [[ -x "$file" ]]; then - rapids-logger "Running: $file" - "$file" - exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - echo "Test $file returned a non-zero exit code: $exit_code" - exit $exit_code - fi - fi -done - -exit 0 +./ci/run_ctests.sh && EXITCODE=$? || EXITCODE=$? + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_python.sh b/ci/test_python.sh index 2dff744e5..941ac38fd 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -1,8 +1,11 @@ #!/bin/bash -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. set -euo pipefail +# Support invoking test_python.sh outside the script directory +cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ + .
/opt/conda/etc/profile.d/conda.sh ARCH=$(arch) @@ -50,16 +53,9 @@ rapids-mamba-retry install \ rapids-logger "Check GPU usage" nvidia-smi -trap "EXITCODE=1" ERR -set +e rapids-logger "pytest pylibwholegraph" -PYLIBWHOLEGRAPH_INSTALL_PATH=`python -c 'import os; import pylibwholegraph; print(os.path.dirname(pylibwholegraph.__file__))'` -PYTEST_PATH=${PYLIBWHOLEGRAPH_INSTALL_PATH}/tests -pytest \ - --cache-clear \ - --forked \ - ${PYTEST_PATH} +./ci/run_pytests.sh && EXITCODE=$? || EXITCODE=$? echo "test_python is exiting with value: ${EXITCODE}" exit ${EXITCODE} diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 041333d3a..9fdeec250 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -27,9 +27,9 @@ rapids-logger "Installing PyTorch" rapids-retry python -m pip install --pre torch --index-url ${INDEX_URL} rapids-retry python -m pip install pytest pytest-forked numpy rapids-logger "pytest pylibwholegraph" -PYLIBWHOLEGRAPH_INSTALL_PATH=`python -c 'import os; import pylibwholegraph; print(os.path.dirname(pylibwholegraph.__file__))'` -PYTEST_PATH=${PYLIBWHOLEGRAPH_INSTALL_PATH}/tests +cd python/pylibwholegraph/pylibwholegraph/tests python -m pytest \ --cache-clear \ --forked \ - ${PYTEST_PATH} + --import-mode=append \ + . 
diff --git a/python/pylibwholegraph/pylibwholegraph/tests/__init__.py b/python/pylibwholegraph/pylibwholegraph/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/__init__.py b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/__init__.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/__init__.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/__init__.py deleted file mode 100644 index e69de29bb..000000000 From 0f54f73767d949179b1fdb9786cdbb9b052d2803 Mon Sep 17 00:00:00 2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:10:26 -0800 Subject: [PATCH 08/10] Use default `rapids-cmake` CUDA_ARCHITECTURES (#140) CMake [initializes `CMAKE_CUDA_ARCHITECTURES` from the `CUDAARCHS` environment variable](https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_ARCHITECTURES.html). Defaulting `CMAKE_CUDA_ARCHITECTURES` to a fixed list of archs prevents CMake from initializing the value from the envvar. This PR skips setting `CMAKE_CUDA_ARCHITECTURES`, and instead uses `rapids-cmake` default behavior of [initializing from the `CUDAARCHS` envvar](https://github.com/rapidsai/rapids-cmake/blob/branch-24.04/rapids-cmake/cuda/init_architectures.cmake#L73-L93) and falling back to the same default [archs list](https://github.com/rapidsai/rapids-cmake/blob/branch-24.04/rapids-cmake/cuda/set_architectures.cmake#L54) as is currently used in `build.sh`. 
Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/wholegraph/pull/140 --- cpp/CMakeLists.txt | 3 --- python/pylibwholegraph/CMakeLists.txt | 3 --- 2 files changed, 6 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4968c28a4..dc75bd99c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -33,9 +33,6 @@ include(rapids-cuda) include(rapids-export) include(rapids-find) -if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - set(CMAKE_CUDA_ARCHITECTURES 70-real 80-real 86) -endif () rapids_cuda_init_architectures(WHOLEGRAPH) project(WHOLEGRAPH VERSION ${WHOLEGRAPH_VERSION} LANGUAGES CXX CUDA) diff --git a/python/pylibwholegraph/CMakeLists.txt b/python/pylibwholegraph/CMakeLists.txt index 97576c5f3..34a788f55 100644 --- a/python/pylibwholegraph/CMakeLists.txt +++ b/python/pylibwholegraph/CMakeLists.txt @@ -31,9 +31,6 @@ include(rapids-cmake) include(rapids-cuda) include(rapids-cpm) -if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - set(CMAKE_CUDA_ARCHITECTURES 70-real 80-real 86) -endif () rapids_cuda_init_architectures(PYLIBWHOLEGRAPH) project(PYLIBWHOLEGRAPH VERSION ${WHOLEGRAPH_VERSION} LANGUAGES C CXX CUDA) From aa94b052d1954aafcf3bad339e3d8b9d0881e20f Mon Sep 17 00:00:00 2001 From: linhu-nv <141609318+linhu-nv@users.noreply.github.com> Date: Wed, 6 Mar 2024 02:58:59 +0800 Subject: [PATCH 09/10] use enum to implement log_level in wholememory (#136) Authors: - https://github.com/linhu-nv Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/wholegraph/pull/136 --- cpp/include/wholememory/wholememory.h | 14 +++++++++-- cpp/src/logger.cpp | 8 +++---- cpp/src/logger.hpp | 24 +++++++------------ cpp/src/wholememory/initialize.cpp | 5 ++-- cpp/src/wholememory/initialize.hpp | 2 +- cpp/src/wholememory/wholememory.cpp | 4 ++-- .../binding/wholememory_binding.pyx | 23 +++++++++++++++--- .../pylibwholegraph/torch/common_options.py | 2 +- 
.../pylibwholegraph/torch/initialize.py | 9 ++++--- .../pylibwholegraph/torch/utils.py | 18 ++++++++++++++ 10 files changed, 73 insertions(+), 36 deletions(-) diff --git a/cpp/include/wholememory/wholememory.h b/cpp/include/wholememory/wholememory.h index 885dddd8e..7fc6ad174 100644 --- a/cpp/include/wholememory/wholememory.h +++ b/cpp/include/wholememory/wholememory.h @@ -80,13 +80,23 @@ enum wholememory_distributed_backend_t { WHOLEMEMORY_DB_NCCL, WHOLEMEMORY_DB_NVSHMEM, }; + +enum LogLevel { + LEVEL_FATAL = 0, /*!< Fatal */ + LEVEL_ERROR, /*!< Error */ + LEVEL_WARN, /*!< Warn */ + LEVEL_INFO, /*!< Info */ + LEVEL_DEBUG, /*!< Debug*/ + LEVEL_TRACE /*!< Trace */ +}; + /** * Initialize WholeMemory library * @param flags : reserved should be 0 - * @param wm_log_level : wholememory log level, the default level is "info" + * @param log_level : wholememory log level, the default level is "info" * @return : wholememory_error_code_t */ -wholememory_error_code_t wholememory_init(unsigned int flags, unsigned int wm_log_level = 3); +wholememory_error_code_t wholememory_init(unsigned int flags, LogLevel log_level = LEVEL_INFO); /** * Finalize WholeMemory library diff --git a/cpp/src/logger.cpp b/cpp/src/logger.cpp index 21bd618f2..bc0337cfa 100644 --- a/cpp/src/logger.cpp +++ b/cpp/src/logger.cpp @@ -21,14 +21,14 @@ namespace wholememory { -int& get_log_level() +LogLevel& get_log_level() { - static int log_level = LEVEL_INFO; + static LogLevel log_level = LEVEL_INFO; return log_level; } -void set_log_level(int lev) { get_log_level() = lev; } +void set_log_level(LogLevel lev) { get_log_level() = lev; } -bool will_log_for(int lev) { return lev <= get_log_level(); } +bool will_log_for(LogLevel lev) { return lev <= get_log_level(); } } // namespace wholememory diff --git a/cpp/src/logger.hpp b/cpp/src/logger.hpp index 0d10f0638..5fe9a6689 100644 --- a/cpp/src/logger.hpp +++ b/cpp/src/logger.hpp @@ -24,21 +24,15 @@ #include <raft/core/error.hpp> #include "error.hpp" +#include 
<wholememory/wholememory.h> namespace wholememory { -static constexpr int LEVEL_FATAL = 0; -static constexpr int LEVEL_ERROR = 10; -static constexpr int LEVEL_WARN = 100; -static constexpr int LEVEL_INFO = 1000; -static constexpr int LEVEL_DEBUG = 10000; -static constexpr int LEVEL_TRACE = 100000; +LogLevel& get_log_level(); -int& get_log_level(); +void set_log_level(LogLevel lev); -void set_log_level(int lev); - -bool will_log_for(int lev); +bool will_log_for(LogLevel lev); /** * @defgroup CStringFormat Expand a C-style format string @@ -86,10 +80,10 @@ inline std::string format(const char* fmt, ...) throw wholememory::logic_error(fatal_msg); \ } while (0) -#define WHOLEMEMORY_ERROR(fmt, ...) WHOLEMEMORY_LOG(wholememory::LEVEL_ERROR, fmt, ##__VA_ARGS__) -#define WHOLEMEMORY_WARN(fmt, ...) WHOLEMEMORY_LOG(wholememory::LEVEL_WARN, fmt, ##__VA_ARGS__) -#define WHOLEMEMORY_INFO(fmt, ...) WHOLEMEMORY_LOG(wholememory::LEVEL_INFO, fmt, ##__VA_ARGS__) -#define WHOLEMEMORY_DEBUG(fmt, ...) WHOLEMEMORY_LOG(wholememory::LEVEL_DEBUG, fmt, ##__VA_ARGS__) -#define WHOLEMEMORY_TRACE(fmt, ...) WHOLEMEMORY_LOG(wholememory::LEVEL_TRACE, fmt, ##__VA_ARGS__) +#define WHOLEMEMORY_ERROR(fmt, ...) WHOLEMEMORY_LOG(LEVEL_ERROR, fmt, ##__VA_ARGS__) +#define WHOLEMEMORY_WARN(fmt, ...) WHOLEMEMORY_LOG(LEVEL_WARN, fmt, ##__VA_ARGS__) +#define WHOLEMEMORY_INFO(fmt, ...) WHOLEMEMORY_LOG(LEVEL_INFO, fmt, ##__VA_ARGS__) +#define WHOLEMEMORY_DEBUG(fmt, ...) WHOLEMEMORY_LOG(LEVEL_DEBUG, fmt, ##__VA_ARGS__) +#define WHOLEMEMORY_TRACE(fmt, ...) 
WHOLEMEMORY_LOG(LEVEL_TRACE, fmt, ##__VA_ARGS__) } // namespace wholememory diff --git a/cpp/src/wholememory/initialize.cpp b/cpp/src/wholememory/initialize.cpp index b7d1e54ac..f614ad38f 100644 --- a/cpp/src/wholememory/initialize.cpp +++ b/cpp/src/wholememory/initialize.cpp @@ -17,7 +17,6 @@ #include <cuda.h> #include <cuda_runtime_api.h> -#include <math.h> #include <nccl.h> #include "communicator.hpp" @@ -33,7 +32,7 @@ static bool is_wm_init = false; static const std::string RAFT_NAME = "wholememory"; static cudaDeviceProp* device_props = nullptr; -wholememory_error_code_t init(unsigned int flags, unsigned int wm_log_level) noexcept +wholememory_error_code_t init(unsigned int flags, LogLevel log_level) noexcept { try { std::unique_lock<std::mutex> lock(mu); @@ -51,7 +50,7 @@ wholememory_error_code_t init(unsigned int flags, unsigned int wm_log_level) noe WM_CUDA_CHECK(cudaGetDeviceProperties(device_props + i, i)); } is_wm_init = true; - wholememory::set_log_level(std::pow(10, wm_log_level)); + wholememory::set_log_level(log_level); return WHOLEMEMORY_SUCCESS; } catch (raft::logic_error& logic_error) { WHOLEMEMORY_ERROR("init failed, logic_error=%s", logic_error.what()); diff --git a/cpp/src/wholememory/initialize.hpp b/cpp/src/wholememory/initialize.hpp index 77870f989..6afb1cbe8 100644 --- a/cpp/src/wholememory/initialize.hpp +++ b/cpp/src/wholememory/initialize.hpp @@ -21,7 +21,7 @@ namespace wholememory { -wholememory_error_code_t init(unsigned int flags, unsigned int wm_log_level) noexcept; +wholememory_error_code_t init(unsigned int flags, LogLevel log_level) noexcept; wholememory_error_code_t finalize() noexcept; diff --git a/cpp/src/wholememory/wholememory.cpp b/cpp/src/wholememory/wholememory.cpp index 2f5f33a36..478833117 100644 --- a/cpp/src/wholememory/wholememory.cpp +++ b/cpp/src/wholememory/wholememory.cpp @@ -25,9 +25,9 @@ extern "C" { #endif -wholememory_error_code_t wholememory_init(unsigned int flags, unsigned int wm_log_level) 
+wholememory_error_code_t wholememory_init(unsigned int flags, LogLevel log_level) { - return wholememory::init(flags, wm_log_level); + return wholememory::init(flags, log_level); } wholememory_error_code_t wholememory_finalize() { return wholememory::finalize(); } diff --git a/python/pylibwholegraph/pylibwholegraph/binding/wholememory_binding.pyx b/python/pylibwholegraph/pylibwholegraph/binding/wholememory_binding.pyx index 77d86ffdb..0499007bf 100644 --- a/python/pylibwholegraph/pylibwholegraph/binding/wholememory_binding.pyx +++ b/python/pylibwholegraph/pylibwholegraph/binding/wholememory_binding.pyx @@ -71,7 +71,16 @@ cdef extern from "wholememory/wholememory.h": WHOLEMEMORY_DB_NONE "WHOLEMEMORY_DB_NONE" WHOLEMEMORY_DB_NCCL "WHOLEMEMORY_DB_NCCL" WHOLEMEMORY_DB_NVSHMEM "WHOLEMEMORY_DB_NVSHMEM" - cdef wholememory_error_code_t wholememory_init(unsigned int flags, unsigned int wm_log_level) + + ctypedef enum LogLevel: + LEVEL_FATAL "LEVEL_FATAL" + LEVEL_ERROR "LEVEL_ERROR" + LEVEL_WARN "LEVEL_WARN" + LEVEL_INFO "LEVEL_INFO" + LEVEL_DEBUG "LEVEL_DEBUG" + LEVEL_TRACE "LEVEL_TRACE" + + cdef wholememory_error_code_t wholememory_init(unsigned int flags, LogLevel log_level) cdef wholememory_error_code_t wholememory_finalize() @@ -204,6 +213,14 @@ cpdef enum WholeMemoryDistributedBackend: DbNCCL = WHOLEMEMORY_DB_NCCL DbNVSHMEM = WHOLEMEMORY_DB_NVSHMEM +cpdef enum WholeMemoryLogLevel: + LevFatal = LEVEL_FATAL + LevError = LEVEL_ERROR + LevWarn = LEVEL_WARN + LevInfo = LEVEL_INFO + LevDebug = LEVEL_DEBUG + LevTrace = LEVEL_TRACE + cdef check_wholememory_error_code(wholememory_error_code_t err): cdef WholeMemoryErrorCode err_code = int(err) if err_code == Success: @@ -986,8 +1003,8 @@ cdef class PyWholeMemoryUniqueID: def __dlpack_device__(self): return (kDLCPU, 0) -def init(unsigned int flags, unsigned int wm_log_level = 3): - check_wholememory_error_code(wholememory_init(flags, wm_log_level)) +def init(unsigned int flags, LogLevel log_level = LEVEL_INFO): + 
check_wholememory_error_code(wholememory_init(flags, log_level)) def finalize(): check_wholememory_error_code(wholememory_finalize()) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/common_options.py b/python/pylibwholegraph/pylibwholegraph/torch/common_options.py index 14955305b..3bf480ba1 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/common_options.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/common_options.py @@ -9,7 +9,7 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License.ß +# limitations under the License. from argparse import ArgumentParser diff --git a/python/pylibwholegraph/pylibwholegraph/torch/initialize.py b/python/pylibwholegraph/pylibwholegraph/torch/initialize.py index 94ee74261..535594f6b 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/initialize.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/initialize.py @@ -16,15 +16,15 @@ import torch.utils.dlpack import pylibwholegraph.binding.wholememory_binding as wmb from .comm import set_world_info, get_global_communicator, get_local_node_communicator, reset_communicators +from .utils import str_to_wmb_wholememory_log_level def init(world_rank: int, world_size: int, local_rank: int, local_size: int, wm_log_level="info"): - log_level_dic = {"error": 1, "warn": 2, "info": 3, "debug": 4, "trace": 5} - wmb.init(0, log_level_dic[wm_log_level]) + wmb.init(0, str_to_wmb_wholememory_log_level(wm_log_level)) set_world_info(world_rank, world_size, local_rank, local_size) -def init_torch_env(world_rank: int, world_size: int, local_rank: int, local_size: int, wm_log_level): +def init_torch_env(world_rank: int, world_size: int, local_rank: int, local_size: int, wm_log_level="info"): r"""Init WholeGraph environment for PyTorch. 
:param world_rank: world rank of current process :param world_size: world size of all processes @@ -45,8 +45,7 @@ def init_torch_env(world_rank: int, world_size: int, local_rank: int, local_size print("[WARNING] MASTER_PORT not set, resetting to 12335") os.environ["MASTER_PORT"] = "12335" - log_level_dic = {"error": 1, "warn": 2, "info": 3, "debug": 4, "trace": 5} - wmb.init(0, log_level_dic[wm_log_level]) + wmb.init(0, str_to_wmb_wholememory_log_level(wm_log_level)) torch.set_num_threads(1) torch.cuda.set_device(local_rank) torch.distributed.init_process_group(backend="nccl", init_method="env://") diff --git a/python/pylibwholegraph/pylibwholegraph/torch/utils.py b/python/pylibwholegraph/pylibwholegraph/torch/utils.py index dab74a261..c03c2f061 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/utils.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/utils.py @@ -99,6 +99,24 @@ def str_to_wmb_wholememory_memory_type(str_wmb_type: str): ) +def str_to_wmb_wholememory_log_level(str_log_level: str): + if str_log_level == "error": + return wmb.WholeMemoryLogLevel.LevError + elif str_log_level == "warn": + return wmb.WholeMemoryLogLevel.LevWarn + elif str_log_level == "info": + return wmb.WholeMemoryLogLevel.LevInfo + elif str_log_level == "debug": + return wmb.WholeMemoryLogLevel.LevDebug + elif str_log_level == "trace": + return wmb.WholeMemoryLogLevel.LevTrace + else: + raise ValueError( + "WholeMemory log level %s not supported, shold be (error, warn, info, debug, trace)" + % (str_log_level,) + ) + + def str_to_wmb_wholememory_location(str_wmb_location: str): if str_wmb_location == "cuda": return wmb.WholeMemoryMemoryLocation.MlDevice From 23b69d0e143c9bf965e196bde56d3e209397c760 Mon Sep 17 00:00:00 2001 From: Chang Liu <liuc@nvidia.com> Date: Tue, 5 Mar 2024 13:14:39 -0800 Subject: [PATCH 10/10] [Bugfix] Host full-neighbor sampling returns wrong results in unit test (#138) This is to fix https://github.com/rapidsai/wholegraph/issues/137. 
Meanwhile, it also enables the full-neighbor sampling tests. (Without this fix, scenarios with `max_sample_count=-1` would have assertion failures due to https://github.com/rapidsai/wholegraph/issues/137.) Authors: - Chang Liu (https://github.com/chang-l) - Brad Rees (https://github.com/BradReesWork) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/wholegraph/pull/138 --- .../pylibwholegraph/pylibwholegraph/test_utils/test_comm.py | 4 ++-- .../test_wholegraph_unweighted_sample_without_replacement.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py index 83e2fbc52..573126650 100644 --- a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py +++ b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py @@ -103,9 +103,9 @@ def host_sample_all_neighbors( output_id = output_sample_offset_tensor[i] for j in range(end - start): output_dest_tensor[output_id + j] = host_csr_col_ptr[start + j] - output_center_localid_tensor[output_id + j] = node_id + output_center_localid_tensor[output_id + j] = i output_edge_gid_tensor[output_id + j] = start + j - return output_dest_tensor, output_center_localid_tensor, output_edge_gid_tensor + return output_sample_offset_tensor, output_dest_tensor, output_center_localid_tensor, output_edge_gid_tensor def copy_host_1D_tensor_to_wholememory( diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py index 776a94094..1953419f5 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py +++
b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py @@ -359,7 +359,7 @@ def routine_func(world_rank: int, world_size: int, **kwargs): @pytest.mark.parametrize("graph_node_count", [103]) @pytest.mark.parametrize("graph_edge_count", [1043]) -@pytest.mark.parametrize("max_sample_count", [11]) +@pytest.mark.parametrize("max_sample_count", [11, -1]) @pytest.mark.parametrize("center_node_count", [13]) @pytest.mark.parametrize("center_node_dtype", [torch.int32, torch.int64]) @pytest.mark.parametrize("col_id_dtype", [0, 1])