Skip to content

Commit

Permalink
Merge remote-tracking branch 'NVIDIA/main' into arr_reshape
Browse files Browse the repository at this point in the history
  • Loading branch information
gmarkall committed Nov 29, 2024
2 parents 7928f2d + c04efe0 commit 0a0eb35
Show file tree
Hide file tree
Showing 37 changed files with 1,614 additions and 78 deletions.
22 changes: 21 additions & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ jobs:
- compute-matrix
- build-conda
- test-conda
- test-conda-pynvjitlink
- build-wheels
- test-wheels
- test-wheels-pynvjitlink
- build-docs
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down Expand Up @@ -57,6 +59,16 @@ jobs:
script: "ci/test_conda.sh"
run_codecov: false
matrix_filter: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
test-conda-pynvjitlink:
needs:
- build-conda
- compute-matrix
uses: ./.github/workflows/conda-python-tests.yaml
with:
build_type: pull-request
script: "ci/test_conda_pynvjitlink.sh"
run_codecov: false
matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "12.5.1" and .PY_VER == "3.11"))
build-wheels:
needs:
- compute-matrix
Expand All @@ -71,7 +83,15 @@ jobs:
uses: ./.github/workflows/wheels-test.yaml
with:
build_type: pull-request
script: "ci/test_wheel.sh"
script: "ci/test_wheel.sh false"
test-wheels-pynvjitlink:
needs:
- build-wheels
uses: ./.github/workflows/wheels-test.yaml
with:
build_type: pull-request
script: "ci/test_wheel_pynvjitlink.sh"
matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "12.5.1" and .PY_VER == "3.12"))
build-docs:
needs:
- build-conda
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ __pycache__
build
.*.swp
*.so
numba_cuda/numba/cuda/tests/cudadrv/test_device_functions.*
numba_cuda/numba/cuda/tests/cudadrv/undefined_extern.*
2 changes: 1 addition & 1 deletion ci/test_conda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ set -euo pipefail
if [ "${CUDA_VER%.*.*}" = "11" ]; then
CTK_PACKAGES="cudatoolkit"
else
CTK_PACKAGES="cuda-nvcc-impl cuda-nvrtc"
CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc"
fi

rapids-logger "Install testing dependencies"
Expand Down
77 changes: 77 additions & 0 deletions ci/test_conda_pynvjitlink.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION
#
# Install numba-cuda from the local conda channel into a fresh "test"
# environment, add pynvjitlink, build the test binaries, and run the
# numba.cuda test suite with NUMBA_CUDA_ENABLE_PYNVJITLINK=1.

set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

# Pick the toolkit packages by the major component of CUDA_VER.
if [ "${CUDA_VER%.*.*}" = "11" ]; then
    CTK_PACKAGES="cudatoolkit"
else
    CTK_PACKAGES="cuda-nvcc-impl cuda-nvrtc"
fi

rapids-logger "Install testing dependencies"
# TODO: Replace with rapids-dependency-file-generator
# CTK_PACKAGES is left unquoted on purpose so that the space-separated list
# expands to one argument per package.
rapids-mamba-retry create -n test \
    c-compiler \
    cxx-compiler \
    ${CTK_PACKAGES} \
    cuda-python \
    "cuda-version=${CUDA_VER%.*}" \
    make \
    psutil \
    pytest \
    "python=${RAPIDS_PY_VERSION}"

# Temporarily allow unbound variables for conda activation.
set +u
conda activate test
set -u

rapids-mamba-retry install -c "$(pwd)/conda-repo" numba-cuda

RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"
pushd "${RAPIDS_TESTS_DIR}"

rapids-print-env

rapids-logger "Check GPU usage"
nvidia-smi

rapids-logger "Show Numba system info"
python -m numba --sysinfo

# From here on, a failing command no longer aborts the script; the ERR trap
# records the failure and the script exits with EXITCODE at the end.
EXITCODE=0
trap "EXITCODE=1" ERR
set +e


rapids-logger "Install pynvjitlink"
set +u
rapids-mamba-retry install -c rapidsai pynvjitlink
set -u

rapids-logger "Build tests"

# Locate the test_binary_generation directory inside the installed package.
# os.path.dirname is used instead of str.rstrip('__init__.py'): rstrip removes
# any trailing run of the given *characters*, not a suffix, so it only worked
# because a path separator happens to precede the file name.
# The trailing '' passed to os.path.join keeps the trailing path separator.
PY_SCRIPT="
import os
import numba_cuda
root = os.path.dirname(numba_cuda.__file__)
test_dir = os.path.join(
    root, 'numba', 'cuda', 'tests', 'test_binary_generation', ''
)
print(test_dir)
"

NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$PY_SCRIPT")
pushd "${NUMBA_CUDA_TEST_BIN_DIR}"
make
popd


rapids-logger "Run Tests"
NUMBA_CUDA_ENABLE_PYNVJITLINK=1 NUMBA_CUDA_TEST_BIN_DIR="${NUMBA_CUDA_TEST_BIN_DIR}" python -m numba.runtests numba.cuda.tests -v

popd

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
1 change: 1 addition & 0 deletions ci/test_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ rapids-logger "Install testing dependencies"
python -m pip install \
psutil \
cuda-python \
nvidia-cuda-cccl-cu12 \
pytest

rapids-logger "Install wheel"
Expand Down
47 changes: 47 additions & 0 deletions ci/test_wheel_pynvjitlink.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION
#
# Install pynvjitlink and the numba-cuda wheel with pip, build the test
# binaries, and run the numba.cuda test suite with
# NUMBA_CUDA_ENABLE_PYNVJITLINK=1. Any failing command aborts the script
# (set -e stays active throughout).

set -euo pipefail

rapids-logger "Install testing dependencies"
# TODO: Replace with rapids-dependency-file-generator
python -m pip install \
    psutil \
    cuda-python \
    pytest

rapids-logger "Install pynvjitlink"
python -m pip install pynvjitlink-cu12

rapids-logger "Build tests"
# Locate the test_binary_generation directory of the importable numba_cuda
# package. os.path.dirname is used instead of str.rstrip('__init__.py'):
# rstrip removes any trailing run of the given *characters*, not a suffix, so
# it only worked because a path separator happens to precede the file name.
# The trailing '' passed to os.path.join keeps the trailing path separator.
PY_SCRIPT="
import os
import numba_cuda
root = os.path.dirname(numba_cuda.__file__)
test_dir = os.path.join(
    root, 'numba', 'cuda', 'tests', 'test_binary_generation', ''
)
print(test_dir)
"

NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$PY_SCRIPT")
pushd "${NUMBA_CUDA_TEST_BIN_DIR}"
make
popd

rapids-logger "Install wheel"
package=$(realpath wheel/numba_cuda*.whl)
echo "Package path: $package"
python -m pip install $package

rapids-logger "Check GPU usage"
nvidia-smi

RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"
pushd "${RAPIDS_TESTS_DIR}"

rapids-logger "Show Numba system info"
python -m numba --sysinfo

rapids-logger "Run Tests"
NUMBA_CUDA_ENABLE_PYNVJITLINK=1 NUMBA_CUDA_TEST_BIN_DIR="${NUMBA_CUDA_TEST_BIN_DIR}" python -m numba.runtests numba.cuda.tests -v

popd
10 changes: 10 additions & 0 deletions docs/source/reference/envvars.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,14 @@ target.
``/usr/local/cuda/include``. On Windows, the default is
``$env:CUDA_PATH\include``.

.. envvar:: NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY

Enable minor version compatibility for the CUDA driver. Requires the
``cubinlinker`` and ``ptxcompiler`` packages to be installed. Provides minor
version compatibility for driver versions less than 12.0.

.. envvar:: NUMBA_CUDA_ENABLE_PYNVJITLINK

Use ``pynvjitlink`` for minor version compatibility. Requires the ``pynvjitlink``
package to be installed. Provides minor version compatibility for driver versions
greater than 12.0.
6 changes: 4 additions & 2 deletions docs/source/user/minor_version_compatibility.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ MVC support is enabled by setting the environment variable:

.. code:: bash
export NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY=1
export NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY=1 # CUDA 11
export NUMBA_CUDA_ENABLE_PYNVJITLINK=1 # CUDA 12
or by setting a configuration variable prior to using any CUDA functionality in
Expand All @@ -74,7 +75,8 @@ Numba:
.. code:: python
from numba import config
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = True
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = True # CUDA 11
config.CUDA_ENABLE_PYNVJITLINK = True # CUDA 12
References
Expand Down
2 changes: 1 addition & 1 deletion numba_cuda/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.17
0.0.18
18 changes: 15 additions & 3 deletions numba_cuda/numba/cuda/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
get_cufunc), which may be of different compute capabilities.
"""

def __init__(self, codegen, name, entry_name=None, max_registers=None,
nvvm_options=None):
def __init__(
self,
codegen,
name,
entry_name=None,
max_registers=None,
lto=False,
nvvm_options=None
):
"""
codegen:
Codegen object.
Expand All @@ -71,6 +78,8 @@ def __init__(self, codegen, name, entry_name=None, max_registers=None,
kernel and not a device function.
max_registers:
The maximum register usage to aim for when linking.
lto:
Whether to enable link-time optimization.
nvvm_options:
Dict of options to pass to NVVM.
"""
Expand Down Expand Up @@ -103,6 +112,7 @@ def __init__(self, codegen, name, entry_name=None, max_registers=None,
self._cufunc_cache = {}

self._max_registers = max_registers
self._lto = lto
if nvvm_options is None:
nvvm_options = {}
self._nvvm_options = nvvm_options
Expand Down Expand Up @@ -178,7 +188,9 @@ def get_cubin(self, cc=None):
if cubin:
return cubin

linker = driver.Linker.new(max_registers=self._max_registers, cc=cc)
linker = driver.Linker.new(
max_registers=self._max_registers, cc=cc, lto=self._lto
)

if linker.lto:
ltoir = self.get_ltoir(cc=cc)
Expand Down
68 changes: 68 additions & 0 deletions numba_cuda/numba/cuda/cuda_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import re
import os
from collections import namedtuple
import platform

from numba.core.config import IS_WIN32
from numba.misc.findlib import find_lib, find_file
from numba import config


_env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info'])
Expand Down Expand Up @@ -241,6 +243,7 @@ def get_cuda_paths():
'libdevice': _get_libdevice_paths(),
'cudalib_dir': _get_cudalib_dir(),
'static_cudalib_dir': _get_static_cudalib_dir(),
'include_dir': _get_include_dir(),
}
# Cache result
get_cuda_paths._cached_result = d
Expand All @@ -256,3 +259,68 @@ def get_debian_pkg_libdevice():
if not os.path.exists(pkg_libdevice_location):
return None
return pkg_libdevice_location


def get_current_cuda_target_name():
    """Return conda's CTK target folder name for this platform, or None.

    The CTK conda packages deliver headers into an architecture-specific
    folder: an `x86_64` Linux machine uses
    `$CONDA_PREFIX/targets/x86_64-linux`, while `aarch64` Linux uses
    `$CONDA_PREFIX/targets/sbsa-linux`. The nuances are described in cudart's
    conda feedstock:
    https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501

    The lookup is keyed on ``platform.system()`` and ``platform.machine()``;
    any combination that is not listed yields ``None``.
    """
    target_by_platform = {
        ("Linux", "x86_64"): "x86_64-linux",
        ("Linux", "aarch64"): "sbsa-linux",
        ("Windows", "AMD64"): "x64",
    }
    platform_key = (platform.system(), platform.machine())
    return target_by_platform.get(platform_key)


def get_conda_include_dir():
    """
    Return the include directory of the current conda environment.

    Returns ``None`` when ``sys.prefix`` has no ``conda-meta`` entry (i.e. no
    conda environment is detected) or when the expected include directory
    does not exist.
    """
    env_root = sys.prefix

    # The presence of `conda-meta` under the prefix is what identifies a
    # conda environment here.
    if not os.path.exists(os.path.join(env_root, 'conda-meta')):
        return None

    if platform.system() == "Windows":
        relative_parts = ('Library', 'include')
    else:
        target_name = get_current_cuda_target_name()
        if target_name:
            relative_parts = ('targets', target_name, 'include')
        else:
            # Fallback for when the target cannot be determined, though
            # usually that should not happen.
            relative_parts = ('include',)

    candidate = os.path.join(env_root, *relative_parts)
    return candidate if os.path.exists(candidate) else None


def _get_include_dir():
    """Find the root include directory.

    Builds the list of (description, path) candidates - the conda environment
    include directory first, then the ``CUDA_INCLUDE_PATH`` config entry -
    hands it to ``_find_valid_path``, and wraps the selected pair in an
    ``_env_path_tuple``.
    """
    candidates = [
        ('Conda environment (NVIDIA package)', get_conda_include_dir()),
        ('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH),
        # TODO: add others
    ]
    source, path = _find_valid_path(candidates)
    return _env_path_tuple(by=source, info=path)
5 changes: 4 additions & 1 deletion numba_cuda/numba/cuda/cudadrv/devicearray.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,10 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
sentry_contiguous(obj)
devobj = from_array_like(obj, stream=stream)
if copy:
if config.CUDA_WARN_ON_IMPLICIT_COPY:
if (
config.CUDA_WARN_ON_IMPLICIT_COPY
and not config.DISABLE_PERFORMANCE_WARNINGS
):
if (
not user_explicit and
(not isinstance(obj, DeviceNDArray)
Expand Down
Loading

0 comments on commit 0a0eb35

Please sign in to comment.