
Commit

Merge branch 'main' into run_est
sanketpurandare authored Oct 28, 2024
2 parents e889a5e + 94e4a1f commit adf2b31
Showing 1,818 changed files with 55,913 additions and 25,836 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/executorch.txt
@@ -1 +1 @@
cd1c833b079adb324871dcbbe75b43d42ffc0ade
export-D64151426
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton-cpu.txt
@@ -1 +1 @@
6a333f1b05671f6fada4ba7bbfae4a02a9d96f4f
c7711371cace304afe265c1ffa906415ab82fc66
7 changes: 4 additions & 3 deletions .ci/docker/common/install_clang.sh
@@ -20,9 +20,10 @@ if [ -n "$CLANG_VERSION" ]; then
fi

sudo apt-get update
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
if [[ $CLANG_VERSION == 18 ]]; then
apt-get install -y --no-install-recommends libomp-18-dev
if [[ $CLANG_VERSION -ge 18 ]]; then
apt-get install -y libomp-${CLANG_VERSION}-dev libclang-rt-${CLANG_VERSION}-dev clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
else
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
fi

# Install dev version of LLVM.
19 changes: 2 additions & 17 deletions .ci/docker/common/install_conda.sh
@@ -65,23 +65,10 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then

# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
if [[ $(uname -m) == "aarch64" ]]; then
CONDA_COMMON_DEPS="astunparse pyyaml setuptools openblas==0.3.25=*openmp* ninja==1.11.1 scons==4.5.2"

if [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
NUMPY_VERSION=1.24.4
else
NUMPY_VERSION=1.26.2
fi
conda_install "openblas==0.3.25=*openmp*"
else
CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"

if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.12" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.13" ]; then
NUMPY_VERSION=1.26.0
else
NUMPY_VERSION=1.21.2
fi
conda_install "mkl=2021.4.0 mkl-include=2021.4.0"
fi
conda_install ${CONDA_COMMON_DEPS}

# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
# and libpython-static for torch deploy
@@ -103,8 +90,6 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then

# Install some other packages, including those needed for Python test reporting
pip_install -r /opt/conda/requirements-ci.txt
pip_install numpy=="$NUMPY_VERSION"
pip_install -U scikit-learn

if [ -n "$DOCS" ]; then
apt-get update
71 changes: 70 additions & 1 deletion .ci/docker/common/install_cuda.sh
@@ -137,6 +137,39 @@ function install_124 {
ldconfig
}

function install_126 {
echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
# install CUDA 12.6.2 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run
chmod +x cuda_12.6.2_560.35.03_linux.run
./cuda_12.6.2_560.35.03_linux.run --toolkit --silent
rm -f cuda_12.6.2_560.35.03_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl

install_cusparselt_062

ldconfig
}

function prune_118 {
echo "Pruning CUDA 11.8 and cuDNN"
#####################################################################################
@@ -227,12 +260,46 @@ function prune_124 {
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

#####################################################################################
# CUDA 12.1 prune visual tools
# CUDA 12.4 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.4/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}

function prune_126 {
echo "Pruning CUDA 12.6"
#####################################################################################
# CUDA 12.6 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"

export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
fi

# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"

# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

#####################################################################################
# CUDA 12.6 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.6/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
}

# idiomatic parameter and option handling in sh
while test $# -gt 0
do
@@ -243,6 +310,8 @@ do
;;
12.4) install_124; prune_124
;;
12.6) install_126; prune_126
;;
*) echo "bad argument $1"; exit 1
;;
esac
2 changes: 1 addition & 1 deletion .ci/docker/common/install_onnx.sh
@@ -32,7 +32,7 @@ pip_install coloredlogs packaging

pip_install onnxruntime==1.18.1
pip_install onnx==1.16.2
pip_install onnxscript==0.1.0.dev20240831 --no-deps
pip_install onnxscript==0.1.0.dev20241009 --no-deps
# required by onnxscript
pip_install ml_dtypes

11 changes: 7 additions & 4 deletions .ci/docker/common/install_xpu.sh
@@ -41,13 +41,16 @@ function install_ubuntu() {
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then
apt-get install -y intel-ocloc
fi
# Development Packages
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
# Install Intel Support Packages
if [ -n "$XPU_VERSION" ]; then
apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION} intel-pti-dev
apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION} intel-pti-dev-0.9
else
apt-get install -y intel-for-pytorch-gpu-dev intel-pti-dev
apt-get install -y intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9
fi

# Cleanup
@@ -97,7 +100,7 @@ EOF
intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \
level-zero-devel
# Install Intel Support Packages
yum install -y intel-for-pytorch-gpu-dev intel-pti-dev
yum install -y intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9

# Cleanup
dnf clean all
@@ -131,7 +134,7 @@ function install_sles() {
zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel

# Install Intel Support Packages
zypper install -y intel-for-pytorch-gpu-dev intel-pti-dev
zypper install -y intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9

}

5 changes: 5 additions & 0 deletions .ci/docker/conda/Dockerfile
@@ -70,6 +70,10 @@ FROM cuda as cuda12.4
RUN bash ./install_cuda.sh 12.4
ENV DESIRED_CUDA=12.4

FROM cuda as cuda12.6
RUN bash ./install_cuda.sh 12.6
ENV DESIRED_CUDA=12.6

# Install MNIST test data
FROM base as mnist
ADD ./common/install_mnist.sh install_mnist.sh
@@ -79,6 +83,7 @@ FROM base as all_cuda
COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
COPY --from=cuda12.1 /usr/local/cuda-12.1 /usr/local/cuda-12.1
COPY --from=cuda12.4 /usr/local/cuda-12.4 /usr/local/cuda-12.4
COPY --from=cuda12.6 /usr/local/cuda-12.6 /usr/local/cuda-12.6

# Final step
FROM ${BASE_TARGET} as final
5 changes: 5 additions & 0 deletions .ci/docker/libtorch/Dockerfile
@@ -66,6 +66,11 @@ RUN bash ./install_cuda.sh 12.4
RUN bash ./install_magma.sh 12.4
RUN ln -sf /usr/local/cuda-12.4 /usr/local/cuda

FROM cuda as cuda12.6
RUN bash ./install_cuda.sh 12.6
RUN bash ./install_magma.sh 12.6
RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda

FROM cpu as rocm
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
14 changes: 5 additions & 9 deletions .ci/docker/manywheel/build_scripts/ssl-check.py
@@ -1,25 +1,21 @@
# cf. https://github.com/pypa/manylinux/issues/53

import sys
from urllib.request import urlopen


GOOD_SSL = "https://google.com"
BAD_SSL = "https://self-signed.badssl.com"

import sys


print("Testing SSL certificate checking for Python:", sys.version)

if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4):
print("This version never checks SSL certs; skipping tests")
sys.exit(0)

if sys.version_info[0] >= 3:
from urllib.request import urlopen

EXC = OSError
else:
from urllib import urlopen

EXC = IOError
EXC = OSError

print(f"Connecting to {GOOD_SSL} should work")
urlopen(GOOD_SSL)
36 changes: 31 additions & 5 deletions .ci/docker/requirements-ci.txt
@@ -5,7 +5,7 @@
#Pinned versions: 1.6
#test that import:

boto3==1.19.12
boto3==1.35.42
#Description: AWS SDK for python
#Pinned versions: 1.19.12, 1.16.34
#test that import:
@@ -118,7 +118,7 @@ numba==0.55.2 ; python_version == "3.10"

#numpy
#Description: Provides N-dimensional arrays and linear algebra
#Pinned versions: 1.20
#Pinned versions: 1.26.2
#test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
#test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
#test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
@@ -128,6 +128,10 @@ numba==0.55.2 ; python_version == "3.10"
#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
#test_binary_ufuncs.py
numpy==1.21.2; python_version == "3.9"
numpy==1.22.4; python_version == "3.10"
numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
numpy==2.1.2; python_version >= "3.13"

#onnxruntime
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
@@ -253,7 +257,7 @@ tb-nightly==2.13.0a20230426
#test that import:

# needed by torchgen utils
typing-extensions
typing-extensions>=4.10.0
#Description: type hints for python
#Pinned versions:
#test that import:
@@ -322,13 +326,12 @@ lxml==5.0.0

PyGithub==2.3.0

sympy==1.12.1 ; python_version == "3.8"
sympy==1.13.1 ; python_version >= "3.9"
#Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt
#Pinned versions:
#test that import:

onnx==1.16.1
onnx==1.17.0
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
#Pinned versions:
#test that import:
@@ -342,3 +345,26 @@ parameterized==0.8.1
#Description: Parameterizes unittests, both the tests themselves and the entire testing class
#Pinned versions:
#test that import:

#Description: required for testing torch/distributed/_tools/sac_estimator.py
#Pinned versions: 1.24.0
#test that import: test_sac_estimator.py

pwlf==2.2.1 ; python_version >= "3.8"
#Description: required for testing torch/distributed/_tools/sac_estimator.py
#Pinned versions: 2.2.1
#test that import: test_sac_estimator.py


# To build PyTorch itself
astunparse
PyYAML
setuptools

ninja==1.11.1 ; platform_machine == "aarch64"
scons==4.5.2 ; platform_machine == "aarch64"

pulp==2.9.0 ; python_version >= "3.8"
#Description: required for testing ILP formulation under torch/distributed/_tools
#Pinned versions: 2.9.0
#test that import: test_sac_ilp.py
10 changes: 10 additions & 0 deletions .ci/libtorch/build.sh
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# This is mostly just a shim to manywheel/build.sh
# TODO: Make this a dedicated script to build just libtorch

set -ex

SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh
21 changes: 21 additions & 0 deletions .ci/manywheel/LICENSE
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2016 manylinux

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
25 changes: 25 additions & 0 deletions .ci/manywheel/build.sh
@@ -0,0 +1,25 @@
#!/usr/bin/env bash

set -ex

SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

case "${GPU_ARCH_TYPE:-BLANK}" in
BLANK)
# Legacy behavior for CircleCI
bash "${SCRIPTPATH}/build_cuda.sh"
;;
cuda)
bash "${SCRIPTPATH}/build_cuda.sh"
;;
rocm)
bash "${SCRIPTPATH}/build_rocm.sh"
;;
cpu | cpu-cxx11-abi | cpu-s390x | xpu)
bash "${SCRIPTPATH}/build_cpu.sh"
;;
*)
echo "Un-recognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..."
exit 1
;;
esac