
Commit

Merge branch 'main' into run_est
sanketpurandare authored Oct 28, 2024
2 parents e889a5e + 94e4a1f commit adf2b31
Showing 1,818 changed files with 55,913 additions and 25,836 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/executorch.txt
@@ -1 +1 @@
cd1c833b079adb324871dcbbe75b43d42ffc0ade
export-D64151426
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton-cpu.txt
@@ -1 +1 @@
6a333f1b05671f6fada4ba7bbfae4a02a9d96f4f
c7711371cace304afe265c1ffa906415ab82fc66
7 changes: 4 additions & 3 deletions .ci/docker/common/install_clang.sh
@@ -20,9 +20,10 @@ if [ -n "$CLANG_VERSION" ]; then
fi

sudo apt-get update
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
if [[ $CLANG_VERSION == 18 ]]; then
apt-get install -y --no-install-recommends libomp-18-dev
if [[ $CLANG_VERSION -ge 18 ]]; then
apt-get install -y libomp-${CLANG_VERSION}-dev libclang-rt-${CLANG_VERSION}-dev clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
else
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
fi

# Install dev version of LLVM.
19 changes: 2 additions & 17 deletions .ci/docker/common/install_conda.sh
@@ -65,23 +65,10 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then

# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
if [[ $(uname -m) == "aarch64" ]]; then
CONDA_COMMON_DEPS="astunparse pyyaml setuptools openblas==0.3.25=*openmp* ninja==1.11.1 scons==4.5.2"

if [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
NUMPY_VERSION=1.24.4
else
NUMPY_VERSION=1.26.2
fi
conda_install "openblas==0.3.25=*openmp*"
else
CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"

if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.12" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.13" ]; then
NUMPY_VERSION=1.26.0
else
NUMPY_VERSION=1.21.2
fi
conda_install "mkl=2021.4.0 mkl-include=2021.4.0"
fi
conda_install ${CONDA_COMMON_DEPS}

# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
# and libpython-static for torch deploy
@@ -103,8 +90,6 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then

# Install some other packages, including those needed for Python test reporting
pip_install -r /opt/conda/requirements-ci.txt
pip_install numpy=="$NUMPY_VERSION"
pip_install -U scikit-learn

if [ -n "$DOCS" ]; then
apt-get update
71 changes: 70 additions & 1 deletion .ci/docker/common/install_cuda.sh
@@ -137,6 +137,39 @@ function install_124 {
ldconfig
}

function install_126 {
echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
# install CUDA 12.6.2 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run
chmod +x cuda_12.6.2_560.35.03_linux.run
./cuda_12.6.2_560.35.03_linux.run --toolkit --silent
rm -f cuda_12.6.2_560.35.03_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl

install_cusparselt_062

ldconfig
}

function prune_118 {
echo "Pruning CUDA 11.8 and cuDNN"
#####################################################################################
@@ -227,12 +260,46 @@ function prune_124 {
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

#####################################################################################
# CUDA 12.1 prune visual tools
# CUDA 12.4 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.4/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}

function prune_126 {
echo "Pruning CUDA 12.6"
#####################################################################################
# CUDA 12.6 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"

export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
fi

# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"

# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

#####################################################################################
# CUDA 12.6 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.6/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
}

# idiomatic parameter and option handling in sh
while test $# -gt 0
do
@@ -243,6 +310,8 @@ do
;;
12.4) install_124; prune_124
;;
12.6) install_126; prune_126
;;
*) echo "bad argument $1"; exit 1
;;
esac
2 changes: 1 addition & 1 deletion .ci/docker/common/install_onnx.sh
@@ -32,7 +32,7 @@ pip_install coloredlogs packaging

pip_install onnxruntime==1.18.1
pip_install onnx==1.16.2
pip_install onnxscript==0.1.0.dev20240831 --no-deps
pip_install onnxscript==0.1.0.dev20241009 --no-deps
# required by onnxscript
pip_install ml_dtypes

11 changes: 7 additions & 4 deletions .ci/docker/common/install_xpu.sh
@@ -41,13 +41,16 @@ function install_ubuntu() {
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then
apt-get install -y intel-ocloc
fi
# Development Packages
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
# Install Intel Support Packages
if [ -n "$XPU_VERSION" ]; then
apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION} intel-pti-dev
apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION} intel-pti-dev-0.9
else
apt-get install -y intel-for-pytorch-gpu-dev intel-pti-dev
apt-get install -y intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9
fi

# Cleanup
@@ -97,7 +100,7 @@ EOF
intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \
level-zero-devel
# Install Intel Support Packages
yum install -y intel-for-pytorch-gpu-dev intel-pti-dev
yum install -y intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9

# Cleanup
dnf clean all
@@ -131,7 +134,7 @@ function install_sles() {
zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel

# Install Intel Support Packages
zypper install -y intel-for-pytorch-gpu-dev intel-pti-dev
zypper install -y intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9

}

5 changes: 5 additions & 0 deletions .ci/docker/conda/Dockerfile
@@ -70,6 +70,10 @@ FROM cuda as cuda12.4
RUN bash ./install_cuda.sh 12.4
ENV DESIRED_CUDA=12.4

FROM cuda as cuda12.6
RUN bash ./install_cuda.sh 12.6
ENV DESIRED_CUDA=12.6

# Install MNIST test data
FROM base as mnist
ADD ./common/install_mnist.sh install_mnist.sh
@@ -79,6 +83,7 @@ FROM base as all_cuda
COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
COPY --from=cuda12.1 /usr/local/cuda-12.1 /usr/local/cuda-12.1
COPY --from=cuda12.4 /usr/local/cuda-12.4 /usr/local/cuda-12.4
COPY --from=cuda12.6 /usr/local/cuda-12.6 /usr/local/cuda-12.6

# Final step
FROM ${BASE_TARGET} as final
5 changes: 5 additions & 0 deletions .ci/docker/libtorch/Dockerfile
@@ -66,6 +66,11 @@ RUN bash ./install_cuda.sh 12.4
RUN bash ./install_magma.sh 12.4
RUN ln -sf /usr/local/cuda-12.4 /usr/local/cuda

FROM cuda as cuda12.6
RUN bash ./install_cuda.sh 12.6
RUN bash ./install_magma.sh 12.6
RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda

FROM cpu as rocm
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
14 changes: 5 additions & 9 deletions .ci/docker/manywheel/build_scripts/ssl-check.py
@@ -1,25 +1,21 @@
# cf. https://github.com/pypa/manylinux/issues/53

import sys
from urllib.request import urlopen


GOOD_SSL = "https://google.com"
BAD_SSL = "https://self-signed.badssl.com"

import sys


print("Testing SSL certificate checking for Python:", sys.version)

if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4):
print("This version never checks SSL certs; skipping tests")
sys.exit(0)

if sys.version_info[0] >= 3:
from urllib.request import urlopen

EXC = OSError
else:
from urllib import urlopen

EXC = IOError
EXC = OSError

print(f"Connecting to {GOOD_SSL} should work")
urlopen(GOOD_SSL)
36 changes: 31 additions & 5 deletions .ci/docker/requirements-ci.txt
@@ -5,7 +5,7 @@
#Pinned versions: 1.6
#test that import:

boto3==1.19.12
boto3==1.35.42
#Description: AWS SDK for python
#Pinned versions: 1.19.12, 1.16.34
#test that import:
@@ -118,7 +118,7 @@ numba==0.55.2 ; python_version == "3.10"

#numpy
#Description: Provides N-dimensional arrays and linear algebra
#Pinned versions: 1.20
#Pinned versions: 1.26.2
#test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
#test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
#test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
@@ -128,6 +128,10 @@ numba==0.55.2 ; python_version == "3.10"
#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
#test_binary_ufuncs.py
numpy==1.21.2; python_version == "3.9"
numpy==1.22.4; python_version == "3.10"
numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
numpy==2.1.2; python_version >= "3.13"

#onnxruntime
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
@@ -253,7 +257,7 @@ tb-nightly==2.13.0a20230426
#test that import:

# needed by torchgen utils
typing-extensions
typing-extensions>=4.10.0
#Description: type hints for python
#Pinned versions:
#test that import:
@@ -322,13 +326,12 @@ lxml==5.0.0

PyGithub==2.3.0

sympy==1.12.1 ; python_version == "3.8"
sympy==1.13.1 ; python_version >= "3.9"
#Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt
#Pinned versions:
#test that import:

onnx==1.16.1
onnx==1.17.0
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
#Pinned versions:
#test that import:
@@ -342,3 +345,26 @@ parameterized==0.8.1
#Description: Parameterizes unittests, both the tests themselves and the entire testing class
#Pinned versions:
#test that import:

#Description: required for testing torch/distributed/_tools/sac_estimator.py
#Pinned versions: 1.24.0
#test that import: test_sac_estimator.py

pwlf==2.2.1 ; python_version >= "3.8"
#Description: required for testing torch/distributed/_tools/sac_estimator.py
#Pinned versions: 2.2.1
#test that import: test_sac_estimator.py


# To build PyTorch itself
astunparse
PyYAML
setuptools

ninja==1.11.1 ; platform_machine == "aarch64"
scons==4.5.2 ; platform_machine == "aarch64"

pulp==2.9.0 ; python_version >= "3.8"
#Description: required for testing ILP formulation under torch/distributed/_tools
#Pinned versions: 2.9.0
#test that import: test_sac_ilp.py
10 changes: 10 additions & 0 deletions .ci/libtorch/build.sh
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# This is mostly just a shim to manywheel/build.sh
# TODO: Make this a dedicated script to build just libtorch

set -ex

SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh
21 changes: 21 additions & 0 deletions .ci/manywheel/LICENSE
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2016 manylinux

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
25 changes: 25 additions & 0 deletions .ci/manywheel/build.sh
@@ -0,0 +1,25 @@
#!/usr/bin/env bash

set -ex

SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

case "${GPU_ARCH_TYPE:-BLANK}" in
BLANK)
# Legacy behavior for CircleCI
bash "${SCRIPTPATH}/build_cuda.sh"
;;
cuda)
bash "${SCRIPTPATH}/build_cuda.sh"
;;
rocm)
bash "${SCRIPTPATH}/build_rocm.sh"
;;
cpu | cpu-cxx11-abi | cpu-s390x | xpu)
bash "${SCRIPTPATH}/build_cpu.sh"
;;
*)
echo "Un-recognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..."
exit 1
;;
esac