diff --git a/.github/actions/setup-build-cuda/action.yml b/.github/actions/setup-build-cuda/action.yml index 824be1bd6b..7bdfff4360 100644 --- a/.github/actions/setup-build-cuda/action.yml +++ b/.github/actions/setup-build-cuda/action.yml @@ -23,15 +23,19 @@ runs: import sys print(sys.version) cushort = "${{ inputs.toolkit_short_version }}" - TORCH_CUDA_DEFAULT = "121" # pytorch 2.4.1 + # Version uploaded to pypi (rather than PyTorch s3) + TORCH_CUDA_DEFAULT = "124" # since pytorch 2.6.0 # https://github.com/Jimver/cuda-toolkit/blob/master/src/links/linux-links.ts full_version, install_script = { + "128": ("12.8.0", "https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux.run"), + "126": ("12.6.3", "https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run"), "124": ("12.4.1", "https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run"), "121": ("12.1.0", "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"), "118": ("11.8.0", "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"), "6.0": ("6.0.2", "https://repo.radeon.com/amdgpu-install/6.0.2/rhel/8.9/amdgpu-install-6.0.60002-1.el8.noarch.rpm"), "6.1": ("6.1.2", "https://repo.radeon.com/amdgpu-install/6.1.3/rhel/8.9/amdgpu-install-6.1.60103-1.el8.noarch.rpm"), - "6.2": ("6.2.3", "https://repo.radeon.com/amdgpu-install/6.2.3/rhel/8.9/amdgpu-install-6.2.60203-1.el8.noarch.rpm"), + "6.2.4": ("6.2.4", "https://repo.radeon.com/amdgpu-install/6.2.4/rhel/8.9/amdgpu-install-6.2.60204-1.el8.noarch.rpm"), + "6.3": ("6.3.1", "https://repo.radeon.com/amdgpu-install/6.3.1/rhel/8.9/amdgpu-install-6.3.60301-1.el8.noarch.rpm"), }[cushort] with open(os.environ['GITHUB_OUTPUT'], "r+") as fp: fp.write("CUDA_VERSION=" + full_version + "\n") @@ -46,10 +50,18 @@ runs: # WINDOWS STEPS - name: Install cuda if: runner.os == 'Windows' && inputs.toolkit_type == 'cuda' - uses: Jimver/cuda-toolkit@v0.2.16 + id: cuda-toolkit + uses: Jimver/cuda-toolkit@v0.2.21 with: cuda: ${{ steps.cuda_info.outputs.CUDA_VERSION }} method: network + - if: runner.os == 'Windows' && inputs.toolkit_type == 'cuda' + shell: bash + run: | + echo "Installed cuda version is: ${{ steps.cuda-toolkit.outputs.cuda }}" + echo "Cuda install location: ${{ steps.cuda-toolkit.outputs.CUDA_PATH }}" + echo "CUDA_HOME=${{ steps.cuda-toolkit.outputs.CUDA_PATH }}" >> ${GITHUB_ENV} + cat ${GITHUB_ENV} - name: Install python if: runner.os == 'Windows' diff --git a/.github/selective_ci/selective_ci.py b/.github/selective_ci/selective_ci.py index a24fc3e351..5c57ef694a 100644 --- a/.github/selective_ci/selective_ci.py +++ b/.github/selective_ci/selective_ci.py @@ -109,6 +109,9 @@ def list_files_in_commit(commit: git.Commit): def check_patterns_are_valid(patterns): + # Only check patterns in `fairinternal` repo + if os.environ.get("GITHUB_REPOSITORY", "") != "fairinternal/xformers": + return found_patterns = set() for f in all_files: for pattern in patterns: diff --git a/.github/workflows/rocm_build.yml b/.github/workflows/rocm_build.yml index 37fe17b4ec..0f638d1fb9 100644 --- a/.github/workflows/rocm_build.yml +++ b/.github/workflows/rocm_build.yml @@ -22,9 +22,9 @@ jobs: matrix: os: ['ubuntu-alola'] python: ['3.11'] - torch_version: ['2.5.1'] + torch_version: ['2.6.0'] toolkit_type: ['rocm'] - toolkit_short_version: ['6.1', '6.2'] + toolkit_short_version: ['6.1', '6.2.4'] uses: ./.github/workflows/wheels_build.yml if: github.repository == 'rocm/xformers' diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index db6ba6572a..d92cce53e5 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -31,8 +31,8 @@ jobs: PY_VERSIONS = ['3.9', '3.10', '3.11', '3.12'] # NOTE: Don't forget to update `upload_pt`'s matrix # when changing the CUDA/ROCM versions below! - CU_VERSIONS = ['118', '121', '124'] - ROCM_VERSIONS = ["6.1"] # <- 6.0 broken in `manylinux_2_28` + CU_VERSIONS = ['118', '124', '126'] + ROCM_VERSIONS = ["6.1", "6.2.4"] # <- 6.0 broken in `manylinux_2_28` PY_CU = list(itertools.product(PY_VERSIONS, CU_VERSIONS)) PY_ROCM = list(itertools.product(PY_VERSIONS, ROCM_VERSIONS)) print("Full matrix PY_CU", PY_CU) @@ -42,11 +42,13 @@ jobs: for cu in CU_VERSIONS[1:]: PY_CU.append((PY_VERSIONS[-1], cu)) print("Limited matrix PY_CU", PY_CU) - PY_ROCM = [(PY_VERSIONS[-1], ROCM_VERSIONS[-1])] + PY_ROCM = [(PY_VERSIONS[0], ROCM_VERSIONS[0])] + for rocm in ROCM_VERSIONS[1:]: + PY_ROCM.append((PY_VERSIONS[-1], rocm)) include = [] for os in ['8-core-ubuntu', 'windows-8-core']: - for torch_version in ['2.5.1']: + for torch_version in ['2.6.0']: # CUDA builds for python, cuda_short_version in PY_CU: if cuda_short_version != "124" and "windows" in os: @@ -96,7 +98,7 @@ jobs: uses: ./.github/workflows/wheels_upload_pip.yml with: twine_username: __token__ - filter: "*torch2.5.1+cu121*" + filter: "*torch2.6.0+cu124*" execute: ${{ github.repository == 'facebookresearch/xformers' && github.event_name != 'pull_request' }} secrets: twine_password: ${{ secrets.PYPI_TOKEN }} @@ -108,14 +110,15 @@ jobs: matrix: suffix: - cu118 - - cu121 - cu124 + - cu126 - rocm6.1 + - rocm6.2.4 uses: ./.github/workflows/wheels_upload_s3.yml with: aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role" s3_path: s3://pytorch/whl/${{ matrix.suffix }}/ aws_s3_cp_extra_args: --acl public-read - filter: "*torch2.5.1+${{ matrix.suffix }}*" + filter: "*torch2.6.0+${{ matrix.suffix }}*" execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }} diff --git a/.github/workflows/wheels_build.yml b/.github/workflows/wheels_build.yml index 4e9e1ccd50..4df007b805 100644 --- a/.github/workflows/wheels_build.yml +++ b/.github/workflows/wheels_build.yml @@ -65,6 +65,13 @@ jobs: submodules: recursive path: "." fetch-depth: 0 # for tags + + - name: HACKFIX for cutlass compiler bug + if: runner.os == 'Windows' + run: | + # See https://github.com/NVIDIA/cutlass/issues/1732 + rm -f third_party/cutlass/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp + touch third_party/cutlass/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp - name: Setup Runner uses: ./.github/actions/setup-build-cuda with: @@ -98,6 +105,7 @@ jobs: echo "BUILD_VERSION=$version${{ steps.cuda_info.outputs.CUDA_VERSION_SUFFIX }}" >> ${GITHUB_ENV} echo "BUILD_VERSION=$version${{ steps.cuda_info.outputs.CUDA_VERSION_SUFFIX }}" >> ${GITHUB_OUTPUT} which ninja + ninja --version cat ${GITHUB_ENV} - run: echo "xformers-${BUILD_VERSION}" - run: echo "release version (will upload to PyTorch)" diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml index 02fc26c2c8..f81b9ade71 100644 --- a/.github/workflows/win-build.yml +++ b/.github/workflows/win-build.yml @@ -15,6 +15,7 @@ env: MAX_JOBS: 6 DISTUTILS_USE_SDK: 1 # otherwise distutils will complain on windows about multiple versions of msvc XFORMERS_BUILD_TYPE: "Release" + TMPDIR: "./x" jobs: win_build: @@ -35,19 +36,33 @@ jobs: run: shell: bash steps: - - name: Support longpaths - run: git config --system core.longpaths true + - name: Workarounds for longpaths - git-config + run: | + git config --system core.longpaths true - name: Recursive checkout uses: actions/checkout@v3 with: submodules: recursive path: "." + - name: Workarounds for longpaths - TMPDIR + run: | + mkdir x + python -c "import tempfile; print(tempfile.gettempdir())" + python -c "import tempfile; assert(len(tempfile.gettempdir()) < 30)" + + - name: HACKFIX for cutlass compiler bug + if: runner.os == 'Windows' + run: | + # See https://github.com/NVIDIA/cutlass/issues/1732 + rm -f third_party/cutlass/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp + touch third_party/cutlass/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp + - name: Setup Runner uses: ./.github/actions/setup-build-cuda with: toolkit_type: "cuda" - toolkit_short_version: "124" + toolkit_short_version: "126" python: "3.9" - name: Remove internal code @@ -59,18 +74,26 @@ jobs: - name: Install build dependencies run: | - $PY -m pip install wheel setuptools ninja torch==2.5.1 -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121 + $PY -m pip install wheel setuptools ninja torch==2.6.0 -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu126 git config --global --add safe.directory "*" $PY -c "import torch; print('torch', torch.__version__)" $PY -c "import torch; print('torch.cuda', torch.version.cuda)" + ninja --version - name: Create sdist run: $PY setup.py sdist - name: Build from sdist - run: $PY -m pip install -v dist/* + shell: bash -l {0} + run: | + $PY -m pip install -v dist/* - name: Info run: | cd ../../ # So we don't have a folder named `xformers` XFORMERS_MORE_DETAILS=1 $PY -m xformers.info + + # Open an SSH session on failure to debug + # - name: Setup tmate session + # if: ${{ failure() }} + # uses: mxschmitt/action-tmate@v3 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0990bd2476..bbdf675c46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.0.29.post2] - 2025-01-31 +Pre-built binary wheels are available for PyTorch 2.6.0. Following PyTorch, we build wheels for CUDA 11.8, 12.4, and 12.6 only (we no longer build for CUDA 12.1). +xFormers now requires PyTorch >= 2.6 + + ## [0.0.29] - 2024-12-27 ### Improved: - [fMHA] Creating a `LowerTriangularMask` no longer creates a CUDA tensor diff --git a/README.md b/README.md index 9332159006..bd181d87cd 100644 --- a/README.md +++ b/README.md @@ -25,17 +25,17 @@ xFormers is: ## Installing xFormers -* **(RECOMMENDED, linux & win) Install latest stable with pip**: Requires [PyTorch 2.5.1](https://pytorch.org/get-started/locally/) +* **(RECOMMENDED, linux & win) Install latest stable with pip**: Requires [PyTorch 2.6.0](https://pytorch.org/get-started/locally/) ```bash # [linux only] cuda 11.8 version pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu118 -# [linux only] cuda 12.1 version -pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu121 # [linux & win] cuda 12.4 version pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu124 -# [linux only] (EXPERIMENTAL) rocm 6.1 version -pip3 install -U xformers --index-url https://download.pytorch.org/whl/rocm6.1 +# [linux & win] cuda 12.6 version +pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu126 +# [linux only] (EXPERIMENTAL) rocm 6.2.4 version +pip3 install -U xformers --index-url https://download.pytorch.org/whl/rocm6.2.4 ``` * **Development binaries**: diff --git a/requirements.txt b/requirements.txt index f1fe423f23..fb1e7934ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ # Example requirement, can be anything that pip knows # install with `pip install -r requirements.txt`, and make sure that CI does the same -torch >= 2.4 +torch >= 2.6 numpy diff --git a/xformers/components/attention/core.py b/xformers/components/attention/core.py index 3a201fb512..3e80e917dc 100644 --- a/xformers/components/attention/core.py +++ b/xformers/components/attention/core.py @@ -103,7 +103,7 @@ def _matmul_with_mask( repeat_factor = att.shape[0] // mask.shape[0] mask = mask.repeat([repeat_factor, 1, 1]) logger.info("Mismatched batch dimensions for mask, repeating mask.") - att += mask + att += mask # type: ignore return att