[AMD] Pipeline small vector width but not thru shared memory #13061

Workflow file for this run

.github/workflows/integration-tests.yml at 810dd41

	# AUTOGENERATED by pre-commit, modify the .in file instead.

	# integration-tests.yml.in is used to generate integration-tests.yml by
	# expanding yaml anchors, because github actions don't support them
	# (https://github.com/actions/runner/issues/1182). pre-commit will do this for
	# you automatically.


	name: Integration Tests
	# Triggers: manual dispatch, pull requests (ignoring 'llvm-**' branches),
	# merge-queue check requests for main / 'dev-**', and pushes to main.
	on:
	  workflow_dispatch:
	  pull_request:
	    branches-ignore: ['llvm-**']
	  merge_group:
	    branches: [main, 'dev-**']
	    types: [checks_requested]
	  push:
	    branches: [main]
	# Runs are grouped per ref; an in-progress run is cancelled by a newer one
	# on every ref except main.
	concurrency:
	  group: ${{ github.ref }}
	  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
	permissions: read-all
	# Environment shared by every job in this workflow. Values are quoted so
	# they stay strings under any YAML loader.
	env:
	  TRITON_BUILD_WITH_CCACHE: "true"
	  TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
	  TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
	  TRITON_DISABLE_LINE_INFO: "1"
	  PROTON_SKIP_PC_SAMPLING_TEST: "1"
	  CCACHE_COMPRESS: "true"
	jobs:
	  # Decides whether integration tests should run for this event and, if so,
	  # publishes the runner matrices that the test jobs below consume. When
	  # the matrices are left empty the dependent jobs are skipped by their
	  # `if:` conditions.
	  Runner-Preparation:
	    runs-on: ubuntu-latest
	    timeout-minutes: 30
	    outputs:
	      matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
	      matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
	      matrix-MACOS: ${{ steps.set-matrix.outputs.matrix-MACOS }}
	    steps:
	      - name: Decide pre-submit integration test enablement
	        # Always enable integration tests for pre-submit pull requests.
	        if: github.event_name == 'pull_request'
	        run: |
	          echo "enable_integration=true" >> $GITHUB_ENV
	      - name: Decide manual trigger integration test enablement
	        # Always enable integration tests when manually triggered
	        if: github.event_name == 'workflow_dispatch'
	        run: |
	          echo "enable_integration=true" >> $GITHUB_ENV
	      - name: Checkout post-submit commits
	        if: github.event_name == 'push'
	        uses: actions/checkout@v4
	        with:
	          # Only fetch two commits to check the latest changed files.
	          fetch-depth: 2
	      - name: Detect if build deps (e.g. LLVM hash) changed
	        id: detect-change
	        if: github.event_name == 'push'
	        uses: tj-actions/changed-files@v45
	        with:
	          files: |
	            cmake/*.txt
	            cmake/*.json
	      - name: Detect if enough time has passed since last post-submit run
	        id: detect-time
	        if: github.event_name == 'push'
	        # Pass the token and repository through the environment instead of
	        # interpolating `${{ ... }}` expressions into the script text.
	        env:
	          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	          REPO_NAME: ${{ github.repository }}
	        run: |
	          # ID of integration-tests workflow
	          WORKFLOW_ID="11678186"

	          # Fetch the last run time of this workflow
	          LAST_RUN=$(curl -s \
	            -H "Authorization: token $GITHUB_TOKEN" \
	            -H "Accept: application/vnd.github.v3+json" \
	            "https://api.github.com/repos/$REPO_NAME/actions/workflows/$WORKFLOW_ID/runs?branch=main&status=success&per_page=1" \
	            | jq -r '.workflow_runs[0].updated_at')

	          # No usable timestamp (no earlier successful run, or the API
	          # returned an error object): run CI instead of handing "null" or
	          # an empty string to `date`.
	          if [ -z "$LAST_RUN" ] || [ "$LAST_RUN" = "null" ]; then
	            echo "Could not determine the last successful run; will run CI."
	            echo "n_hours_since_last_run=true" >> $GITHUB_ENV
	            exit 0
	          fi

	          # Convert to timestamp
	          LAST_RUN_TS=$(date -d "$LAST_RUN" +%s)
	          NOW_TS=$(date +%s)
	          DIFF=$(( (NOW_TS - LAST_RUN_TS) / 3600 )) # Difference in hours

	          echo "Last run was $DIFF hours ago."

	          if [ "$DIFF" -ge 4 ]; then
	            echo "Will run CI; last build was long enough ago."
	            echo "n_hours_since_last_run=true" >> $GITHUB_ENV
	          else
	            echo "Will not run CI; last build was too recent."
	            echo "n_hours_since_last_run=false" >> $GITHUB_ENV
	          fi
	      # We want to run integration tests on the main branch (i.e. post-submit)
	      # occasionally, because pre-submit CI caches will only read from caches
	      # generated from the main branch (or the PR's branch), and we want these
	      # caches to be recent.
	      #
	      # But we also don't want to run the tests on every commit, because this
	      # would compete for resources with pre-commit CI (and the whole point of
	      # caching is to speed up CI).
	      #
	      # As a compromise, run every N hours, or if a build dependency changes
	      # (e.g. we update the LLVM hash).
	      - name: Decide whether to run integration tests post-submit
	        if: |
	          github.event_name == 'push' &&
	          (steps.detect-change.outputs.any_changed == 'true' ||
	           env.n_hours_since_last_run == 'true')
	        run: |
	          echo "enable_integration=true" >> $GITHUB_ENV
	      - name: Prepare runner matrix
	        id: set-matrix
	        if: env.enable_integration == 'true'
	        # Outputs are written to $GITHUB_OUTPUT; the `::set-output` workflow
	        # command is deprecated.
	        run: |
	          if [ x"${{ github.repository }}" == x"triton-lang/triton" ]; then
	            echo 'matrix-CUDA=[["a100-runner-set"], ["h100-runner-set"]]' >> "$GITHUB_OUTPUT"
	            echo 'matrix-HIP=[["self-hosted", "gfx90a"], ["self-hosted", "gfx942"]]' >> "$GITHUB_OUTPUT"
	            echo 'matrix-MACOS=[["macos-latest"]]' >> "$GITHUB_OUTPUT"
	          else
	            echo 'matrix-CUDA=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
	            echo 'matrix-HIP=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
	            echo 'matrix-MACOS=[["macos-latest"]]' >> "$GITHUB_OUTPUT"
	          fi
	  # Runs the repository's pre-commit hooks over all files and prints the
	  # resulting diff when they fail.
	  pre-commit:
	    name: pre-commit (code formatting)
	    needs: Runner-Preparation
	    runs-on: ubuntu-latest
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4
	      - uses: actions/setup-python@v5
	        with:
	          python-version: '3.12'
	          cache: 'pip'
	      - name: Compute hash of pre-commit config
	        id: cache-key
	        run: |
	          echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
	        shell: bash
	      - name: Cache pre-commit's cache dir
	        uses: actions/cache@v4
	        with:
	          # Note that we cannot use environment variables here given there is
	          # no shell to interpret them in the paths.
	          path: |
	            ~/.cache/pre-commit
	          key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}
	      - name: Check pre-commit
	        run: |
	          python3 -m pip install --upgrade pre-commit
	          python3 -m pre_commit run --all-files --verbose
	      - name: Print diff of changes if pre-commit failed
	        if: failure()
	        run: |
	          git diff
	  # Builds Triton and runs the lit, Python, interpreter, regression, C++ and
	  # Proton test suites on each runner of the CUDA matrix.
	  Integration-Tests:
	    needs: Runner-Preparation
	    if: needs.Runner-Preparation.outputs.matrix-CUDA != ''
	    runs-on: ${{ matrix.runner }}
	    timeout-minutes: 30
	    strategy:
	      matrix:
	        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}
	    env:
	      RUNNER_TYPE: ${{ matrix.runner[0] }}
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4
	        with:
	          submodules: "true"
	      - name: Compute cache keys
	        id: cache-key
	        run: |
	          llvm_file="cmake/llvm-hash.txt"
	          nvidia_file="cmake/nvidia-toolchain-version.json"
	          json_file="cmake/json-version.txt"

	          # Check if files exist before proceeding
	          if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
	            echo "Error: Required dependency files are missing."
	            exit 1
	          fi

	          # Process the files if they exist
	          echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
	          echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
	          echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
	          echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
	        shell: bash
	      - name: Cache build dependencies
	        uses: actions/cache@v4
	        with:
	          # Note that we cannot use environment variables here given there is
	          # no shell to interpret them in the paths.
	          path: |
	            ~/.triton/llvm
	            ~/.triton/nvidia
	            ~/.triton/json
	          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
	      - # Cache ~/.triton/cache because the vast majority of unit test time is
	        # spent compiling. Triton won't (well, should not) use these cached files
	        # if something internal to Triton changes, because Triton's internal
	        # source code is part of the cache key.
	        #
	        # Similarly, cache ~/.ccache to speed up compilation.
	        #
	        # On branch `main` we always start from an empty cache, i.e. we skip the
	        # "restore" step. This is to prevent the caches from accumulating stale
	        # files over time.
	        name: Restore cache of ccache and Triton compilation artifacts
	        id: restore-build-cache
	        if: github.ref != 'refs/heads/main'
	        uses: actions/cache/restore@v4
	        with:
	          path: |
	            ~/.triton/cache
	            ~/.ccache
	          # Restore the most recent cache entry.
	          restore-keys: |
	            triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
	            triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
	          # We expect this cache key never to hit and for us to fall back
	          # unconditionally to the restore-key, so it doesn't actually matter
	          # what we put here (so long as it doesn't hit an existing key).
	          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
	      - name: Inspect cache directories
	        run: |
	          mkdir -p ~/.triton
	          du -h -d 1 ~/.triton

	          mkdir -p ~/.ccache
	          du -h -d 1 ~/.ccache
	      - name: Update PATH
	        run: |
	          echo "$HOME/.local/bin" >> $GITHUB_PATH
	      - name: Install pip dependencies
	        run: |
	          python3 -m pip install --upgrade pip
	          python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-forked pytest-xdist lit
	      - name: Install Triton
	        env:
	          CUDA_HOME: "/usr/local/cuda"
	        run: |
	          echo "PATH is '$PATH'"
	          cd python
	          ccache --zero-stats
	          python3 -m pip install -v '.[tests]'
	      - name: CCache Stats
	        run: ccache --print-stats
	      - name: Run lit tests
	        run: |
	          cd python
	          LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
	          if [ ! -d "${LIT_TEST_DIR}" ]; then
	            echo "Could not find '${LIT_TEST_DIR}'" ; exit 1
	          fi
	          lit -v "${LIT_TEST_DIR}"
	      - name: Run python tests on CUDA
	        run: |
	          INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/build/$(ls python/build | grep -i lib)/triton/instrumentation"
	          if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
	            echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit 1
	          fi
	          cd python/test/unit
	          python3 -m pytest -s -n 8 --ignore=hopper/test_flashattention.py --ignore=language/test_line_info.py --ignore=language/test_subprocess.py --ignore=test_debug.py
	          python3 -m pytest -s -n 8 language/test_subprocess.py
	          python3 -m pytest -s -n 8 test_debug.py --forked
	          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
	          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s language/test_line_info.py
	          # Run hopper/test_flashattention.py separately to avoid out of gpu memory
	          python3 -m pytest -s hopper/test_flashattention.py
	          TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
	          python3 -m pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
	      - name: Run interpreter tests
	        if: ${{ matrix.runner[0] == 'h100-runner-set' }}
	        env:
	          TRITON_INTERPRET: "1"
	        run: |
	          cd python/test/unit
	          python3 -m pytest -s -n 16 -m interpreter language/test_core.py language/test_standard.py \
	            language/test_random.py language/test_block_pointer.py language/test_subprocess.py language/test_line_info.py \
	            runtime/test_autotuner.py::test_kwargs[False] \
	            ../../tutorials/06-fused-attention.py::test_op --device cpu
	      - name: Run regression tests
	        run: |
	          cd python/test/regression
	          python3 -m pytest -s -n 8 .
	      - name: Run C++ unittests
	        run: |
	          cd python
	          cd "build/$(ls build | grep -i cmake)"
	          ctest -j32
	      - name: Run Proton tests
	        run: |
	          cd third_party/proton/test
	          python3 -m pytest -s .
	          cd ..
	      - name: Inspect cache directories
	        run: |
	          mkdir -p ~/.triton
	          du -h -d 1 ~/.triton

	          mkdir -p ~/.ccache
	          du -h -d 1 ~/.ccache
	      - # If we're on branch `main`, save the ccache Triton compilation artifacts
	        # to the cache so they can be used by other (non-main) CI runs.
	        #
	        # (It wouldn't be a problem to save the cache on every run, because github
	        # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
	        name: Save ccache and Triton compilation artifacts to cache
	        if: github.ref == 'refs/heads/main'
	        uses: actions/cache/save@v4
	        with:
	          path: |
	            ~/.triton/cache
	            ~/.ccache
	          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
	  # Same flow as Integration-Tests, but inside a ROCm container on each
	  # runner of the HIP matrix.
	  Integration-Tests-AMD:
	    needs: Runner-Preparation
	    if: needs.Runner-Preparation.outputs.matrix-HIP != ''
	    runs-on: ${{ matrix.runner }}
	    timeout-minutes: 30
	    env:
	      RUNNER_TYPE: ${{ matrix.runner[1] }}
	    strategy:
	      matrix:
	        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
	    name: Integration-Tests (${{matrix.runner[1] == 'gfx90a' && 'mi210' || 'mi300x'}})
	    container:
	      image: rocm/pytorch:rocm6.1_ubuntu22.04_py3.10_pytorch_2.4
	      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4
	        with:
	          submodules: 'true'
	      - name: Compute cache keys
	        id: cache-key
	        run: |
	          llvm_file="cmake/llvm-hash.txt"
	          nvidia_file="cmake/nvidia-toolchain-version.json"
	          json_file="cmake/json-version.txt"

	          # Check if files exist before proceeding
	          if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
	            echo "Error: Required dependency files are missing."
	            exit 1
	          fi

	          # Process the files if they exist
	          echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
	          echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
	          echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
	          echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
	        shell: bash
	      - name: Cache build dependencies
	        uses: actions/cache@v4
	        with:
	          # Note that we cannot use environment variables here given there is
	          # no shell to interpret them in the paths.
	          path: |
	            ~/.triton/llvm
	            ~/.triton/nvidia
	            ~/.triton/json
	          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
	      - # Cache ~/.triton/cache because the vast majority of unit test time is
	        # spent compiling. Triton won't (well, should not) use these cached files
	        # if something internal to Triton changes, because Triton's internal
	        # source code is part of the cache key.
	        #
	        # Similarly, cache ~/.ccache to speed up compilation.
	        #
	        # On branch `main` we always start from an empty cache, i.e. we skip the
	        # "restore" step. This is to prevent the caches from accumulating stale
	        # files over time.
	        name: Restore cache of ccache and Triton compilation artifacts
	        id: restore-build-cache
	        if: github.ref != 'refs/heads/main'
	        uses: actions/cache/restore@v4
	        with:
	          path: |
	            ~/.triton/cache
	            ~/.ccache
	          # Restore the most recent cache entry.
	          restore-keys: |
	            triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
	            triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
	          # We expect this cache key never to hit and for us to fall back
	          # unconditionally to the restore-key, so it doesn't actually matter
	          # what we put here (so long as it doesn't hit an existing key).
	          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
	      - name: Inspect cache directories
	        run: |
	          mkdir -p ~/.triton
	          du -h -d 1 ~/.triton

	          mkdir -p ~/.ccache
	          du -h -d 1 ~/.ccache
	      - name: Update PATH
	        run: |
	          echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
	      - name: Install pip dependencies
	        run: |
	          python3 -m pip install --upgrade pip
	          python3 -m pip install lit
	      - name: Install apt dependencies
	        # `-y` keeps the install from stopping at a confirmation prompt,
	        # since the step has no interactive terminal.
	        run: |
	          apt-get update
	          apt-get install -y ccache
	      - name: Install Triton
	        id: amd-install-triton
	        run: |
	          echo "PATH is '$PATH'"
	          pip uninstall -y triton
	          cd python
	          ccache --zero-stats
	          pip install -v -e '.[tests]'
	      - name: Clean up after an unsuccessful build
	        if: ${{ !success() && steps.amd-install-triton.outcome != 'success' }}
	        run: |
	          rm -rf ~/.triton
	      - name: CCache Stats
	        run: ccache --print-stats
	      - name: Run lit tests
	        run: |
	          cd python
	          LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
	          if [ ! -d "${LIT_TEST_DIR}" ]; then
	            echo "Could not find '${LIT_TEST_DIR}'" ; exit 1
	          fi
	          lit -v "${LIT_TEST_DIR}"
	      - name: Run python tests on HIP
	        run: |
	          INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/triton/instrumentation"
	          if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
	            echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit 1
	          fi
	          pytest --capture=tee-sys -rfs python/tutorials/06-fused-attention.py
	          pytest --capture=tee-sys -rfs third_party/amd/python/test/test_extract_slice.py
	          cd python/test/unit
	          pytest --capture=tee-sys -rfs -n 16 language runtime \
	            --ignore=language/test_line_info.py \
	            --ignore=test_debug.py
	          # TODO: uncomment
	          # pytest --capture=tee-sys -rfs test_debug.py
	          TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
	          pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py

	          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
	          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s -n 8 language/test_line_info.py
	      - name: Run regression tests
	        run: |
	          # Reenable test_functional_regression.py once it's fixed
	          cd python/test/regression
	          python3 -m pytest -s -n 8 ./test_cast_matmul.py
	      - name: Run Proton tests
	        run: |
	          cd third_party/proton/test
	          python3 -m pytest -s .
	          cd ..
	      - name: Run C++ unittests
	        run: |
	          cd python
	          cd "build/$(ls build | grep -i cmake)"
	          ctest -j32
	      - name: Inspect cache directories
	        run: |
	          mkdir -p ~/.triton
	          du -h -d 1 ~/.triton

	          mkdir -p ~/.ccache
	          du -h -d 1 ~/.ccache
	      - # If we're on branch `main`, save the ccache Triton compilation artifacts
	        # to the cache so they can be used by other (non-main) CI runs.
	        #
	        # (It wouldn't be a problem to save the cache on every run, because github
	        # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
	        name: Save ccache and Triton compilation artifacts to cache
	        if: github.ref == 'refs/heads/main'
	        uses: actions/cache/save@v4
	        with:
	          path: |
	            ~/.triton/cache
	            ~/.ccache
	          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
	      - name: Clean up caches
	        run: |
	          rm -rf ~/.triton/cache
	  # Build-only check on the macOS matrix: installs Triton from source inside
	  # a virtualenv; no test suites are run.
	  Build-Tests:
	    needs: Runner-Preparation
	    if: needs.Runner-Preparation.outputs.matrix-MACOS != ''
	    runs-on: ${{ matrix.runner }}
	    timeout-minutes: 40
	    strategy:
	      matrix:
	        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-MACOS)}}
	    env:
	      RUNNER_TYPE: ${{ matrix.runner[0] }}
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4
	        with:
	          submodules: "true"
	      - name: Install brew dependencies
	        run: |
	          brew update
	          brew install ccache llvm@19 lld coreutils
	      - name: Compute cache keys
	        id: cache-key
	        run: |
	          llvm_file="cmake/llvm-hash.txt"
	          nvidia_file="cmake/nvidia-toolchain-version.json"
	          json_file="cmake/json-version.txt"

	          # Check if files exist before proceeding
	          if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
	            echo "Error: Required dependency files are missing."
	            exit 1
	          fi

	          # Process the files if they exist
	          echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
	          echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
	          echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
	          echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
	        shell: bash
	      - name: Cache build dependencies
	        uses: actions/cache@v4
	        with:
	          # Note that we cannot use environment variables here given there is
	          # no shell to interpret them in the paths.
	          path: |
	            ~/.triton/llvm
	            ~/.triton/nvidia
	            ~/.triton/json
	          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
	      - # Cache ~/.triton/cache because the vast majority of unit test time is
	        # spent compiling. Triton won't (well, should not) use these cached files
	        # if something internal to Triton changes, because Triton's internal
	        # source code is part of the cache key.
	        #
	        # Similarly, cache ~/.ccache to speed up compilation.
	        #
	        # On branch `main` we always start from an empty cache, i.e. we skip the
	        # "restore" step. This is to prevent the caches from accumulating stale
	        # files over time.
	        name: Restore cache of ccache and Triton compilation artifacts
	        id: restore-build-cache
	        if: github.ref != 'refs/heads/main'
	        uses: actions/cache/restore@v4
	        with:
	          path: |
	            ~/.triton/cache
	            ~/.ccache
	          # Restore the most recent cache entry.
	          restore-keys: |
	            triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
	            triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
	          # We expect this cache key never to hit and for us to fall back
	          # unconditionally to the restore-key, so it doesn't actually matter
	          # what we put here (so long as it doesn't hit an existing key).
	          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
	      - name: Inspect cache directories
	        run: |
	          mkdir -p ~/.triton
	          du -h -d 1 ~/.triton

	          mkdir -p ~/.ccache
	          du -h -d 1 ~/.ccache
	      - name: Update PATH
	        run: |
	          echo "$HOME/.local/bin" >> $GITHUB_PATH
	          echo "/opt/homebrew/opt/llvm/bin" >> $GITHUB_PATH
	      - name: Install pip dependencies
	        run: |
	          python3 -m venv ~/.venv
	          source ~/.venv/bin/activate
	          python3 -m pip install --upgrade pip
	          python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-xdist lit pybind11
	      - name: Install Triton
	        env:
	          TRITON_BUILD_WITH_O1: "true"
	          # macos-latest has 3 vcpus and 7GB DRAM, to save memory we limit the number of jobs to 3
	          # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
	          MAX_JOBS: "3"
	        run: |
	          source ~/.venv/bin/activate
	          echo "PATH is '$PATH'"
	          cd python
	          ccache --zero-stats
	          python3 -m pip install -v --no-build-isolation .
	      - name: CCache Stats
	        run: ccache --print-stats
	      - name: Inspect cache directories
	        run: |
	          mkdir -p ~/.triton
	          du -h -d 1 ~/.triton

	          mkdir -p ~/.ccache
	          du -h -d 1 ~/.ccache
	      - # If we're on branch `main`, save the ccache Triton compilation artifacts
	        # to the cache so they can be used by other (non-main) CI runs.
	        #
	        # (It wouldn't be a problem to save the cache on every run, because github
	        # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
	        name: Save ccache and Triton compilation artifacts to cache
	        if: github.ref == 'refs/heads/main'
	        uses: actions/cache/save@v4
	        with:
	          path: |
	            ~/.triton/cache
	            ~/.ccache
	          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMD] Pipeline small vector width but not thru shared memory #13061

Workflow file

[AMD] Pipeline small vector width but not thru shared memory #13061

Jobs

Run details

Workflow file for this run