diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 60f77f16..98faa8df 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -7,7 +7,7 @@ on: branches: [ "main" ] jobs: - test-plugin: + call-workflow: uses: ./.github/workflows/test-plugin.yml with: triton-ref: '05dc28be0e72dd496300a31b99a21a5a5118f8e9' # known good commit "[CI] refactor workflows (#2504)" diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index db2ad468..a8d8239c 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -8,5 +8,5 @@ if(TRITON_BUILD_PYTHON_MODULE) ${CMAKE_CURRENT_SOURCE_DIR}/ExecutionEngine/CRunnerUtils.h ${CMAKE_CURRENT_SOURCE_DIR}/ExecutionEngine/CRunnerUtils.cpp DESTINATION ${PYTHON_THIRD_PARTY_PATH}/cpu/) - # TODO: perhaps we want to install binary files used in __init__.py + # TODO: perhaps we want to install binary files used by __init__.py endif() diff --git a/python/__init__.py b/python/__init__.py index 9c38b4b3..ee62a437 100644 --- a/python/__init__.py +++ b/python/__init__.py @@ -14,7 +14,7 @@ def _get_triton_shared_opt_path() -> str: path = os.getenv("TRITON_SHARED_OPT_PATH", "") if path == "": - assert Exception("TRITON_SHARED_OPT_PATH is not set.") + raise Exception("TRITON_SHARED_OPT_PATH is not set.") return path @@ -22,7 +22,7 @@ def _get_llvm_bin_path(bin_name: str) -> str: path = os.getenv("LLVM_BINARY_DIR", "") if path == "": raise Exception("LLVM_BINARY_DIR is not set.") - return f"{path}/{bin_name}" + return os.path.join(path, bin_name) def _ttir_to_ttsharedir(mod): @@ -33,8 +33,7 @@ def _ttir_to_ttsharedir(mod): dst_path = os.path.join(tmpdir, "ttshared.mlir") Path(src_path).write_text(ttir_code) triton_shared_opt_path = _get_triton_shared_opt_path() - ret = subprocess.check_call([triton_shared_opt_path, src_path, "--triton-to-linalg", "-o", dst_path]) - assert ret == 0 + subprocess.check_call([triton_shared_opt_path, src_path, "--triton-to-linalg", "-o", dst_path]) return Path(dst_path).read_text() @@ -51,7 +50,7 @@ def _ttsharedir_to_llir(ttsharedir: str): Path(ttshared_path).write_text(ttsharedir) mlir_opt_path = _get_llvm_bin_path("mlir-opt") # TritonShared-MLIR to LLVM-MLIR - ret = subprocess.check_call([mlir_opt_path, ttshared_path, + subprocess.check_call([mlir_opt_path, ttshared_path, "--convert-linalg-to-affine-loops", "--eliminate-empty-tensors", "--empty-tensor-to-alloc-tensor", @@ -72,15 +71,13 @@ def _ttsharedir_to_llir(ttsharedir: str): "--reconcile-unrealized-casts", "-o", llmlir_path]) - assert ret == 0 # LLVM-MLIR to LLVM-IR mlir_translate_path = _get_llvm_bin_path("mlir-translate") - ret = subprocess.check_call([mlir_translate_path, llmlir_path, + subprocess.check_call([mlir_translate_path, llmlir_path, "--mlir-to-llvmir", "-o", llir_path]) - assert ret == 0 return Path(llir_path).read_text() @@ -93,11 +90,9 @@ def _llir_to_bin(llir: str): with tempfile.TemporaryDirectory() as tmpdir: src_path = os.path.join(tmpdir, "kernel.ll") dst_path = os.path.join(tmpdir, "kernel.o") - with open(src_path, "w") as f: - f.write(llir) + Path(src_path).write_text(llir) llc_path = _get_llvm_bin_path("llc") - ret = subprocess.check_call([llc_path, src_path, "-o", dst_path]) - assert ret == 0 + subprocess.check_call([llc_path, src_path, "-o", dst_path]) # Actually it's text-format assembly. Use read_text(). return Path(dst_path).read_text() @@ -346,25 +341,27 @@ def make_launcher_stub(self, name, signature, constants, ids): so_name = f"{name}.py" # retrieve stub from cache if it exists cache_path = so_cache_manager.get_file(so_name) - if cache_path is None: - kernel_placeholder_name = "KERNEL_NAME_PLACEHOLDER" - with tempfile.TemporaryDirectory() as tmpdir: - # Later KERNEL_NAME_PLACEHOLDER will be used to assign the kernel name - # in the following launch function. - launcher_src = _generate_launcher(constants, signature, kernel_placeholder_name) - # This function was renamed and made public in Python 3.10 - if hasattr(sysconfig, 'get_default_scheme'): - scheme = sysconfig.get_default_scheme() - else: - scheme = sysconfig._get_default_scheme() - # 'posix_local' is a custom scheme on Debian. However, starting Python 3.10, the default install - # path changes to include 'local'. This change is required to use triton with system-wide python. - if scheme == 'posix_local': - scheme = 'posix_prefix' - py_include_dir = sysconfig.get_paths(scheme=scheme)["include"] - - dst_path = os.path.join(tmpdir, so_name) - py_src = f""" + if cache_path is not None: + return cache_path + + kernel_placeholder_name = "KERNEL_NAME_PLACEHOLDER" + with tempfile.TemporaryDirectory() as tmpdir: + # Later KERNEL_NAME_PLACEHOLDER will be used to assign the kernel name + # in the following launch function. + launcher_src = _generate_launcher(constants, signature, kernel_placeholder_name) + # This function was renamed and made public in Python 3.10 + if hasattr(sysconfig, 'get_default_scheme'): + scheme = sysconfig.get_default_scheme() + else: + scheme = sysconfig._get_default_scheme() + # 'posix_local' is a custom scheme on Debian. However, starting Python 3.10, the default install + # path changes to include 'local'. This change is required to use triton with system-wide python. + if scheme == 'posix_local': + scheme = 'posix_prefix' + py_include_dir = sysconfig.get_paths(scheme=scheme)["include"] + + dst_path = os.path.join(tmpdir, so_name) + py_src = f""" import os, subprocess, tempfile import importlib.util from pathlib import Path @@ -385,9 +382,7 @@ def launch(gridX, gridY, gridZ, num_warps, num_ctas, clusterDim0, clusterDim1, c Path(asm_src_path).write_text(asm_src) Path(launcher_src_path).write_text(launcher_src) # Compile it together. - ret = subprocess.check_call(["g++", launcher_src_path, asm_src_path, f"-I{py_include_dir}", f"-I{Path(__file__).resolve().parent}", "-shared", "-fPIC", "-o", so_path]) - if ret != 0: - raise AssertionError("Kernel compilation failed.") + subprocess.check_call(["g++", launcher_src_path, asm_src_path, f"-I{py_include_dir}", f"-I{Path(__file__).resolve().parent}", "-shared", "-fPIC", "-o", so_path]) # Load and launch the compiled kernel. spec = importlib.util.spec_from_file_location("__triton_shared_ref_cpu_kernel_launcher", so_path) @@ -395,11 +390,7 @@ def launch(gridX, gridY, gridZ, num_warps, num_ctas, clusterDim0, clusterDim1, c spec.loader.exec_module(mod) return mod.launch(gridX, gridY, gridZ, launch_enter_hook, launch_exit_hook, compiled_kernel, *args) """ - Path(dst_path).write_text(py_src) - with open(dst_path, "rb") as f: - return so_cache_manager.put(f.read(), so_name, binary=True) - else: - return cache_path + return so_cache_manager.put(py_src, so_name, binary=False) register_backend("cpu", TritonSharedRefCPUBackend)