Commit
Merge pull request NixOS#267209 from Madouura/pr/triton-llvm
openai-triton-llvm: fix aarch64 and cross-compilation
wegank authored Nov 17, 2023
2 parents 9415631 + e3d4bea commit 4774c53
Showing 2 changed files with 95 additions and 44 deletions.
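Note (not part of this commit): a minimal usage sketch of overriding the package arguments visible in the package.nix diff below. The `openai-triton-llvm` attribute name is inferred from its pkgs/by-name location, and the override values are illustrative assumptions only.

    # Hedged sketch: consumers can tune the arguments this PR touches via .override.
    # Values here are illustrative, not taken from this commit.
    { pkgs ? import <nixpkgs> { } }:
    pkgs.openai-triton-llvm.override {
      buildDocs = false;            # skip the Sphinx HTML docs
      buildMan = false;             # skip the man pages
      buildTests = false;           # also disables the check phase (doCheck = buildTests)
      llvmTargetsToBuild = [ "X86" "AArch64" ];  # instead of the "NATIVE" placeholder
    }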
127 changes: 89 additions & 38 deletions pkgs/by-name/op/openai-triton-llvm/package.nix
@@ -1,27 +1,49 @@
{ config
, lib
{ lib
, stdenv
, fetchFromGitHub
, pkgsBuildBuild
, pkg-config
, cmake
, ninja
, git
, doxygen
, sphinx
, libxml2
, libxcrypt
, libedit
, libffi
, libpfm
, mpfr
, zlib
, ncurses
, doxygen
, sphinx
, which
, sysctl
, python3Packages
, buildDocs ? true
, buildMan ? true
, buildTests ? true
, llvmTargetsToBuild ? [ "NATIVE" ] # "NATIVE" resolves into x86 or aarch64 depending on stdenv
, llvmProjectsToBuild ? [ "llvm" "mlir" ]
}:

stdenv.mkDerivation (finalAttrs: {
let
llvmNativeTarget =
if stdenv.hostPlatform.isx86_64 then "X86"
else if stdenv.hostPlatform.isAarch64 then "AArch64"
else throw "Currently unsupported LLVM platform '${stdenv.hostPlatform.config}'";

inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t;
llvmTargetsToBuild' = [ "AMDGPU" "NVPTX" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild;

# This LLVM version can't seem to find pygments/pyyaml,
# but a later update will likely fix this (openai-triton-2.1.0)
python =
if buildTests
then python3Packages.python.withPackages (p: with p; [ psutil pygments pyyaml ])
else python3Packages.python;

isNative = stdenv.hostPlatform == stdenv.buildPlatform;
in stdenv.mkDerivation (finalAttrs: {
pname = "openai-triton-llvm";
version = "14.0.6-f28c006a5895";

@@ -33,7 +55,8 @@ stdenv.mkDerivation (finalAttrs: {
"man"
];

# See https://github.com/openai/triton/blob/main/python/setup.py and https://github.com/ptillet/triton-llvm-releases/releases
# See https://github.com/openai/triton/blob/main/python/setup.py
# and https://github.com/ptillet/triton-llvm-releases/releases
src = fetchFromGitHub {
owner = "llvm";
repo = "llvm-project";
@@ -46,7 +69,7 @@ stdenv.mkDerivation (finalAttrs: {
cmake
ninja
git
python3Packages.python
python
] ++ lib.optionals (buildDocs || buildMan) [
doxygen
sphinx
@@ -58,6 +81,7 @@ stdenv.mkDerivation (finalAttrs: {
libxcrypt
libedit
libffi
libpfm
mpfr
];

@@ -69,57 +93,84 @@ stdenv.mkDerivation (finalAttrs: {
sourceRoot = "${finalAttrs.src.name}/llvm";

cmakeFlags = [
"-DLLVM_TARGETS_TO_BUILD=${
let
# Targets can be found in
# https://github.com/llvm/llvm-project/tree/f28c006a5895fc0e329fe15fead81e37457cb1d1/clang/lib/Basic/Targets
# NOTE: Unsure of how "host" would function, especially given that we might be cross-compiling.
llvmTargets = [ "AMDGPU" "NVPTX" ]
++ lib.optionals stdenv.isAarch64 [ "AArch64" ]
++ lib.optionals stdenv.isx86_64 [ "X86" ];
in
lib.concatStringsSep ";" llvmTargets
}"
"-DLLVM_ENABLE_PROJECTS=llvm;mlir"
"-DLLVM_INSTALL_UTILS=ON"
] ++ lib.optionals (buildDocs || buildMan) [
"-DLLVM_INCLUDE_DOCS=ON"
"-DMLIR_INCLUDE_DOCS=ON"
"-DLLVM_BUILD_DOCS=ON"
# "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core
"-DLLVM_ENABLE_SPHINX=ON"
"-DSPHINX_OUTPUT_HTML=ON"
"-DSPHINX_OUTPUT_MAN=ON"
"-DSPHINX_WARNINGS_AS_ERRORS=OFF"
] ++ lib.optionals buildTests [
"-DLLVM_INCLUDE_TESTS=ON"
"-DMLIR_INCLUDE_TESTS=ON"
"-DLLVM_BUILD_TESTS=ON"
];
(lib.cmakeFeature "LLVM_TARGETS_TO_BUILD" (lib.concatStringsSep ";" llvmTargetsToBuild'))
(lib.cmakeFeature "LLVM_ENABLE_PROJECTS" (lib.concatStringsSep ";" llvmProjectsToBuild))
(lib.cmakeFeature "LLVM_HOST_TRIPLE" stdenv.hostPlatform.config)
(lib.cmakeFeature "LLVM_DEFAULT_TARGET_TRIPLE" stdenv.hostPlatform.config)
(lib.cmakeBool "LLVM_INSTALL_UTILS" true)
(lib.cmakeBool "LLVM_INCLUDE_DOCS" (buildDocs || buildMan))
(lib.cmakeBool "MLIR_INCLUDE_DOCS" (buildDocs || buildMan))
(lib.cmakeBool "LLVM_BUILD_DOCS" (buildDocs || buildMan))
# Way too slow, only uses one core
# (lib.cmakeBool "LLVM_ENABLE_DOXYGEN" (buildDocs || buildMan))
(lib.cmakeBool "LLVM_ENABLE_SPHINX" (buildDocs || buildMan))
(lib.cmakeBool "SPHINX_OUTPUT_HTML" buildDocs)
(lib.cmakeBool "SPHINX_OUTPUT_MAN" buildMan)
(lib.cmakeBool "SPHINX_WARNINGS_AS_ERRORS" false)
(lib.cmakeBool "LLVM_INCLUDE_TESTS" buildTests)
(lib.cmakeBool "MLIR_INCLUDE_TESTS" buildTests)
(lib.cmakeBool "LLVM_BUILD_TESTS" buildTests)
# Cross compilation code taken/modified from LLVM 16 derivation
] ++ lib.optionals (!isNative) (let
nativeToolchainFlags = let
nativeCC = pkgsBuildBuild.targetPackages.stdenv.cc;
nativeBintools = nativeCC.bintools.bintools;
in [
(lib.cmakeFeature "CMAKE_C_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}cc")
(lib.cmakeFeature "CMAKE_CXX_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}c++")
(lib.cmakeFeature "CMAKE_AR" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ar")
(lib.cmakeFeature "CMAKE_STRIP" "${nativeBintools}/bin/${nativeBintools.targetPrefix}strip")
(lib.cmakeFeature "CMAKE_RANLIB" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ranlib")
];

# We need to repass the custom GNUInstallDirs values, otherwise CMake
# will choose them for us, leading to wrong results in llvm-config-native
nativeInstallFlags = [
(lib.cmakeFeature "CMAKE_INSTALL_PREFIX" (placeholder "out"))
(lib.cmakeFeature "CMAKE_INSTALL_BINDIR" "${placeholder "out"}/bin")
(lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "${placeholder "out"}/include")
(lib.cmakeFeature "CMAKE_INSTALL_LIBDIR" "${placeholder "out"}/lib")
(lib.cmakeFeature "CMAKE_INSTALL_LIBEXECDIR" "${placeholder "out"}/libexec")
];
in [
(lib.cmakeBool "CMAKE_CROSSCOMPILING" true)
(lib.cmakeFeature "CROSS_TOOLCHAIN_FLAGS_NATIVE" (lib.concatStringsSep ";"
(lib.concatLists [ nativeToolchainFlags nativeInstallFlags ])))
]);

postPatch = ''
# `CMake Error: cannot write to file "/build/source/llvm/build/lib/cmake/mlir/MLIRTargets.cmake": Permission denied`
chmod +w -R ../mlir
patchShebangs ../mlir/test/mlir-reduce
# FileSystem permissions tests fail with various special bits
rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test
rm unittests/Support/Path.cpp
substituteInPlace unittests/Support/CMakeLists.txt \
--replace "Path.cpp" ""
'' + lib.optionalString stdenv.isAarch64 ''
# Not sure why this fails
rm test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s
'';

postInstall = lib.optionalString (!isNative) ''
cp -a NATIVE/bin/llvm-config $out/bin/llvm-config-native
'';

doCheck = buildTests;

nativeCheckInputs = [ which ]
++ lib.optionals stdenv.isDarwin [ sysctl ];

checkTarget = "check-all";
requiredSystemFeatures = [ "big-parallel" ];

meta = with lib; {
description = "Collection of modular and reusable compiler and toolchain technologies";
homepage = "https://github.com/llvm/llvm-project";
license = with licenses; [ ncsa ];
maintainers = with maintainers; [ SomeoneSerge Madouura ];
platforms = platforms.linux;
# Consider the derivation broken if we're not building for CUDA or ROCm, or if we're building for aarch64
# and ROCm is enabled. See https://github.com/RadeonOpenCompute/ROCm/issues/1831#issuecomment-1278205344.
broken = stdenv.isAarch64 && !config.cudaSupport;
platforms = with platforms; aarch64 ++ x86;
};
})
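Note on the flag helpers used in the new cmakeFlags: `lib.cmakeBool` and `lib.cmakeFeature` render typed -D definitions instead of hand-interpolated strings, and the "NATIVE" placeholder is resolved through inferNativeTarget before the list is joined. Below is a minimal evaluation sketch, assuming the helpers render :BOOL/:STRING definitions as in current nixpkgs lib; the expected output strings are my reading of those helpers, not output captured from this build.

    # Hedged sketch (nix repl / nix-instantiate --eval): how the target flag is
    # assembled on an aarch64 host.
    let
      lib = (import <nixpkgs> { }).lib;
      llvmNativeTarget = "AArch64";   # what the hostPlatform.isAarch64 branch selects
      inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t;
      llvmTargetsToBuild' = [ "AMDGPU" "NVPTX" ] ++ builtins.map inferNativeTarget [ "NATIVE" ];
    in {
      targets = lib.cmakeFeature "LLVM_TARGETS_TO_BUILD" (lib.concatStringsSep ";" llvmTargetsToBuild');
      # expected: "-DLLVM_TARGETS_TO_BUILD:STRING=AMDGPU;NVPTX;AArch64"
      utils = lib.cmakeBool "LLVM_INSTALL_UTILS" true;
      # expected: "-DLLVM_INSTALL_UTILS:BOOL=TRUE"
    }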
12 changes: 6 additions & 6 deletions pkgs/development/python-modules/torch/default.nix
@@ -16,9 +16,10 @@
filelock,
jinja2,
networkx,
openai-triton,
sympy,
numpy, pyyaml, cffi, click, typing-extensions,
# ROCm build and `torch.compile` requires `openai-triton`
tritonSupport ? (!stdenv.isDarwin), openai-triton,

# Unit tests
hypothesis, psutil,
@@ -303,12 +304,13 @@ in buildPythonPackage rec {
"-Wno-pass-failed"
] ++ [
"-Wno-unused-command-line-argument"
"-Wno-maybe-uninitialized"
"-Wno-uninitialized"
"-Wno-array-bounds"
"-Wno-stringop-overflow"
"-Wno-free-nonheap-object"
"-Wno-unused-result"
] ++ lib.optionals stdenv.cc.isGNU [
"-Wno-maybe-uninitialized"
"-Wno-stringop-overflow"
]));

nativeBuildInputs = [
@@ -377,12 +379,10 @@ in buildPythonPackage rec {
# the following are required for tensorboard support
pillow six future tensorboard protobuf

# ROCm build and `torch.compile` requires openai-triton
openai-triton

# torch/csrc requires `pybind11` at runtime
pybind11
]
++ lib.optionals tritonSupport [ openai-triton ]
++ lib.optionals MPISupport [ mpi ]
++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

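Note on the torch change: the triton dependency is now gated behind `tritonSupport` (defaulting to `!stdenv.isDarwin`) instead of being listed unconditionally, so it drops out of the dependency list when disabled. Below is a minimal sketch of the `lib.optionals` pattern, with placeholder strings standing in for the real derivations.

    # Hedged sketch of the gating pattern; strings stand in for real derivations.
    let
      lib = (import <nixpkgs> { }).lib;
      isDarwin = false;                       # pretend we are on Linux
      tritonSupport = !isDarwin;              # the default added in the diff
      deps = [ "pybind11" ]
        ++ lib.optionals tritonSupport [ "openai-triton" ];
    in
      deps  # => [ "pybind11" "openai-triton" ] on Linux, [ "pybind11" ] on Darwin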
