Commit
Merge pull request NixOS#267209 from Madouura/pr/triton-llvm
openai-triton-llvm: fix aarch64 and cross-compilation
wegank authored Nov 17, 2023
2 parents 9415631 + e3d4bea commit 4774c53
Showing 2 changed files with 95 additions and 44 deletions.
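Note (not part of this commit): a minimal usage sketch of overriding the package arguments visible in the package.nix diff below. The `openai-triton-llvm` attribute name is inferred from its pkgs/by-name location, and the override values are illustrative assumptions only.

    # Hedged sketch: consumers can tune the arguments this PR touches via .override.
    # Values here are illustrative, not taken from this commit.
    { pkgs ? import <nixpkgs> { } }:
    pkgs.openai-triton-llvm.override {
      buildDocs = false;            # skip the Sphinx HTML docs
      buildMan = false;             # skip the man pages
      buildTests = false;           # also disables the check phase (doCheck = buildTests)
      llvmTargetsToBuild = [ "X86" "AArch64" ];  # instead of the "NATIVE" placeholder
    }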
127 changes: 89 additions & 38 deletions pkgs/by-name/op/openai-triton-llvm/package.nix
@@ -1,27 +1,49 @@
{ config
, lib
{ lib
, stdenv
, fetchFromGitHub
, pkgsBuildBuild
, pkg-config
, cmake
, ninja
, git
, doxygen
, sphinx
, libxml2
, libxcrypt
, libedit
, libffi
, libpfm
, mpfr
, zlib
, ncurses
, doxygen
, sphinx
, which
, sysctl
, python3Packages
, buildDocs ? true
, buildMan ? true
, buildTests ? true
, llvmTargetsToBuild ? [ "NATIVE" ] # "NATIVE" resolves into x86 or aarch64 depending on stdenv
, llvmProjectsToBuild ? [ "llvm" "mlir" ]
}:

stdenv.mkDerivation (finalAttrs: {
let
llvmNativeTarget =
if stdenv.hostPlatform.isx86_64 then "X86"
else if stdenv.hostPlatform.isAarch64 then "AArch64"
else throw "Currently unsupported LLVM platform '${stdenv.hostPlatform.config}'";

inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t;
llvmTargetsToBuild' = [ "AMDGPU" "NVPTX" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild;

# This LLVM version can't seem to find pygments/pyyaml,
# but a later update will likely fix this (openai-triton-2.1.0)
python =
if buildTests
then python3Packages.python.withPackages (p: with p; [ psutil pygments pyyaml ])
else python3Packages.python;

isNative = stdenv.hostPlatform == stdenv.buildPlatform;
in stdenv.mkDerivation (finalAttrs: {
pname = "openai-triton-llvm";
version = "14.0.6-f28c006a5895";

@@ -33,7 +55,8 @@ stdenv.mkDerivation (finalAttrs: {
"man"
];

# See https://github.com/openai/triton/blob/main/python/setup.py and https://github.com/ptillet/triton-llvm-releases/releases
# See https://github.com/openai/triton/blob/main/python/setup.py
# and https://github.com/ptillet/triton-llvm-releases/releases
src = fetchFromGitHub {
owner = "llvm";
repo = "llvm-project";
@@ -46,7 +69,7 @@ stdenv.mkDerivation (finalAttrs: {
cmake
ninja
git
python3Packages.python
python
] ++ lib.optionals (buildDocs || buildMan) [
doxygen
sphinx
@@ -58,6 +81,7 @@ stdenv.mkDerivation (finalAttrs: {
libxcrypt
libedit
libffi
libpfm
mpfr
];

@@ -69,57 +93,84 @@ stdenv.mkDerivation (finalAttrs: {
sourceRoot = "${finalAttrs.src.name}/llvm";

cmakeFlags = [
"-DLLVM_TARGETS_TO_BUILD=${
let
# Targets can be found in
# https://github.com/llvm/llvm-project/tree/f28c006a5895fc0e329fe15fead81e37457cb1d1/clang/lib/Basic/Targets
# NOTE: Unsure of how "host" would function, especially given that we might be cross-compiling.
llvmTargets = [ "AMDGPU" "NVPTX" ]
++ lib.optionals stdenv.isAarch64 [ "AArch64" ]
++ lib.optionals stdenv.isx86_64 [ "X86" ];
in
lib.concatStringsSep ";" llvmTargets
}"
"-DLLVM_ENABLE_PROJECTS=llvm;mlir"
"-DLLVM_INSTALL_UTILS=ON"
] ++ lib.optionals (buildDocs || buildMan) [
"-DLLVM_INCLUDE_DOCS=ON"
"-DMLIR_INCLUDE_DOCS=ON"
"-DLLVM_BUILD_DOCS=ON"
# "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core
"-DLLVM_ENABLE_SPHINX=ON"
"-DSPHINX_OUTPUT_HTML=ON"
"-DSPHINX_OUTPUT_MAN=ON"
"-DSPHINX_WARNINGS_AS_ERRORS=OFF"
] ++ lib.optionals buildTests [
"-DLLVM_INCLUDE_TESTS=ON"
"-DMLIR_INCLUDE_TESTS=ON"
"-DLLVM_BUILD_TESTS=ON"
];
(lib.cmakeFeature "LLVM_TARGETS_TO_BUILD" (lib.concatStringsSep ";" llvmTargetsToBuild'))
(lib.cmakeFeature "LLVM_ENABLE_PROJECTS" (lib.concatStringsSep ";" llvmProjectsToBuild))
(lib.cmakeFeature "LLVM_HOST_TRIPLE" stdenv.hostPlatform.config)
(lib.cmakeFeature "LLVM_DEFAULT_TARGET_TRIPLE" stdenv.hostPlatform.config)
(lib.cmakeBool "LLVM_INSTALL_UTILS" true)
(lib.cmakeBool "LLVM_INCLUDE_DOCS" (buildDocs || buildMan))
(lib.cmakeBool "MLIR_INCLUDE_DOCS" (buildDocs || buildMan))
(lib.cmakeBool "LLVM_BUILD_DOCS" (buildDocs || buildMan))
# Way too slow, only uses one core
# (lib.cmakeBool "LLVM_ENABLE_DOXYGEN" (buildDocs || buildMan))
(lib.cmakeBool "LLVM_ENABLE_SPHINX" (buildDocs || buildMan))
(lib.cmakeBool "SPHINX_OUTPUT_HTML" buildDocs)
(lib.cmakeBool "SPHINX_OUTPUT_MAN" buildMan)
(lib.cmakeBool "SPHINX_WARNINGS_AS_ERRORS" false)
(lib.cmakeBool "LLVM_INCLUDE_TESTS" buildTests)
(lib.cmakeBool "MLIR_INCLUDE_TESTS" buildTests)
(lib.cmakeBool "LLVM_BUILD_TESTS" buildTests)
# Cross compilation code taken/modified from LLVM 16 derivation
] ++ lib.optionals (!isNative) (let
nativeToolchainFlags = let
nativeCC = pkgsBuildBuild.targetPackages.stdenv.cc;
nativeBintools = nativeCC.bintools.bintools;
in [
(lib.cmakeFeature "CMAKE_C_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}cc")
(lib.cmakeFeature "CMAKE_CXX_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}c++")
(lib.cmakeFeature "CMAKE_AR" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ar")
(lib.cmakeFeature "CMAKE_STRIP" "${nativeBintools}/bin/${nativeBintools.targetPrefix}strip")
(lib.cmakeFeature "CMAKE_RANLIB" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ranlib")
];

# We need to repass the custom GNUInstallDirs values, otherwise CMake
# will choose them for us, leading to wrong results in llvm-config-native
nativeInstallFlags = [
(lib.cmakeFeature "CMAKE_INSTALL_PREFIX" (placeholder "out"))
(lib.cmakeFeature "CMAKE_INSTALL_BINDIR" "${placeholder "out"}/bin")
(lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "${placeholder "out"}/include")
(lib.cmakeFeature "CMAKE_INSTALL_LIBDIR" "${placeholder "out"}/lib")
(lib.cmakeFeature "CMAKE_INSTALL_LIBEXECDIR" "${placeholder "out"}/libexec")
];
in [
(lib.cmakeBool "CMAKE_CROSSCOMPILING" true)
(lib.cmakeFeature "CROSS_TOOLCHAIN_FLAGS_NATIVE" (lib.concatStringsSep ";"
(lib.concatLists [ nativeToolchainFlags nativeInstallFlags ])))
]);

postPatch = ''
# `CMake Error: cannot write to file "/build/source/llvm/build/lib/cmake/mlir/MLIRTargets.cmake": Permission denied`
chmod +w -R ../mlir
patchShebangs ../mlir/test/mlir-reduce
# FileSystem permissions tests fail with various special bits
rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test
rm unittests/Support/Path.cpp
substituteInPlace unittests/Support/CMakeLists.txt \
--replace "Path.cpp" ""
'' + lib.optionalString stdenv.isAarch64 ''
# Not sure why this fails
rm test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s
'';

postInstall = lib.optionalString (!isNative) ''
cp -a NATIVE/bin/llvm-config $out/bin/llvm-config-native
'';

doCheck = buildTests;

nativeCheckInputs = [ which ]
++ lib.optionals stdenv.isDarwin [ sysctl ];

checkTarget = "check-all";
requiredSystemFeatures = [ "big-parallel" ];

meta = with lib; {
description = "Collection of modular and reusable compiler and toolchain technologies";
homepage = "https://github.com/llvm/llvm-project";
license = with licenses; [ ncsa ];
maintainers = with maintainers; [ SomeoneSerge Madouura ];
platforms = platforms.linux;
# Consider the derivation broken if we're not building for CUDA or ROCm, or if we're building for aarch64
# and ROCm is enabled. See https://github.com/RadeonOpenCompute/ROCm/issues/1831#issuecomment-1278205344.
broken = stdenv.isAarch64 && !config.cudaSupport;
platforms = with platforms; aarch64 ++ x86;
};
})
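Note on the flag helpers used in the new cmakeFlags: `lib.cmakeBool` and `lib.cmakeFeature` render typed -D definitions instead of hand-interpolated strings, and the "NATIVE" placeholder is resolved through inferNativeTarget before the list is joined. Below is a minimal evaluation sketch, assuming the helpers render :BOOL/:STRING definitions as in current nixpkgs lib; the expected output strings are my reading of those helpers, not output captured from this build.

    # Hedged sketch (nix repl / nix-instantiate --eval): how the target flag is
    # assembled on an aarch64 host.
    let
      lib = (import <nixpkgs> { }).lib;
      llvmNativeTarget = "AArch64";   # what the hostPlatform.isAarch64 branch selects
      inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t;
      llvmTargetsToBuild' = [ "AMDGPU" "NVPTX" ] ++ builtins.map inferNativeTarget [ "NATIVE" ];
    in {
      targets = lib.cmakeFeature "LLVM_TARGETS_TO_BUILD" (lib.concatStringsSep ";" llvmTargetsToBuild');
      # expected: "-DLLVM_TARGETS_TO_BUILD:STRING=AMDGPU;NVPTX;AArch64"
      utils = lib.cmakeBool "LLVM_INSTALL_UTILS" true;
      # expected: "-DLLVM_INSTALL_UTILS:BOOL=TRUE"
    }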
12 changes: 6 additions & 6 deletions pkgs/development/python-modules/torch/default.nix
@@ -16,9 +16,10 @@
filelock,
jinja2,
networkx,
openai-triton,
sympy,
numpy, pyyaml, cffi, click, typing-extensions,
# ROCm build and `torch.compile` requires `openai-triton`
tritonSupport ? (!stdenv.isDarwin), openai-triton,

# Unit tests
hypothesis, psutil,
@@ -303,12 +304,13 @@ in buildPythonPackage rec {
"-Wno-pass-failed"
] ++ [
"-Wno-unused-command-line-argument"
"-Wno-maybe-uninitialized"
"-Wno-uninitialized"
"-Wno-array-bounds"
"-Wno-stringop-overflow"
"-Wno-free-nonheap-object"
"-Wno-unused-result"
] ++ lib.optionals stdenv.cc.isGNU [
"-Wno-maybe-uninitialized"
"-Wno-stringop-overflow"
]));

nativeBuildInputs = [
@@ -377,12 +379,10 @@ in buildPythonPackage rec {
# the following are required for tensorboard support
pillow six future tensorboard protobuf

# ROCm build and `torch.compile` requires openai-triton
openai-triton

# torch/csrc requires `pybind11` at runtime
pybind11
]
++ lib.optionals tritonSupport [ openai-triton ]
++ lib.optionals MPISupport [ mpi ]
++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

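Note on the torch change: the triton dependency is now gated behind `tritonSupport` (defaulting to `!stdenv.isDarwin`) instead of being listed unconditionally, so it drops out of the dependency list when disabled. Below is a minimal sketch of the `lib.optionals` pattern, with placeholder strings standing in for the real derivations.

    # Hedged sketch of the gating pattern; strings stand in for real derivations.
    let
      lib = (import <nixpkgs> { }).lib;
      isDarwin = false;                       # pretend we are on Linux
      tritonSupport = !isDarwin;              # the default added in the diff
      deps = [ "pybind11" ]
        ++ lib.optionals tritonSupport [ "openai-triton" ];
    in
      deps  # => [ "pybind11" "openai-triton" ] on Linux, [ "pybind11" ] on Darwin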
