[compiler] Make cuda/hip/vulkan target cl options consistent (iree-org#17710)

This commit changes the command-line options for specifying targets for CUDA/HIP/Vulkan to `--iree-<api>-target` and the corresponding feature option to `--iree-<api>-target-features`. This makes more sense given that we are not using a triple or a chip anymore; the new option is more flexible, as it also accepts code names and product names. This also makes the options consistent.

This is a breaking change. Here is the main list of options:

| Before | After |
| :---: | :---: |
| `--iree-hal-cuda-llvm-target-arch` | `--iree-cuda-target` |
| `--iree-hal-cuda-llvm-target-feature` | `--iree-cuda-target-features` |
| `--iree-rocm-target-chip` | `--iree-hip-target` |
| `--iree-rocm-target-features` | `--iree-hip-target-features` |
| `--iree-vulkan-target-triple` | `--iree-vulkan-target` |
| `--iree-hal-cuda-*` | `--iree-cuda-*` |
| `--iree-rocm-*` | `--iree-hip-*` |

Progress towards iree-org#16341

ci-extra: test_nvidia_gpu,test_nvidia_a100,test_amd_mi250,build_test_all_macos_arm64,build_and_test_android
nod-ai · Aug 22, 2024 · c44d29b · c44d29b
1 parent d1ccc8c
commit c44d29b
Show file tree

Hide file tree

Showing 32 changed files with 247 additions and 187 deletions.
diff --git a/build_tools/cmake/iree_check_test.cmake b/build_tools/cmake/iree_check_test.cmake
@@ -166,7 +166,7 @@ function(iree_check_test)
     list(APPEND _BASE_COMPILER_FLAGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}")
   endif()
   if(_NORMALIZED_TARGET_BACKEND STREQUAL "ROCM")
-    list(APPEND _BASE_COMPILER_FLAGS "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}")
+    list(APPEND _BASE_COMPILER_FLAGS "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}")
   endif()
 
   if(_BYTECODE_MODULE_BUILD_ENABLED)

diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json
@@ -2,7 +2,7 @@
   "config_name": "gpu_rocm",
   "iree_compile_flags": [
     "--iree-hal-target-backends=rocm",
-    "--iree-rocm-target-chip=gfx90a",
+    "--iree-hip-target=gfx90a",
     "--iree-input-demote-f64-to-f32"
   ],
   "iree_run_module_flags": [

diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json
@@ -2,7 +2,7 @@
   "config_name": "gpu_rocm",
   "iree_compile_flags": [
     "--iree-hal-target-backends=rocm",
-    "--iree-rocm-target-chip=gfx942",
+    "--iree-hip-target=gfx942",
     "--iree-input-demote-f64-to-f32"
   ],
   "iree_run_module_flags": [

diff --git a/compiler/plugins/target/CUDA/CUDATarget.cpp b/compiler/plugins/target/CUDA/CUDATarget.cpp
@@ -55,59 +55,64 @@ namespace mlir::iree_compiler::IREE::HAL {
 namespace {
 struct CUDAOptions {
   bool dumpPtx = false;
-  std::string clTargetChip = "sm_60";
-  std::string clTargetFeature = "+ptx76";
+  std::string clTarget = "sm_60";
+  std::string clTargetFeatures = "+ptx76";
   bool clUsePtxas = false;
   std::string clUsePtxasFrom;
   std::string clUsePtxasParams;
 
   void bindOptions(OptionsBinder &binder) {
     static llvm::cl::OptionCategory category("CUDA HAL Target");
-    binder.opt<bool>("iree-hal-cuda-dump-ptx", dumpPtx, llvm::cl::cat(category),
+    binder.opt<bool>("iree-cuda-dump-ptx", dumpPtx, llvm::cl::cat(category),
                      llvm::cl::desc("Dump ptx to the debug stream."));
 
-    binder.opt<std::string>("iree-hal-cuda-llvm-target-arch", clTargetChip,
-                            llvm::cl::cat(category),
-                            llvm::cl::desc("LLVM target chip."));
+    binder.opt<std::string>(
+        "iree-cuda-target", clTarget, llvm::cl::cat(category),
+        llvm::cl::desc(
+            // clang-format off
+            "CUDA target as expected by LLVM NVPTX backend; e.g., "
+            "'sm_80'/'sm_90' for targeting Ampere/Hopper GPUs. "
+            "Additionally this also supports architecture code names like "
+            "'turing'/'ampere' or some product names like 'a100'/'rtx3090ti' "
+            "for a better experience. See "
+            "https://iree.dev/guides/deployment-configurations/gpu-cuda "
+            "for more details."
+            // clang-format on
+            ));
 
-    binder.opt<std::string>("iree-hal-cuda-llvm-target-feature",
-                            clTargetFeature, llvm::cl::cat(category),
-                            llvm::cl::desc("Use to set PTX version."));
+    binder.opt<std::string>(
+        "iree-cuda-target-features", clTargetFeatures, llvm::cl::cat(category),
+        llvm::cl::desc(
+            "CUDA target features as expected by LLVM NVPTX backend; e.g. "
+            "use '+ptxNN' to set PTX version to NN."));
 
     binder.opt<bool>(
-        "iree-hal-cuda-use-ptxas", clUsePtxas, llvm::cl::cat(category),
-        llvm::cl::desc("It uses the ptxas compiler that is on the environment, "
-                       "compiles the "
-                       "generated PTX code with it, puts the cubin binary "
-                       "generated by ptxas "
-                       "into the executable. "
-                       "'--iree-hal-cuda-llvm-target-arch' is used as "
-                       "the target GPU. If passing additional parameters to "
-                       "ptxas is desired, "
-                       "the parameters flag can be used "
-                       "(e.g.'--iree-hal-cuda-use-ptxas-params=-v')."));
+        "iree-cuda-use-ptxas", clUsePtxas, llvm::cl::cat(category),
+        llvm::cl::desc(
+            "Whether to use the ptxas tool to assemble the generated PTX "
+            "code and put the generated CUBIN binary file into the executable. "
+            "If not set, directly embeds the PTX into the executable. "
+            "To specify the exact ptxas tool path, use "
+            "'--iree-cuda-use-ptxas-from'. To pass "
+            "additional parameters to ptxas, use "
+            "'--iree-cuda-use-ptxas-params', e.g. "
+            "'--iree-cuda-use-ptxas-params=-v'"));
 
     binder.opt<std::string>(
-        "iree-hal-cuda-use-ptxas-from", clUsePtxasFrom, llvm::cl::cat(category),
-        llvm::cl::desc(
-            "It uses the provided ptxas compiler, compiles the generated PTX "
-            "code with it, puts the cubin binary generated by ptxas into the "
-            "executable. '--iree-hal-cuda-llvm-target-arch' is used as the "
-            "target GPU. If passing additional parameters to ptxas is desired, "
-            "the "
-            "parameters flag can be used "
-            "(e.g.'--iree-hal-cuda-use-ptxas-params=-v')."));
+        "iree-cuda-use-ptxas-from", clUsePtxasFrom, llvm::cl::cat(category),
+        llvm::cl::desc("Uses the ptxas tool from the given path. Requires "
+                       "'--iree-cuda-use-ptxas' to be true."));
 
     binder.opt<std::string>(
-        "iree-hal-cuda-use-ptxas-params", clUsePtxasParams,
-        llvm::cl::cat(category),
-        llvm::cl::desc("Passes the given additional parameters to ptxas."));
+        "iree-cuda-use-ptxas-params", clUsePtxasParams, llvm::cl::cat(category),
+        llvm::cl::desc("Passes the given additional parameters to ptxas. "
+                       "Requires '--iree-cuda-use-ptxas' to be true."));
   }
 
   LogicalResult verify(mlir::Builder &builder) const {
-    if (GPU::normalizeCUDATarget(clTargetChip).empty()) {
+    if (GPU::normalizeCUDATarget(clTarget).empty()) {
       return emitError(builder.getUnknownLoc(), "Unknown CUDA target '")
-             << clTargetChip << "'";
+             << clTarget << "'";
     }
     return success();
   }
@@ -131,7 +136,7 @@ static FailureOr<std::string> findPtxasCompiler(const CUDAOptions &options,
 
   *message = std::string(
       "Could not find ptxas compiler. Try passing it explicitly with "
-      "--iree-hal-cuda-use-ptxas-from=<path> flag");
+      "--iree-cuda-use-ptxas-from=<path> flag");
   return failure();
 }
 
@@ -437,7 +442,7 @@ class CUDATargetBackend final : public TargetBackend {
       return nullptr;
 
     if (auto target = GPU::getCUDATargetDetails(
-            options.clTargetChip, options.clTargetFeature, context))
+            options.clTarget, options.clTargetFeatures, context))
       addConfig("iree.gpu.target", target);
 
     return b.getAttr<IREE::HAL::ExecutableTargetAttr>(
@@ -471,8 +476,8 @@ class CUDATargetBackend final : public TargetBackend {
                                     IREE::HAL::ExecutableVariantOp variantOp,
                                     OpBuilder &executableBuilder) override {
     auto targetAttr = variantOp.getTargetAttr();
-    StringRef targetArch = options.clTargetChip;
-    StringRef targetFeatures = options.clTargetFeature;
+    StringRef targetArch = options.clTarget;
+    StringRef targetFeatures = options.clTargetFeatures;
     if (auto attr = getGPUTargetAttr(targetAttr)) {
       targetArch = attr.getArch();
       targetFeatures = attr.getFeatures();

diff --git a/compiler/plugins/target/CUDA/test/smoketest.mlir b/compiler/plugins/target/CUDA/test/smoketest.mlir
@@ -1,5 +1,5 @@
 // RUN: iree-opt --split-input-file --iree-hal-transformation-pipeline --iree-gpu-test-target=sm_60 %s | FileCheck %s
-// RUN: iree-opt --split-input-file --iree-hal-transformation-pipeline --iree-gpu-test-target=sm_60 --iree-hal-cuda-dump-ptx %s 2>&1 | FileCheck %s --check-prefix=PTX
+// RUN: iree-opt --split-input-file --iree-hal-transformation-pipeline --iree-gpu-test-target=sm_60 --iree-cuda-dump-ptx %s 2>&1 | FileCheck %s --check-prefix=PTX
 
 #map = affine_map<(d0) -> (d0)>
 

diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp
@@ -56,7 +56,7 @@ namespace mlir::iree_compiler::IREE::HAL {
 namespace {
 
 struct ROCmOptions {
-  std::string targetChip = "gfx908";
+  std::string target = "gfx908";
   std::string targetFeatures = "";
   std::string bitcodeDirectory = getDefaultBitcodeDirectory();
   int wavesPerEu = 0;
@@ -65,39 +65,51 @@ struct ROCmOptions {
 
   void bindOptions(OptionsBinder &binder) {
     using namespace llvm;
-    static cl::OptionCategory category("ROCm HAL Target");
-    binder.opt<std::string>("iree-rocm-target-chip", targetChip,
-                            cl::cat(category), cl::desc("ROCm target chip."));
+    static cl::OptionCategory category("HIP HAL Target");
     binder.opt<std::string>(
-        "iree-rocm-target-features", targetFeatures, cl::cat(category),
-        cl::desc("ROCm target features; e.g., '+sramecc,+xnack'."));
-    binder.opt<std::string>("iree-rocm-bc-dir", bitcodeDirectory,
+        "iree-hip-target", target, cl::cat(category),
+        cl::desc(
+            // clang-format off
+            "HIP target as expected by LLVM AMDGPU backend; e.g., "
+            "'gfx90a'/'gfx942' for targeting MI250/MI300 GPUs. "
+            "Additionally this also supports architecture code names like "
+            "'cdna3'/'rdna3' or some product names like 'mi300x'/'rtx7900xtx' "
+            "for a better experience. See "
+            "https://iree.dev/guides/deployment-configurations/gpu-rocm/ "
+            "for more details."
+            // clang-format on
+            ));
+    binder.opt<std::string>(
+        "iree-hip-target-features", targetFeatures, cl::cat(category),
+        cl::desc("HIP target features as expected by LLVM AMDGPU backend; "
+                 "e.g., '+sramecc,+xnack'."));
+    binder.opt<std::string>("iree-hip-bc-dir", bitcodeDirectory,
                             cl::cat(category),
-                            cl::desc("Directory of ROCm Bitcode."));
-    binder.opt<int>("iree-rocm-waves-per-eu", wavesPerEu, cl::cat(category),
+                            cl::desc("Directory of HIP Bitcode."));
+    binder.opt<int>("iree-hip-waves-per-eu", wavesPerEu, cl::cat(category),
                     cl::desc("Optimization hint specifying minimum "
                              "number of waves per execution unit."));
     binder.opt<std::string>(
-        "iree-rocm-enable-ukernels", enableROCMUkernels, cl::cat(category),
-        cl::desc("Enables microkernels in the rocm compiler backend. May be "
+        "iree-hip-enable-ukernels", enableROCMUkernels, cl::cat(category),
+        cl::desc("Enables microkernels in the HIP compiler backend. May be "
                  "`default`, `none`, `all`, or a comma-separated list of "
                  "specific unprefixed microkernels to enable, e.g. `mmt4d`."));
-    binder.opt<bool>("iree-rocm-legacy-sync", legacySync, cl::cat(category),
+    binder.opt<bool>("iree-hip-legacy-sync", legacySync, cl::cat(category),
                      cl::desc("Enables 'legacy-sync' mode, which is required "
                               "for inline execution."));
   }
 
   LogicalResult verify(mlir::Builder &builder) const {
-    if (GPU::normalizeHIPTarget(targetChip).empty()) {
-      return emitError(builder.getUnknownLoc(), "Unknown ROCm target '")
-             << targetChip << "'";
+    if (GPU::normalizeHIPTarget(target).empty()) {
+      return emitError(builder.getUnknownLoc(), "Unknown HIP target '")
+             << target << "'";
     }
     SmallVector<StringRef> features;
     llvm::SplitString(targetFeatures, features, ",");
     for (StringRef f : features) {
       if (!(f.starts_with("+") || f.starts_with("-"))) {
         return emitError(builder.getUnknownLoc(),
-                         "ROCm target feature must be prefixed with '+' or "
+                         "HIP target feature must be prefixed with '+' or "
                          "'-'; but seen '")
                << f << "'";
       }
@@ -106,7 +118,7 @@ struct ROCmOptions {
         // We only support these two features to be set explicitly. Features
         // like wavefrontsize are controlled and tuned by the compiler.
         return emitError(builder.getUnknownLoc(),
-                         "ROCm target feature can only be 'sramecc' or "
+                         "HIP target feature can only be 'sramecc' or "
                          "'xnack'; but seen '")
                << feature << "'";
       }
@@ -259,7 +271,7 @@ class ROCMTargetBackend final : public TargetBackend {
     if (failed(options.verify(b)))
       return nullptr;
 
-    if (auto target = GPU::getHIPTargetDetails(options.targetChip,
+    if (auto target = GPU::getHIPTargetDetails(options.target,
                                                options.targetFeatures, context))
       addConfig("iree.gpu.target", target);
 
@@ -336,7 +348,7 @@ class ROCMTargetBackend final : public TargetBackend {
                                     OpBuilder &executableBuilder) override {
     ModuleOp innerModuleOp = variantOp.getInnerModule();
     auto targetAttr = variantOp.getTargetAttr();
-    StringRef targetArch = options.targetChip;
+    StringRef targetArch = options.target;
     StringRef targetFeatures = options.targetFeatures;
     if (auto attr = getGPUTargetAttr(targetAttr)) {
       targetArch = attr.getArch();
@@ -517,7 +529,7 @@ class ROCMTargetBackend final : public TargetBackend {
         return variantOp.emitError()
                << "cannot find ROCM bitcode files. Check your installation "
                   "consistency and in the worst case, set "
-                  "--iree-rocm-bc-dir= to a path on your system.";
+                  "--iree-hip-bc-dir= to a path on your system.";
       }
       if (failed(linkHIPBitcodeIfNeeded(variantOp.getLoc(), llvmModule.get(),
                                         targetArch, bitcodeDirectory))) {

diff --git a/compiler/plugins/target/ROCM/builtins/ukernel/test/argmax_linking.mlir b/compiler/plugins/target/ROCM/builtins/ukernel/test/argmax_linking.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-compile --split-input-file --iree-hal-target-backends=rocm --iree-rocm-enable-ukernels=all --iree-rocm-target-chip=gfx1100 --compile-to=executable-targets %s | FileCheck %s
+// RUN: iree-compile --split-input-file --iree-hal-target-backends=rocm --iree-hip-enable-ukernels=all --iree-hip-target=gfx1100 --compile-to=executable-targets %s | FileCheck %s
 
 // We want to check that uKernel is indeed generated from e2e workflow.
 

diff --git a/compiler/plugins/target/ROCM/test/target_device_features.mlir b/compiler/plugins/target/ROCM/test/target_device_features.mlir
@@ -1,7 +1,7 @@
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=mi300x %s | FileCheck %s --check-prefix=GFX942
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx940 %s | FileCheck %s --check-prefix=GFX940
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=rx7900xtx %s | FileCheck %s --check-prefix=GFX1100
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx941 --iree-rocm-target-features=+sramecc,-xnack %s | FileCheck %s --check-prefix=GFX941
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=mi300x %s | FileCheck %s --check-prefix=GFX942
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=gfx940 %s | FileCheck %s --check-prefix=GFX940
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=rx7900xtx %s | FileCheck %s --check-prefix=GFX1100
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=gfx941 --iree-hip-target-features=+sramecc,-xnack %s | FileCheck %s --check-prefix=GFX941
 
 // GFX942: target = #iree_gpu.target<arch = "gfx942",
 // GFX942-SAME: wgp = <compute =  fp64|fp32|fp16|int64|int32|int16|int8, storage =  b64|b32|b16|b8,

diff --git a/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp
@@ -35,16 +35,23 @@ struct VulkanSPIRVTargetOptions {
   // Use vp_android_baseline_2022 profile as the default target--it's a good
   // lowest common denominator to guarantee the generated SPIR-V is widely
   // accepted for now. Eventually we want to use a list for multi-targeting.
-  std::string targetTriple = "vp_android_baseline_2022";
+  std::string target = "vp_android_baseline_2022";
   bool indirectBindings = false;
 
   void bindOptions(OptionsBinder &binder) {
     static llvm::cl::OptionCategory category("VulkanSPIRV HAL Target");
     binder.opt<std::string>(
-        // TODO: Rename this as target given it's not a triple anymore.
-        "iree-vulkan-target-triple", targetTriple,
+        "iree-vulkan-target", target,
         llvm::cl::desc(
-            "Vulkan target triple controlling the SPIR-V environment."));
+            "Vulkan target controlling the SPIR-V environment. Given the wide "
+            "support of Vulkan, this option supports a few schemes: 1) LLVM "
+            "CodeGen backend style: e.g., 'gfx*' for AMD GPUs and 'sm_*' for "
+            "NVIDIA GPUs; 2) architecture code name style: e.g., "
+            "'rdna3'/'valhall4'/'ampere'/'adreno' for AMD/ARM/NVIDIA/Qualcomm "
+            "GPUs; 3) product name style: 'rx7900xtx'/'rtx4090' for AMD/NVIDIA "
+            "GPUs. See "
+            "https://iree.dev/guides/deployment-configurations/gpu-vulkan/ for "
+            "more details."));
     binder.opt<bool>(
         "iree-vulkan-experimental-indirect-bindings", indirectBindings,
         llvm::cl::desc(
@@ -103,13 +110,11 @@ class VulkanSPIRVTargetBackend : public TargetBackend {
       configItems.emplace_back(b.getStringAttr(name), value);
     };
 
-    // We only care about the architecture right now.
-    StringRef arch = StringRef(options_.targetTriple).split("-").first;
-    if (auto target = GPU::getVulkanTargetDetails(arch, context)) {
+    if (auto target = GPU::getVulkanTargetDetails(options_.target, context)) {
       addConfig("iree.gpu.target", target);
     } else {
       emitError(b.getUnknownLoc(), "Unknown Vulkan target '")
-          << options_.targetTriple << "'";
+          << options_.target << "'";
       return nullptr;
     }
 

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
@@ -43,7 +43,7 @@ namespace mlir::iree_compiler {
 #include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc"
 
 static llvm::cl::opt<int>
-    clROCMIndexingBits("iree-rocm-index-bits",
+    clROCMIndexingBits("iree-hip-index-bits",
                        llvm::cl::desc("Set the bit width of indices in ROCm."),
                        llvm::cl::init(64));
 

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir
@@ -1,5 +1,5 @@
 // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" %s | FileCheck %s
-// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" --iree-rocm-index-bits=32 %s | FileCheck %s --check-prefix=INDEX32
+// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" --iree-hip-index-bits=32 %s | FileCheck %s --check-prefix=INDEX32
 
 // Test that standard and GPU ops are converted to LLVM and NVVM.
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [

diff --git a/docs/website/docs/developers/debugging/compile-time-regressions.md b/docs/website/docs/developers/debugging/compile-time-regressions.md
@@ -102,7 +102,7 @@ TMP_DIR="../iree-tmp"
 declare -a COMPILER_FLAGS=(
   "--iree-input-type=stablehlo"
   "--iree-hal-target-backends=cuda"
-  "--iree-hal-cuda-llvm-target-arch=sm_80"
+  "--iree-cuda-target=sm_80"
 )
 
 TIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE=10