diff --git a/build_tools/cmake/iree_check_test.cmake b/build_tools/cmake/iree_check_test.cmake index aa37760295aa..cb6d32293d07 100644 --- a/build_tools/cmake/iree_check_test.cmake +++ b/build_tools/cmake/iree_check_test.cmake @@ -166,7 +166,7 @@ function(iree_check_test) list(APPEND _BASE_COMPILER_FLAGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}") endif() if(_NORMALIZED_TARGET_BACKEND STREQUAL "ROCM") - list(APPEND _BASE_COMPILER_FLAGS "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}") + list(APPEND _BASE_COMPILER_FLAGS "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}") endif() if(_BYTECODE_MODULE_BUILD_ENABLED) diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json index 0ac678de2d74..3ffd0431c765 100644 --- a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json +++ b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json @@ -2,7 +2,7 @@ "config_name": "gpu_rocm", "iree_compile_flags": [ "--iree-hal-target-backends=rocm", - "--iree-rocm-target-chip=gfx90a", + "--iree-hip-target=gfx90a", "--iree-input-demote-f64-to-f32" ], "iree_run_module_flags": [ diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json index de01f7ef038b..bc1907ad2edb 100644 --- a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json +++ b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json @@ -2,7 +2,7 @@ "config_name": "gpu_rocm", "iree_compile_flags": [ "--iree-hal-target-backends=rocm", - "--iree-rocm-target-chip=gfx942", + "--iree-hip-target=gfx942", "--iree-input-demote-f64-to-f32" ], "iree_run_module_flags": [ diff --git a/compiler/plugins/target/CUDA/CUDATarget.cpp b/compiler/plugins/target/CUDA/CUDATarget.cpp index 7e78ae3eeb2c..331b5a180e26 100644 --- a/compiler/plugins/target/CUDA/CUDATarget.cpp +++ b/compiler/plugins/target/CUDA/CUDATarget.cpp @@ -55,59 +55,64 @@ namespace mlir::iree_compiler::IREE::HAL { namespace { struct CUDAOptions { bool dumpPtx = false; - std::string clTargetChip = "sm_60"; - std::string clTargetFeature = "+ptx76"; + std::string clTarget = "sm_60"; + std::string clTargetFeatures = "+ptx76"; bool clUsePtxas = false; std::string clUsePtxasFrom; std::string clUsePtxasParams; void bindOptions(OptionsBinder &binder) { static llvm::cl::OptionCategory category("CUDA HAL Target"); - binder.opt("iree-hal-cuda-dump-ptx", dumpPtx, llvm::cl::cat(category), + binder.opt("iree-cuda-dump-ptx", dumpPtx, llvm::cl::cat(category), llvm::cl::desc("Dump ptx to the debug stream.")); - binder.opt("iree-hal-cuda-llvm-target-arch", clTargetChip, - llvm::cl::cat(category), - llvm::cl::desc("LLVM target chip.")); + binder.opt( + "iree-cuda-target", clTarget, llvm::cl::cat(category), + llvm::cl::desc( + // clang-format off + "CUDA target as expected by LLVM NVPTX backend; e.g., " + "'sm_80'/'sm_90' for targeting Ampere/Hopper GPUs. " + "Additionally this also supports architecture code names like " + "'turing'/'ampere' or some product names like 'a100'/'rtx3090ti' " + "for a better experience. See " + "https://iree.dev/guides/deployment-configurations/gpu-cuda " + "for more details." 
+ // clang-format on + )); - binder.opt("iree-hal-cuda-llvm-target-feature", - clTargetFeature, llvm::cl::cat(category), - llvm::cl::desc("Use to set PTX version.")); + binder.opt( + "iree-cuda-target-features", clTargetFeatures, llvm::cl::cat(category), + llvm::cl::desc( + "CUDA target features as expected by LLVM NVPTX backend; e.g. " + "use '+ptxNN' to set PTX version to NN.")); binder.opt( - "iree-hal-cuda-use-ptxas", clUsePtxas, llvm::cl::cat(category), - llvm::cl::desc("It uses the ptxas compiler that is on the environment, " - "compiles the " - "generated PTX code with it, puts the cubin binary " - "generated by ptxas " - "into the executable. " - "'--iree-hal-cuda-llvm-target-arch' is used as " - "the target GPU. If passing additional parameters to " - "ptxas is desired, " - "the parameters flag can be used " - "(e.g.'--iree-hal-cuda-use-ptxas-params=-v').")); + "iree-cuda-use-ptxas", clUsePtxas, llvm::cl::cat(category), + llvm::cl::desc( + "Whether to use the ptxas tool to assemble the generated PTX " + "code and put the generated CUBIN binary file into the executable. " + "If not set, directly embeds the PTX into the executable. " + "To specify the exact ptxas tool path, use " + "'--iree-cuda-use-ptxas-from'. To pass " + "additional parameters to ptxas, use " + "'--iree-cuda-use-ptxas-params', e.g. " + "'--iree-cuda-use-ptxas-params=-v'")); binder.opt( - "iree-hal-cuda-use-ptxas-from", clUsePtxasFrom, llvm::cl::cat(category), - llvm::cl::desc( - "It uses the provided ptxas compiler, compiles the generated PTX " - "code with it, puts the cubin binary generated by ptxas into the " - "executable. '--iree-hal-cuda-llvm-target-arch' is used as the " - "target GPU. If passing additional parameters to ptxas is desired, " - "the " - "parameters flag can be used " - "(e.g.'--iree-hal-cuda-use-ptxas-params=-v').")); + "iree-cuda-use-ptxas-from", clUsePtxasFrom, llvm::cl::cat(category), + llvm::cl::desc("Uses the ptxas tool from the given path. Requires " + "'--iree-cuda-use-ptxas' to be true.")); binder.opt( - "iree-hal-cuda-use-ptxas-params", clUsePtxasParams, - llvm::cl::cat(category), - llvm::cl::desc("Passes the given additional parameters to ptxas.")); + "iree-cuda-use-ptxas-params", clUsePtxasParams, llvm::cl::cat(category), + llvm::cl::desc("Passes the given additional parameters to ptxas. " + "Requires '--iree-cuda-use-ptxas' to be true.")); } LogicalResult verify(mlir::Builder &builder) const { - if (GPU::normalizeCUDATarget(clTargetChip).empty()) { + if (GPU::normalizeCUDATarget(clTarget).empty()) { return emitError(builder.getUnknownLoc(), "Unknown CUDA target '") - << clTargetChip << "'"; + << clTarget << "'"; } return success(); } @@ -131,7 +136,7 @@ static FailureOr findPtxasCompiler(const CUDAOptions &options, *message = std::string( "Could not find ptxas compiler. 
Try passing it explicitly with " - "--iree-hal-cuda-use-ptxas-from= flag"); + "--iree-cuda-use-ptxas-from= flag"); return failure(); } @@ -437,7 +442,7 @@ class CUDATargetBackend final : public TargetBackend { return nullptr; if (auto target = GPU::getCUDATargetDetails( - options.clTargetChip, options.clTargetFeature, context)) + options.clTarget, options.clTargetFeatures, context)) addConfig("iree.gpu.target", target); return b.getAttr( @@ -471,8 +476,8 @@ class CUDATargetBackend final : public TargetBackend { IREE::HAL::ExecutableVariantOp variantOp, OpBuilder &executableBuilder) override { auto targetAttr = variantOp.getTargetAttr(); - StringRef targetArch = options.clTargetChip; - StringRef targetFeatures = options.clTargetFeature; + StringRef targetArch = options.clTarget; + StringRef targetFeatures = options.clTargetFeatures; if (auto attr = getGPUTargetAttr(targetAttr)) { targetArch = attr.getArch(); targetFeatures = attr.getFeatures(); diff --git a/compiler/plugins/target/CUDA/test/smoketest.mlir b/compiler/plugins/target/CUDA/test/smoketest.mlir index fc7d8fcd1b55..7843f1f48a04 100644 --- a/compiler/plugins/target/CUDA/test/smoketest.mlir +++ b/compiler/plugins/target/CUDA/test/smoketest.mlir @@ -1,5 +1,5 @@ // RUN: iree-opt --split-input-file --iree-hal-transformation-pipeline --iree-gpu-test-target=sm_60 %s | FileCheck %s -// RUN: iree-opt --split-input-file --iree-hal-transformation-pipeline --iree-gpu-test-target=sm_60 --iree-hal-cuda-dump-ptx %s 2>&1 | FileCheck %s --check-prefix=PTX +// RUN: iree-opt --split-input-file --iree-hal-transformation-pipeline --iree-gpu-test-target=sm_60 --iree-cuda-dump-ptx %s 2>&1 | FileCheck %s --check-prefix=PTX #map = affine_map<(d0) -> (d0)> diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp index d6cc400a299e..c1dc78090d20 100644 --- a/compiler/plugins/target/ROCM/ROCMTarget.cpp +++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp @@ -56,7 +56,7 @@ namespace mlir::iree_compiler::IREE::HAL { namespace { struct ROCmOptions { - std::string targetChip = "gfx908"; + std::string target = "gfx908"; std::string targetFeatures = ""; std::string bitcodeDirectory = getDefaultBitcodeDirectory(); int wavesPerEu = 0; @@ -65,39 +65,51 @@ struct ROCmOptions { void bindOptions(OptionsBinder &binder) { using namespace llvm; - static cl::OptionCategory category("ROCm HAL Target"); - binder.opt("iree-rocm-target-chip", targetChip, - cl::cat(category), cl::desc("ROCm target chip.")); + static cl::OptionCategory category("HIP HAL Target"); binder.opt( - "iree-rocm-target-features", targetFeatures, cl::cat(category), - cl::desc("ROCm target features; e.g., '+sramecc,+xnack'.")); - binder.opt("iree-rocm-bc-dir", bitcodeDirectory, + "iree-hip-target", target, cl::cat(category), + cl::desc( + // clang-format off + "HIP target as expected by LLVM AMDGPU backend; e.g., " + "'gfx90a'/'gfx942' for targeting MI250/MI300 GPUs. " + "Additionally this also supports architecture code names like " + "'cdna3'/'rdna3' or some product names like 'mi300x'/'rtx7900xtx' " + "for a better experience. See " + "https://iree.dev/guides/deployment-configurations/gpu-rocm/ " + "for more details." 
+ // clang-format on + )); + binder.opt( + "iree-hip-target-features", targetFeatures, cl::cat(category), + cl::desc("HIP target features as expected by LLVM AMDGPU backend; " + "e.g., '+sramecc,+xnack'.")); + binder.opt("iree-hip-bc-dir", bitcodeDirectory, cl::cat(category), - cl::desc("Directory of ROCm Bitcode.")); - binder.opt("iree-rocm-waves-per-eu", wavesPerEu, cl::cat(category), + cl::desc("Directory of HIP Bitcode.")); + binder.opt("iree-hip-waves-per-eu", wavesPerEu, cl::cat(category), cl::desc("Optimization hint specifying minimum " "number of waves per execution unit.")); binder.opt( - "iree-rocm-enable-ukernels", enableROCMUkernels, cl::cat(category), - cl::desc("Enables microkernels in the rocm compiler backend. May be " + "iree-hip-enable-ukernels", enableROCMUkernels, cl::cat(category), + cl::desc("Enables microkernels in the HIP compiler backend. May be " "`default`, `none`, `all`, or a comma-separated list of " "specific unprefixed microkernels to enable, e.g. `mmt4d`.")); - binder.opt("iree-rocm-legacy-sync", legacySync, cl::cat(category), + binder.opt("iree-hip-legacy-sync", legacySync, cl::cat(category), cl::desc("Enables 'legacy-sync' mode, which is required " "for inline execution.")); } LogicalResult verify(mlir::Builder &builder) const { - if (GPU::normalizeHIPTarget(targetChip).empty()) { - return emitError(builder.getUnknownLoc(), "Unknown ROCm target '") - << targetChip << "'"; + if (GPU::normalizeHIPTarget(target).empty()) { + return emitError(builder.getUnknownLoc(), "Unknown HIP target '") + << target << "'"; } SmallVector features; llvm::SplitString(targetFeatures, features, ","); for (StringRef f : features) { if (!(f.starts_with("+") || f.starts_with("-"))) { return emitError(builder.getUnknownLoc(), - "ROCm target feature must be prefixed with '+' or " + "HIP target feature must be prefixed with '+' or " "'-'; but seen '") << f << "'"; } @@ -106,7 +118,7 @@ struct ROCmOptions { // We only support these two features to be set explicitly. Features // like wavefrontsize is controlled and tuned by the compiler. return emitError(builder.getUnknownLoc(), - "ROCm target feature can only be 'sramecc' or " + "HIP target feature can only be 'sramecc' or " "'xnack'; but seen '") << feature << "'"; } @@ -259,7 +271,7 @@ class ROCMTargetBackend final : public TargetBackend { if (failed(options.verify(b))) return nullptr; - if (auto target = GPU::getHIPTargetDetails(options.targetChip, + if (auto target = GPU::getHIPTargetDetails(options.target, options.targetFeatures, context)) addConfig("iree.gpu.target", target); @@ -336,7 +348,7 @@ class ROCMTargetBackend final : public TargetBackend { OpBuilder &executableBuilder) override { ModuleOp innerModuleOp = variantOp.getInnerModule(); auto targetAttr = variantOp.getTargetAttr(); - StringRef targetArch = options.targetChip; + StringRef targetArch = options.target; StringRef targetFeatures = options.targetFeatures; if (auto attr = getGPUTargetAttr(targetAttr)) { targetArch = attr.getArch(); @@ -517,7 +529,7 @@ class ROCMTargetBackend final : public TargetBackend { return variantOp.emitError() << "cannot find ROCM bitcode files. 
Check your installation " "consistency and in the worst case, set " - "--iree-rocm-bc-dir= to a path on your system."; + "--iree-hip-bc-dir= to a path on your system."; } if (failed(linkHIPBitcodeIfNeeded(variantOp.getLoc(), llvmModule.get(), targetArch, bitcodeDirectory))) { diff --git a/compiler/plugins/target/ROCM/builtins/ukernel/test/argmax_linking.mlir b/compiler/plugins/target/ROCM/builtins/ukernel/test/argmax_linking.mlir index f63a94144598..5b4b416e5059 100644 --- a/compiler/plugins/target/ROCM/builtins/ukernel/test/argmax_linking.mlir +++ b/compiler/plugins/target/ROCM/builtins/ukernel/test/argmax_linking.mlir @@ -1,4 +1,4 @@ -// RUN: iree-compile --split-input-file --iree-hal-target-backends=rocm --iree-rocm-enable-ukernels=all --iree-rocm-target-chip=gfx1100 --compile-to=executable-targets %s | FileCheck %s +// RUN: iree-compile --split-input-file --iree-hal-target-backends=rocm --iree-hip-enable-ukernels=all --iree-hip-target=gfx1100 --compile-to=executable-targets %s | FileCheck %s // We want to check that uKernel is indeed generated from e2e workflow. diff --git a/compiler/plugins/target/ROCM/test/target_device_features.mlir b/compiler/plugins/target/ROCM/test/target_device_features.mlir index 8f6a88f5fa71..070a735a89a9 100644 --- a/compiler/plugins/target/ROCM/test/target_device_features.mlir +++ b/compiler/plugins/target/ROCM/test/target_device_features.mlir @@ -1,7 +1,7 @@ -// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=mi300x %s | FileCheck %s --check-prefix=GFX942 -// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx940 %s | FileCheck %s --check-prefix=GFX940 -// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=rx7900xtx %s | FileCheck %s --check-prefix=GFX1100 -// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx941 --iree-rocm-target-features=+sramecc,-xnack %s | FileCheck %s --check-prefix=GFX941 +// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=mi300x %s | FileCheck %s --check-prefix=GFX942 +// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=gfx940 %s | FileCheck %s --check-prefix=GFX940 +// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=rx7900xtx %s | FileCheck %s --check-prefix=GFX1100 +// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=gfx941 --iree-hip-target-features=+sramecc,-xnack %s | FileCheck %s --check-prefix=GFX941 // GFX942: target = #iree_gpu.target( - // TODO: Rename this as target given it's not a triple anymore. 
- "iree-vulkan-target-triple", targetTriple, + "iree-vulkan-target", target, llvm::cl::desc( - "Vulkan target triple controlling the SPIR-V environment.")); + "Vulkan target controlling the SPIR-V environment. Given the wide " + "support of Vulkan, this option supports a few schemes: 1) LLVM " + "CodeGen backend style: e.g., 'gfx*' for AMD GPUs and 'sm_*' for " + "NVIDIA GPUs; 2) architecture code name style: e.g., " + "'rdna3'/'valhall4'/'ampere'/'adreno' for AMD/ARM/NVIDIA/Qualcomm " + "GPUs; 3) product name style: 'rx7900xtx'/'rtx4090' for AMD/NVIDIA " + "GPUs. See " + "https://iree.dev/guides/deployment-configurations/gpu-vulkan/ for " + "more details.")); binder.opt( "iree-vulkan-experimental-indirect-bindings", indirectBindings, llvm::cl::desc( @@ -103,13 +110,11 @@ class VulkanSPIRVTargetBackend : public TargetBackend { configItems.emplace_back(b.getStringAttr(name), value); }; - // We only care about the architecture right now. - StringRef arch = StringRef(options_.targetTriple).split("-").first; - if (auto target = GPU::getVulkanTargetDetails(arch, context)) { + if (auto target = GPU::getVulkanTargetDetails(options_.target, context)) { addConfig("iree.gpu.target", target); } else { emitError(b.getUnknownLoc(), "Unknown Vulkan target '") - << options_.targetTriple << "'"; + << options_.target << "'"; return nullptr; } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp index 215a4bb8a237..94500d5dc388 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp @@ -43,7 +43,7 @@ namespace mlir::iree_compiler { #include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" static llvm::cl::opt - clROCMIndexingBits("iree-rocm-index-bits", + clROCMIndexingBits("iree-hip-index-bits", llvm::cl::desc("Set the bit width of indices in ROCm."), llvm::cl::init(64)); diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir index d5c993174407..a88ec61be9cc 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir @@ -1,5 +1,5 @@ // RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" %s | FileCheck %s -// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" --iree-rocm-index-bits=32 %s | FileCheck %s --check-prefix=INDEX32 +// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" --iree-hip-index-bits=32 %s | FileCheck %s --check-prefix=INDEX32 // Test that that standard and GPU ops are converted to LLVM and NVVM. #pipeline_layout = #hal.pipeline.layout \ + --iree-cuda-target=<...> \ mobilenet_iree_input.mlir -o mobilenet_cuda.vmfb ``` -Note that a cuda target architecture (`iree-hal-cuda-llvm-target-arch`) of -the form `sm_` is needed to compile towards each GPU -architecture. If no architecture is specified then we will default to -`sm_35`. 
+Canonically a CUDA target (`iree-cuda-target`) matching the LLVM NVPTX backend +of the form `sm_<arch number>` is needed to compile towards each GPU +architecture. If no architecture is specified then we will default to `sm_60`. Here is a table of commonly used architectures: -| CUDA GPU | Target Architecture | -| ----------- | ------------------- | -| Nvidia K80 | `sm_35` | -| Nvidia P100 | `sm_60` | -| Nvidia V100 | `sm_70` | -| Nvidia A100 | `sm_80` | +| CUDA GPU | Target Architecture | Architecture Code Name +| ------------------- | ------------------- | ---------------------- +| NVIDIA P100 | `sm_60` | `pascal` +| NVIDIA V100 | `sm_70` | `volta` +| NVIDIA A100 | `sm_80` | `ampere` +| NVIDIA H100 | `sm_90` | `hopper` +| NVIDIA RTX20 series | `sm_75` | `turing` +| NVIDIA RTX30 series | `sm_86` | `ampere` +| NVIDIA RTX40 series | `sm_89` | `ada` + +In addition to the canonical `sm_<arch number>` scheme, `iree-cuda-target` also +supports two additional schemes for a better developer experience: + +* Architecture code names like `volta` or `ampere` +* GPU product names like `a100` or `rtx3090` + +These two schemes are translated into the canonical form under the hood. +We add support for common code/product names without aiming to be exhaustive. +If the ones you want are missing, please use the canonical form. ### :octicons-terminal-16: Run a compiled program diff --git a/docs/website/docs/guides/deployment-configurations/gpu-rocm.md b/docs/website/docs/guides/deployment-configurations/gpu-rocm.md index 9f7d34ca1491..1dc4232c9f65 100644 --- a/docs/website/docs/guides/deployment-configurations/gpu-rocm.md +++ b/docs/website/docs/guides/deployment-configurations/gpu-rocm.md @@ -76,31 +76,47 @@ following commands to compile: ```shell hl_lines="2-5" iree-compile \ --iree-hal-target-backends=rocm \ - --iree-rocm-target-chip=<...> \ + --iree-hip-target=<...> \ mobilenet_iree_input.mlir -o mobilenet_rocm.vmfb ``` Note that IREE comes with bundled bitcode files, which are used for linking certain intrinsics on AMD GPUs. These will be used automatically or if the -`--iree-rocm-bc-dir` is empty. As additional support may be needed for +`--iree-hip-bc-dir` is empty. As additional support may be needed for different chips, users can use this flag to point to an explicit directory. For example, in ROCm installations on Linux, this is often found under `/opt/rocm/amdgcn/bitcode`. -Note that a ROCm target chip (`iree-rocm-target-chip`) of the form -`gfx<arch number>` is needed to compile towards each GPU architecture. If -no architecture is specified then we will default to `gfx908`. +Canonically a HIP target (`iree-hip-target`) matching the LLVM AMDGPU backend +of the form `gfx<arch number>` is needed to compile towards each GPU chip. +If no target is specified then we will default to `gfx908`.
Here is a table of commonly used architectures: -| AMD GPU | Target Chip | -| ---------- | ----------- | -| AMD MI25 | `gfx900` | -| AMD MI50 | `gfx906` | -| AMD MI60 | `gfx906` | -| AMD MI100 | `gfx908` | -| AMD MI300A | `gfx940` | -| AMD MI300 | `gfx942` | +| AMD GPU | Target Chip | Architecture Code Name +| ------------- | ----------- | ---------------------- +| AMD MI100 | `gfx908` | `cdna1` +| AMD MI210 | `gfx90a` | `cdna2` +| AMD MI250 | `gfx90a` | `cdna2` +| AMD MI300A | `gfx940` | `cdna3` +| AMD MI300X | `gfx942` | `cdna3` +| AMD RX7900XTX | `gfx1100` | `rdna3` +| AMD RX7900XT | `gfx1100` | `rdna3` +| AMD RX7800XT | `gfx1101` | `rdna3` +| AMD RX7700XT | `gfx1101` | `rdna3` + +For a more comprehensive list of prior GPU generations, you can refer to the +[LLVM AMDGPU backend](https://llvm.org/docs/AMDGPUUsage.html#processors). + +In addition to the canonical `gfx<arch number>` scheme, `iree-hip-target` also +supports two additional schemes for a better developer experience: + +* Architecture code names like `cdna3` or `rdna3` +* GPU product names like `mi300x` or `rx7900xtx` + +These two schemes are translated into the canonical form under the hood. +We add support for common code/product names without aiming to be exhaustive. +If the ones you want are missing, please use the canonical form. ### :octicons-terminal-16: Run a compiled program diff --git a/docs/website/docs/guides/deployment-configurations/gpu-vulkan.md b/docs/website/docs/guides/deployment-configurations/gpu-vulkan.md index 28b2384e24a6..adbca81db317 100644 --- a/docs/website/docs/guides/deployment-configurations/gpu-vulkan.md +++ b/docs/website/docs/guides/deployment-configurations/gpu-vulkan.md @@ -27,7 +27,7 @@ GPU Vendor | Category | Performance | Focus Architecture ARM Mali GPU | Mobile | Good | Valhall+ Qualcomm Adreno GPU | Mobile | Reasonable | 640+ AMD GPU | Desktop/server | Good | RDNA+ -NVIDIA GPU | Desktop/server | Good | Turing+ +NVIDIA GPU | Desktop/server | Reasonable | Turing+ ## :octicons-download-16: Prerequisites @@ -186,35 +186,51 @@ command to compile with the `vulkan-spirv` target: ``` shell hl_lines="2 3" iree-compile \ --iree-hal-target-backends=vulkan-spirv \ - --iree-vulkan-target-triple=<...> \ + --iree-vulkan-target=<...> \ mobilenet_iree_input.mlir -o mobilenet_vulkan.vmfb ``` -!!! note annotate - Currently a target triple of the form `<vendor/arch>-<product>-<os>` is needed - to compile towards a specific GPU architecture. - - We don't support the full spectrum here(1); the following table summarizes - the currently recognized ones. - - If no triple is specified, then a safe but more limited default will be used. +`iree-vulkan-target` specifies the GPU architecture to target. It accepts a few +schemes: + +* LLVM CodeGen backend style: this uses LLVM AMDGPU/NVPTX CodeGen targets + like `gfx1100` for AMD RX 7900XTX and `sm_86` for NVIDIA RTX 3090 GPUs. +* Architecture code name style: e.g., using `rdna3`/`valhall4`/`ampere`/`adreno` + for AMD/ARM/NVIDIA/Qualcomm GPUs. +* Product name style(1): e.g., using `rx7900xtx`/`a100` for corresponding GPUs.
+ +Here are a few examples showing how you can target various recent common GPUs: + +| GPU | Target Architecture | Architecture Code Name | Product Name +| ------------------- | ------------------- | ---------------------- | ------------ +| AMD RX7900XTX | `gfx1100` | `rdna3` | `rx7900xtx` +| AMD RX7900XT | `gfx1100` | `rdna3` | `rx7900xt` +| AMD RX7800XT | `gfx1101` | `rdna3` | `rx7800xt` +| AMD RX7700XT | `gfx1101` | `rdna3` | `rx7700xt` +| AMD RX6000 series | | `rdna2` | +| AMD RX5000 series | | `rdna1` | +| ARM Mali G715 | | `valhall4` | e.g., `mali-g715` +| ARM Mali G510 | | `valhall3` | e.g., `mali-g510` +| ARM GPUs | | `valhall` | +| NVIDIA RTX40 series | `sm_89` | `ada` | e.g., `rtx4090` +| NVIDIA RTX30 series | `sm_86` | `ampere` | e.g., `rtx3080ti` +| NVIDIA RTX20 series | `sm_75` | `turing` | e.g., `rtx2070super` +| Qualcomm GPUs | | `adreno` | + +If no target is specified, then a safe but more limited default will be used. +!!! note annotate + Note that we don't support the full spectrum of GPUs here(2). This is more of a mechanism to help us develop IREE itself--in the long term we want to perform multiple targeting to generate code for multiple architectures - if no target triple is given. + if no target is given. -1. It's also impossible to capture all details of a Vulkan implementation +1. Note that we only support a very limited set of GPUs that we are actively developing + against in this category, particularly for desktops. +2. It's also impossible to capture all details of a Vulkan implementation with a target triple, given the allowed variances on extensions, properties, limits, etc. So the target triple is just an approximation for usage. -| GPU Vendor | Target Triple | -| ------------------- | --------------------------------------------- | -| ARM Mali GPU | e.g. `valhall-unknown-{android30|android31}` | -| Qualcomm Adreno GPU | e.g. `adreno-unknown-{android30|android31}` | -| AMD GPU | e.g. `{rdna1|rdna2|rdna3}-unknown-unknown` | -| NVIDIA GPU | e.g.
`{turing|ampere}-unknown-unknown` | -| SwiftShader CPU | `cpu-swiftshader-unknown` | - ### :octicons-terminal-16: Run a compiled program In the build directory, run the following command: diff --git a/experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py b/experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py index 85122472c768..c674e5ae47bb 100644 --- a/experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py +++ b/experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py @@ -51,11 +51,11 @@ def run_sdxl_rocm_benchmark(rocm_chip): "iree-compile", f"{benchmark_dir}/sdxl_pipeline_bench_f16.mlir", "--iree-hal-target-backends=rocm", - f"--iree-rocm-target-chip={rocm_chip}", + f"--iree-hip-target={rocm_chip}", "--iree-global-opt-propagate-transposes=true", "--iree-codegen-llvmgpu-use-vector-distribution", "--iree-codegen-gpu-native-math-precision=true", - "--iree-rocm-waves-per-eu=2", + "--iree-hip-waves-per-eu=2", "--iree-opt-outer-dim-concat=true", "--iree-llvmgpu-enable-prefetch", "-o", diff --git a/experimental/regression_suite/shark-test-suite-models/sd3/test_clip.py b/experimental/regression_suite/shark-test-suite-models/sd3/test_clip.py index 40a8cafcb276..1ba461c207ca 100644 --- a/experimental/regression_suite/shark-test-suite-models/sd3/test_clip.py +++ b/experimental/regression_suite/shark-test-suite-models/sd3/test_clip.py @@ -101,12 +101,12 @@ def SD3_CLIP_COMMON_RUN_FLAGS( ROCM_COMPILE_FLAGS = [ "--iree-hal-target-backends=rocm", - f"--iree-rocm-target-chip={rocm_chip}", + f"--iree-hip-target={rocm_chip}", "--iree-input-type=torch", "--iree-opt-const-eval=false", "--iree-global-opt-propagate-transposes=true", "--iree-opt-outer-dim-concat=true", - "--iree-rocm-waves-per-eu=2", + "--iree-hip-waves-per-eu=2", "--iree-llvmgpu-enable-prefetch", "--iree-flow-enable-aggressive-fusion", "--iree-flow-enable-fuse-horizontal-contractions=true", diff --git a/experimental/regression_suite/shark-test-suite-models/sd3/test_mmdit.py b/experimental/regression_suite/shark-test-suite-models/sd3/test_mmdit.py index 506d772ff324..8e839fd4560c 100644 --- a/experimental/regression_suite/shark-test-suite-models/sd3/test_mmdit.py +++ b/experimental/regression_suite/shark-test-suite-models/sd3/test_mmdit.py @@ -82,7 +82,7 @@ def SD3_MMDIT_COMMON_RUN_FLAGS( ROCM_COMPILE_FLAGS = [ "--iree-hal-target-backends=rocm", - f"--iree-rocm-target-chip={rocm_chip}", + f"--iree-hip-target={rocm_chip}", "--iree-opt-const-eval=false", f"--iree-codegen-transform-dialect-library={iree_test_path_extension}/attention_and_matmul_spec.mlir", "--iree-global-opt-propagate-transposes=true", @@ -95,7 +95,7 @@ def SD3_MMDIT_COMMON_RUN_FLAGS( "--iree-opt-data-tiling=false", "--iree-codegen-gpu-native-math-precision=true", "--iree-codegen-llvmgpu-use-vector-distribution", - "--iree-rocm-waves-per-eu=2", + "--iree-hip-waves-per-eu=2", "--iree-execution-model=async-external", "--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline,iree-preprocessing-pad-to-intrinsics)", ] diff --git a/experimental/regression_suite/shark-test-suite-models/sd3/test_vae.py b/experimental/regression_suite/shark-test-suite-models/sd3/test_vae.py index 881d93dbe46d..5367390defe5 100644 --- a/experimental/regression_suite/shark-test-suite-models/sd3/test_vae.py +++ b/experimental/regression_suite/shark-test-suite-models/sd3/test_vae.py @@ -59,12 +59,12 @@ def SD3_VAE_COMMON_RUN_FLAGS( ROCM_COMPILE_FLAGS = [ "--iree-hal-target-backends=rocm", - f"--iree-rocm-target-chip={rocm_chip}", + f"--iree-hip-target={rocm_chip}", 
"--iree-opt-const-eval=false", "--iree-global-opt-propagate-transposes=true", "--iree-opt-outer-dim-concat=true", "--iree-llvmgpu-enable-prefetch=true", - "--iree-rocm-waves-per-eu=2", + "--iree-hip-waves-per-eu=2", "--iree-flow-enable-aggressive-fusion=true", "--iree-codegen-llvmgpu-use-vector-distribution=true", "--iree-execution-model=async-external", diff --git a/experimental/regression_suite/shark-test-suite-models/sdxl/test_clip.py b/experimental/regression_suite/shark-test-suite-models/sdxl/test_clip.py index e4c3c49bd510..6ed745051bbe 100644 --- a/experimental/regression_suite/shark-test-suite-models/sdxl/test_clip.py +++ b/experimental/regression_suite/shark-test-suite-models/sdxl/test_clip.py @@ -87,12 +87,12 @@ def SDXL_CLIP_COMMON_RUN_FLAGS( ROCM_COMPILE_FLAGS = [ "--iree-hal-target-backends=rocm", - f"--iree-rocm-target-chip={rocm_chip}", + f"--iree-hip-target={rocm_chip}", "--iree-input-type=torch", "--iree-opt-const-eval=false", "--iree-global-opt-propagate-transposes=true", "--iree-opt-outer-dim-concat=true", - "--iree-rocm-waves-per-eu=2", + "--iree-hip-waves-per-eu=2", "--iree-llvmgpu-enable-prefetch", "--iree-flow-enable-aggressive-fusion", "--iree-flow-enable-fuse-horizontal-contractions=true", diff --git a/experimental/regression_suite/shark-test-suite-models/sdxl/test_unet.py b/experimental/regression_suite/shark-test-suite-models/sdxl/test_unet.py index 8509dcba39d8..ef2fca97dcc1 100644 --- a/experimental/regression_suite/shark-test-suite-models/sdxl/test_unet.py +++ b/experimental/regression_suite/shark-test-suite-models/sdxl/test_unet.py @@ -88,7 +88,7 @@ def SDXL_UNET_COMMON_RUN_FLAGS( ROCM_COMPILE_FLAGS = [ "--iree-hal-target-backends=rocm", - f"--iree-rocm-target-chip={rocm_chip}", + f"--iree-hip-target={rocm_chip}", "--iree-opt-const-eval=false", f"--iree-codegen-transform-dialect-library={iree_test_path_extension}/attention_and_matmul_spec.mlir", "--iree-global-opt-propagate-transposes=true", @@ -101,7 +101,7 @@ def SDXL_UNET_COMMON_RUN_FLAGS( "--iree-opt-data-tiling=false", "--iree-codegen-gpu-native-math-precision=true", "--iree-codegen-llvmgpu-use-vector-distribution", - "--iree-rocm-waves-per-eu=2", + "--iree-hip-waves-per-eu=2", "--iree-execution-model=async-external", "--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline,iree-preprocessing-pad-to-intrinsics)", "--iree-scheduling-dump-statistics-format=json", @@ -110,7 +110,7 @@ def SDXL_UNET_COMMON_RUN_FLAGS( ROCM_PIPELINE_COMPILE_FLAGS = [ "--iree-hal-target-backends=rocm", - f"--iree-rocm-target-chip={rocm_chip}", + f"--iree-hip-target={rocm_chip}", "--verify=false", "--iree-opt-const-eval=false", ] diff --git a/experimental/regression_suite/shark-test-suite-models/sdxl/test_vae.py b/experimental/regression_suite/shark-test-suite-models/sdxl/test_vae.py index 5b9ab15340c6..bca80733e31e 100644 --- a/experimental/regression_suite/shark-test-suite-models/sdxl/test_vae.py +++ b/experimental/regression_suite/shark-test-suite-models/sdxl/test_vae.py @@ -59,12 +59,12 @@ def SDXL_VAE_COMMON_RUN_FLAGS( ROCM_COMPILE_FLAGS = [ "--iree-hal-target-backends=rocm", - f"--iree-rocm-target-chip={rocm_chip}", + f"--iree-hip-target={rocm_chip}", "--iree-opt-const-eval=false", "--iree-global-opt-propagate-transposes=true", "--iree-opt-outer-dim-concat=true", "--iree-llvmgpu-enable-prefetch=true", - "--iree-rocm-waves-per-eu=2", + "--iree-hip-waves-per-eu=2", "--iree-flow-enable-aggressive-fusion=true", "--iree-codegen-llvmgpu-use-vector-distribution=true", 
"--iree-execution-model=async-external", diff --git a/experimental/regression_suite/tests/pregenerated/test_llama2.py b/experimental/regression_suite/tests/pregenerated/test_llama2.py index 74d5a31bb48c..5c982fa8322e 100644 --- a/experimental/regression_suite/tests/pregenerated/test_llama2.py +++ b/experimental/regression_suite/tests/pregenerated/test_llama2.py @@ -35,10 +35,7 @@ def llama2_7b_f16qi4_stripped_rdna3_vulkan_vmfb(llama2_7b_f16qi4_stripped_source llama2_7b_f16qi4_stripped_source, "rdna3_vulkan", flags=COMMON_FLAGS - + [ - "--iree-hal-target-backends=vulkan-spirv", - "--iree-vulkan-target-triple=rdna3-unknown-linux", - ], + + ["--iree-hal-target-backends=vulkan-spirv", "--iree-vulkan-target=rdna3"], ) @@ -74,10 +71,7 @@ def llama2_7b_f16qi4_a100_vulkan_vmfb(llama2_7b_f16qi4_stripped_source): llama2_7b_f16qi4_stripped_source, "a100_vulkan", flags=COMMON_FLAGS - + [ - "--iree-hal-target-backends=vulkan-spirv", - f"--iree-vulkan-target-triple=ampere-a100-linux", - ], + + ["--iree-hal-target-backends=vulkan-spirv", f"--iree-vulkan-target=ampere"], ) @@ -89,7 +83,7 @@ def llama2_7b_f16qi4_stripped_sm80_cuda_vmfb(llama2_7b_f16qi4_stripped_source): flags=COMMON_FLAGS + [ "--iree-hal-target-backends=cuda", - f"--iree-hal-cuda-llvm-target-arch=sm_80", + f"--iree-cuda-target=sm_80", ], ) @@ -102,7 +96,7 @@ def llama2_7b_f16qi4_stripped_rdna3_rocm_vmfb(llama2_7b_f16qi4_stripped_source): flags=COMMON_FLAGS + [ "--iree-hal-target-backends=rocm", - "--iree-rocm-target-chip=gfx1100", + "--iree-hip-target=gfx1100", ], ) @@ -115,7 +109,7 @@ def llama2_7b_f16qi4_sm80_cuda_vmfb(llama2_7b_f16qi4_source): flags=COMMON_FLAGS + [ "--iree-hal-target-backends=cuda", - f"--iree-hal-cuda-llvm-target-arch=sm_70", + f"--iree-cuda-target=sm_70", ], ) diff --git a/experimental/regression_suite/tests/pregenerated/test_ukernel.py b/experimental/regression_suite/tests/pregenerated/test_ukernel.py index 11806acd250c..61cfd8864d97 100644 --- a/experimental/regression_suite/tests/pregenerated/test_ukernel.py +++ b/experimental/regression_suite/tests/pregenerated/test_ukernel.py @@ -42,8 +42,8 @@ def argmax_ukernel_gfx90a_rocm_vmfb(argmax_ukernel_source): flags=COMMON_FLAGS + [ "--iree-hal-target-backends=rocm", - "--iree-rocm-target-chip=gfx90a", - "--iree-rocm-enable-ukernels=argmax", + "--iree-hip-target=gfx90a", + "--iree-hip-enable-ukernels=argmax", ], ) @@ -56,8 +56,8 @@ def argmax_ukernel_gfx940_rocm_vmfb(argmax_ukernel_source): flags=COMMON_FLAGS + [ "--iree-hal-target-backends=rocm", - "--iree-rocm-target-chip=gfx940", - "--iree-rocm-enable-ukernels=argmax", + "--iree-hip-target=gfx940", + "--iree-hip-enable-ukernels=argmax", ], ) diff --git a/integrations/pjrt/python_packages/iree_rocm_plugin/setup.py b/integrations/pjrt/python_packages/iree_rocm_plugin/setup.py index 808c2a86f521..bad923777abf 100644 --- a/integrations/pjrt/python_packages/iree_rocm_plugin/setup.py +++ b/integrations/pjrt/python_packages/iree_rocm_plugin/setup.py @@ -86,7 +86,7 @@ def build_default_configuration(self): # plugins. This augments the path based scanning that Jax does, which # is not always robust to all packaging circumstances. 
"jax_plugins": [ - "iree-rocm = jax_plugins.iree_rocm", + "iree-hip = jax_plugins.iree_rocm", ], }, install_requires=iree_pjrt_setup.install_requires, diff --git a/runtime/src/iree/hal/drivers/hip/cts/CMakeLists.txt b/runtime/src/iree/hal/drivers/hip/cts/CMakeLists.txt index 1b5b5b7c3ae1..ea1c7a3726ac 100644 --- a/runtime/src/iree/hal/drivers/hip/cts/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/hip/cts/CMakeLists.txt @@ -10,7 +10,7 @@ endif() unset(IREE_HIP_TEST_COMPILER_FLAGS) list(APPEND IREE_HIP_TEST_COMPILER_FLAGS - "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}" + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" ) iree_hal_cts_test_suite( diff --git a/tests/e2e/collectives/CMakeLists.txt b/tests/e2e/collectives/CMakeLists.txt index d33e393a3934..f8c097c5bc19 100644 --- a/tests/e2e/collectives/CMakeLists.txt +++ b/tests/e2e/collectives/CMakeLists.txt @@ -13,7 +13,7 @@ if(IREE_TARGET_BACKEND_CUDA AND IREE_HAL_DRIVER_CUDA) set(COMMON_ARGS "--target_backend=cuda" "--driver=cuda" - "--iree_compiler_args=--iree-hal-cuda-llvm-target-arch=sm_53" + "--iree_compiler_args=--iree-cuda-target=sm_53" ) set(COMMON_LABELS @@ -66,7 +66,7 @@ if(IREE_TARGET_BACKEND_ROCM AND IREE_HAL_DRIVER_HIP AND IREE_HIP_TEST_TARGET_CHI set(COMMON_ARGS "--target_backend=rocm" "--driver=hip" - "--iree_compiler_args=--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}" + "--iree_compiler_args=--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" ) set(COMMON_LABELS diff --git a/tests/e2e/matmul/BUILD.bazel b/tests/e2e/matmul/BUILD.bazel index b4c2b51e429b..d73f468fd1d4 100644 --- a/tests/e2e/matmul/BUILD.bazel +++ b/tests/e2e/matmul/BUILD.bazel @@ -435,7 +435,7 @@ iree_generated_e2e_runner_test( iree_generated_e2e_runner_test( name = "e2e_matmul_cuda_f32_large_tensorcore", compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", + "--iree-cuda-target=sm_80", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -461,7 +461,7 @@ iree_generated_e2e_runner_test( iree_generated_e2e_runner_test( name = "e2e_matmul_cuda_f32_large_unaligned", compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", + "--iree-cuda-target=sm_80", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -486,7 +486,7 @@ iree_generated_e2e_runner_test( iree_generated_e2e_runner_test( name = "e2e_matmul_cuda_f16_large_unaligned", compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", + "--iree-cuda-target=sm_80", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -512,7 +512,7 @@ iree_generated_e2e_runner_test( iree_generated_e2e_runner_test( name = "e2e_matmul_cuda_f32_large_mma_sync", compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", + "--iree-cuda-target=sm_80", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -539,7 +539,7 @@ iree_generated_e2e_runner_test( iree_generated_e2e_runner_test( name = "e2e_matmul_cuda_f16_large_tensorcore", compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", + "--iree-cuda-target=sm_80", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -566,7 +566,7 @@ iree_generated_e2e_runner_test( iree_generated_e2e_runner_test( name = "e2e_matmul_cuda_f16_large_mma_sync", compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", + "--iree-cuda-target=sm_80", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -627,7 +627,7 @@ iree_generated_e2e_runner_test( [iree_generated_e2e_runner_test( name = "e2e_matmul_vulkan_{0}_large_valhall".format(lhs_rhs_type), compiler_flags = [ - 
"--iree-vulkan-target-triple=valhall-unknown-android31", + "--iree-vulkan-target=valhall", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -654,7 +654,7 @@ iree_generated_e2e_runner_test( [iree_generated_e2e_runner_test( name = "e2e_matmul_vulkan_{0}_large_ampere".format(lhs_rhs_type), compiler_flags = [ - "--iree-vulkan-target-triple=ampere-unknown-linux", + "--iree-vulkan-target=ampere", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -680,7 +680,7 @@ iree_generated_e2e_runner_test( iree_generated_e2e_runner_test( name = "e2e_matmul_vulkan_f16_large_rdna3", compiler_flags = [ - "--iree-vulkan-target-triple=rdna3-unknown-linux", + "--iree-vulkan-target=rdna3", ], generator = ":generate_e2e_matmul_tests", generator_args = [ diff --git a/tests/e2e/matmul/CMakeLists.txt b/tests/e2e/matmul/CMakeLists.txt index 0556e756bef6..5651d17de0aa 100644 --- a/tests/e2e/matmul/CMakeLists.txt +++ b/tests/e2e/matmul/CMakeLists.txt @@ -1911,7 +1911,7 @@ iree_generated_e2e_runner_test( DRIVERS "cuda" COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" + "--iree-cuda-target=sm_80" LABELS "noasan" "nomsan" @@ -1937,7 +1937,7 @@ iree_generated_e2e_runner_test( DRIVERS "cuda" COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" + "--iree-cuda-target=sm_80" LABELS "noasan" "nomsan" @@ -1963,7 +1963,7 @@ iree_generated_e2e_runner_test( DRIVERS "cuda" COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" + "--iree-cuda-target=sm_80" LABELS "noasan" "nomsan" @@ -1990,7 +1990,7 @@ iree_generated_e2e_runner_test( DRIVERS "cuda" COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" + "--iree-cuda-target=sm_80" LABELS "noasan" "nomsan" @@ -2017,7 +2017,7 @@ iree_generated_e2e_runner_test( DRIVERS "cuda" COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" + "--iree-cuda-target=sm_80" LABELS "noasan" "nomsan" @@ -2044,7 +2044,7 @@ iree_generated_e2e_runner_test( DRIVERS "cuda" COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" + "--iree-cuda-target=sm_80" LABELS "noasan" "nomsan" @@ -2098,7 +2098,7 @@ iree_generated_e2e_runner_test( DRIVERS "vulkan" COMPILER_FLAGS - "--iree-vulkan-target-triple=valhall-unknown-android31" + "--iree-vulkan-target=valhall" LABELS "requires-gpu-nvidia" "vulkan_uses_vk_khr_shader_float16_int8" @@ -2122,7 +2122,7 @@ iree_generated_e2e_runner_test( DRIVERS "vulkan" COMPILER_FLAGS - "--iree-vulkan-target-triple=valhall-unknown-android31" + "--iree-vulkan-target=valhall" LABELS "requires-gpu-nvidia" "vulkan_uses_vk_khr_shader_float16_int8" @@ -2146,7 +2146,7 @@ iree_generated_e2e_runner_test( DRIVERS "vulkan" COMPILER_FLAGS - "--iree-vulkan-target-triple=valhall-unknown-android31" + "--iree-vulkan-target=valhall" LABELS "requires-gpu-nvidia" "vulkan_uses_vk_khr_shader_float16_int8" @@ -2170,7 +2170,7 @@ iree_generated_e2e_runner_test( DRIVERS "vulkan" COMPILER_FLAGS - "--iree-vulkan-target-triple=ampere-unknown-linux" + "--iree-vulkan-target=ampere" LABELS "requires-gpu-sm80" "vulkan_uses_vk_khr_shader_float16_int8" @@ -2194,7 +2194,7 @@ iree_generated_e2e_runner_test( DRIVERS "vulkan" COMPILER_FLAGS - "--iree-vulkan-target-triple=ampere-unknown-linux" + "--iree-vulkan-target=ampere" LABELS "requires-gpu-sm80" "vulkan_uses_vk_khr_shader_float16_int8" @@ -2218,7 +2218,7 @@ iree_generated_e2e_runner_test( DRIVERS "vulkan" COMPILER_FLAGS - "--iree-vulkan-target-triple=ampere-unknown-linux" + "--iree-vulkan-target=ampere" LABELS "requires-gpu-sm80" "vulkan_uses_vk_khr_shader_float16_int8" @@ -2242,7 +2242,7 @@ 
iree_generated_e2e_runner_test( DRIVERS "vulkan" COMPILER_FLAGS - "--iree-vulkan-target-triple=rdna3-unknown-linux" + "--iree-vulkan-target=rdna3" RUNNER_ARGS "--require_exact_results=false" LABELS @@ -2258,7 +2258,7 @@ if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9") unset(IREE_HIP_TEST_COMPILER_FLAGS) list(APPEND IREE_HIP_TEST_COMPILER_FLAGS - "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}" + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" ) iree_generated_e2e_runner_test( @@ -2442,7 +2442,7 @@ elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11") unset(IREE_HIP_TEST_COMPILER_FLAGS) list(APPEND IREE_HIP_TEST_COMPILER_FLAGS - "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}" + "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}" ) iree_generated_e2e_runner_test( diff --git a/tests/e2e/vulkan_specific/BUILD.bazel b/tests/e2e/vulkan_specific/BUILD.bazel index b95550d8fcb0..5bf2493459d8 100644 --- a/tests/e2e/vulkan_specific/BUILD.bazel +++ b/tests/e2e/vulkan_specific/BUILD.bazel @@ -21,7 +21,7 @@ iree_check_single_backend_test_suite( ], compiler_flags = [ "--iree-input-type=stablehlo", - "--iree-vulkan-target-triple=valhall-unknown-android31", + "--iree-vulkan-target=valhall", ], driver = "vulkan", tags = [ @@ -40,7 +40,7 @@ iree_check_single_backend_test_suite( ], compiler_flags = [ "--iree-input-type=stablehlo", - "--iree-vulkan-target-triple=valhall-unknown-android31", + "--iree-vulkan-target=valhall", ], driver = "vulkan", target_backend = "vulkan-spirv", @@ -55,7 +55,7 @@ iree_check_single_backend_test_suite( compiler_flags = [ "--iree-input-type=stablehlo", "--iree-input-demote-i64-to-i32=false", - "--iree-vulkan-target-triple=valhall-unknown-android31", + "--iree-vulkan-target=valhall", ], driver = "vulkan", tags = [ diff --git a/tests/e2e/vulkan_specific/CMakeLists.txt b/tests/e2e/vulkan_specific/CMakeLists.txt index dadc679390ef..d58160a96be6 100644 --- a/tests/e2e/vulkan_specific/CMakeLists.txt +++ b/tests/e2e/vulkan_specific/CMakeLists.txt @@ -22,7 +22,7 @@ iree_check_single_backend_test_suite( "vulkan" COMPILER_FLAGS "--iree-input-type=stablehlo" - "--iree-vulkan-target-triple=valhall-unknown-android31" + "--iree-vulkan-target=valhall" LABELS "manual" "notap" @@ -41,7 +41,7 @@ iree_check_single_backend_test_suite( "vulkan" COMPILER_FLAGS "--iree-input-type=stablehlo" - "--iree-vulkan-target-triple=valhall-unknown-android31" + "--iree-vulkan-target=valhall" ) iree_check_single_backend_test_suite( @@ -57,7 +57,7 @@ iree_check_single_backend_test_suite( COMPILER_FLAGS "--iree-input-type=stablehlo" "--iree-input-demote-i64-to-i32=false" - "--iree-vulkan-target-triple=valhall-unknown-android31" + "--iree-vulkan-target=valhall" LABELS "manual" "notap" diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json index 05fefd688326..ad752bc9313f 100644 --- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json +++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json @@ -2,7 +2,7 @@ "config_name": "gpu_rocm_rdna3", "iree_compile_flags": [ "--iree-hal-target-backends=rocm", - "--iree-rocm-target-chip=gfx1100", + "--iree-hip-target=gfx1100", "--iree-input-demote-f64-to-f32=false" ], "iree_run_module_flags": [