Skip to content

Commit

Permalink
[compiler] Make cuda/hip/vulkan target cl options consistent (iree-or…
Browse files Browse the repository at this point in the history
…g#17710)

This commit changes the command line options for specifying
targets for CUDA/HIP/Vulkan to `--iree-<api>-target` and the
corresponding feature option to `--iree-<api>-target-features`.
This makes more sense given we are not using a triple or
a chip anymore--it's more flexible, as it accepts codenames and
product names. This also makes the options consistent.

This is a breaking change. Here is the main list of options:

Before | After
:---: | :---:
`--iree-hal-cuda-llvm-target-arch` | `--iree-cuda-target`
`--iree-hal-cuda-llvm-target-feature` | `--iree-cuda-target-features`
`--iree-rocm-target-chip` | `--iree-hip-target`
`--iree-rocm-target-features` | `--iree-hip-target-features`
`--iree-vulkan-target-triple` | `--iree-vulkan-target`
`--iree-hal-cuda-*` | `--iree-cuda-*`
`--iree-rocm-*` | `--iree-hip-*`

Progress towards iree-org#16341

ci-extra:
test_nvidia_gpu,test_nvidia_a100,test_amd_mi250,build_test_all_macos_arm64,build_and_test_android
  • Loading branch information
antiagainst authored Aug 22, 2024
1 parent d1ccc8c commit c44d29b
Show file tree
Hide file tree
Showing 32 changed files with 247 additions and 187 deletions.
2 changes: 1 addition & 1 deletion build_tools/cmake/iree_check_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ function(iree_check_test)
list(APPEND _BASE_COMPILER_FLAGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}")
endif()
if(_NORMALIZED_TARGET_BACKEND STREQUAL "ROCM")
list(APPEND _BASE_COMPILER_FLAGS "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}")
list(APPEND _BASE_COMPILER_FLAGS "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}")
endif()

if(_BYTECODE_MODULE_BUILD_ENABLED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"config_name": "gpu_rocm",
"iree_compile_flags": [
"--iree-hal-target-backends=rocm",
"--iree-rocm-target-chip=gfx90a",
"--iree-hip-target=gfx90a",
"--iree-input-demote-f64-to-f32"
],
"iree_run_module_flags": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"config_name": "gpu_rocm",
"iree_compile_flags": [
"--iree-hal-target-backends=rocm",
"--iree-rocm-target-chip=gfx942",
"--iree-hip-target=gfx942",
"--iree-input-demote-f64-to-f32"
],
"iree_run_module_flags": [
Expand Down
81 changes: 43 additions & 38 deletions compiler/plugins/target/CUDA/CUDATarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,59 +55,64 @@ namespace mlir::iree_compiler::IREE::HAL {
namespace {
struct CUDAOptions {
bool dumpPtx = false;
std::string clTargetChip = "sm_60";
std::string clTargetFeature = "+ptx76";
std::string clTarget = "sm_60";
std::string clTargetFeatures = "+ptx76";
bool clUsePtxas = false;
std::string clUsePtxasFrom;
std::string clUsePtxasParams;

void bindOptions(OptionsBinder &binder) {
static llvm::cl::OptionCategory category("CUDA HAL Target");
binder.opt<bool>("iree-hal-cuda-dump-ptx", dumpPtx, llvm::cl::cat(category),
binder.opt<bool>("iree-cuda-dump-ptx", dumpPtx, llvm::cl::cat(category),
llvm::cl::desc("Dump ptx to the debug stream."));

binder.opt<std::string>("iree-hal-cuda-llvm-target-arch", clTargetChip,
llvm::cl::cat(category),
llvm::cl::desc("LLVM target chip."));
binder.opt<std::string>(
"iree-cuda-target", clTarget, llvm::cl::cat(category),
llvm::cl::desc(
// clang-format off
"CUDA target as expected by LLVM NVPTX backend; e.g., "
"'sm_80'/'sm_90' for targeting Ampere/Hopper GPUs. "
"Additionally this also supports architecture code names like "
"'turing'/'ampere' or some product names like 'a100'/'rtx3090ti' "
"for a better experience. See "
"https://iree.dev/guides/deployment-configurations/gpu-cuda "
"for more details."
// clang-format on
));

binder.opt<std::string>("iree-hal-cuda-llvm-target-feature",
clTargetFeature, llvm::cl::cat(category),
llvm::cl::desc("Use to set PTX version."));
binder.opt<std::string>(
"iree-cuda-target-features", clTargetFeatures, llvm::cl::cat(category),
llvm::cl::desc(
"CUDA target features as expected by LLVM NVPTX backend; e.g. "
"use '+ptxNN' to set PTX version to NN."));

binder.opt<bool>(
"iree-hal-cuda-use-ptxas", clUsePtxas, llvm::cl::cat(category),
llvm::cl::desc("It uses the ptxas compiler that is on the environment, "
"compiles the "
"generated PTX code with it, puts the cubin binary "
"generated by ptxas "
"into the executable. "
"'--iree-hal-cuda-llvm-target-arch' is used as "
"the target GPU. If passing additional parameters to "
"ptxas is desired, "
"the parameters flag can be used "
"(e.g.'--iree-hal-cuda-use-ptxas-params=-v')."));
"iree-cuda-use-ptxas", clUsePtxas, llvm::cl::cat(category),
llvm::cl::desc(
"Whether to use the ptxas tool to assemble the generated PTX "
"code and put the generated CUBIN binary file into the executable. "
"If not set, directly embeds the PTX into the executable. "
"To specify the exact ptxas tool path, use "
"'--iree-cuda-use-ptxas-from'. To pass "
"additional parameters to ptxas, use "
"'--iree-cuda-use-ptxas-params', e.g. "
"'--iree-cuda-use-ptxas-params=-v'"));

binder.opt<std::string>(
"iree-hal-cuda-use-ptxas-from", clUsePtxasFrom, llvm::cl::cat(category),
llvm::cl::desc(
"It uses the provided ptxas compiler, compiles the generated PTX "
"code with it, puts the cubin binary generated by ptxas into the "
"executable. '--iree-hal-cuda-llvm-target-arch' is used as the "
"target GPU. If passing additional parameters to ptxas is desired, "
"the "
"parameters flag can be used "
"(e.g.'--iree-hal-cuda-use-ptxas-params=-v')."));
"iree-cuda-use-ptxas-from", clUsePtxasFrom, llvm::cl::cat(category),
llvm::cl::desc("Uses the ptxas tool from the given path. Requires "
"'--iree-cuda-use-ptxas' to be true."));

binder.opt<std::string>(
"iree-hal-cuda-use-ptxas-params", clUsePtxasParams,
llvm::cl::cat(category),
llvm::cl::desc("Passes the given additional parameters to ptxas."));
"iree-cuda-use-ptxas-params", clUsePtxasParams, llvm::cl::cat(category),
llvm::cl::desc("Passes the given additional parameters to ptxas. "
"Requires '--iree-cuda-use-ptxas' to be true."));
}

LogicalResult verify(mlir::Builder &builder) const {
if (GPU::normalizeCUDATarget(clTargetChip).empty()) {
if (GPU::normalizeCUDATarget(clTarget).empty()) {
return emitError(builder.getUnknownLoc(), "Unknown CUDA target '")
<< clTargetChip << "'";
<< clTarget << "'";
}
return success();
}
Expand All @@ -131,7 +136,7 @@ static FailureOr<std::string> findPtxasCompiler(const CUDAOptions &options,

*message = std::string(
"Could not find ptxas compiler. Try passing it explicitly with "
"--iree-hal-cuda-use-ptxas-from=<path> flag");
"--iree-cuda-use-ptxas-from=<path> flag");
return failure();
}

Expand Down Expand Up @@ -437,7 +442,7 @@ class CUDATargetBackend final : public TargetBackend {
return nullptr;

if (auto target = GPU::getCUDATargetDetails(
options.clTargetChip, options.clTargetFeature, context))
options.clTarget, options.clTargetFeatures, context))
addConfig("iree.gpu.target", target);

return b.getAttr<IREE::HAL::ExecutableTargetAttr>(
Expand Down Expand Up @@ -471,8 +476,8 @@ class CUDATargetBackend final : public TargetBackend {
IREE::HAL::ExecutableVariantOp variantOp,
OpBuilder &executableBuilder) override {
auto targetAttr = variantOp.getTargetAttr();
StringRef targetArch = options.clTargetChip;
StringRef targetFeatures = options.clTargetFeature;
StringRef targetArch = options.clTarget;
StringRef targetFeatures = options.clTargetFeatures;
if (auto attr = getGPUTargetAttr(targetAttr)) {
targetArch = attr.getArch();
targetFeatures = attr.getFeatures();
Expand Down
2 changes: 1 addition & 1 deletion compiler/plugins/target/CUDA/test/smoketest.mlir
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: iree-opt --split-input-file --iree-hal-transformation-pipeline --iree-gpu-test-target=sm_60 %s | FileCheck %s
// RUN: iree-opt --split-input-file --iree-hal-transformation-pipeline --iree-gpu-test-target=sm_60 --iree-hal-cuda-dump-ptx %s 2>&1 | FileCheck %s --check-prefix=PTX
// RUN: iree-opt --split-input-file --iree-hal-transformation-pipeline --iree-gpu-test-target=sm_60 --iree-cuda-dump-ptx %s 2>&1 | FileCheck %s --check-prefix=PTX

#map = affine_map<(d0) -> (d0)>

Expand Down
52 changes: 32 additions & 20 deletions compiler/plugins/target/ROCM/ROCMTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ namespace mlir::iree_compiler::IREE::HAL {
namespace {

struct ROCmOptions {
std::string targetChip = "gfx908";
std::string target = "gfx908";
std::string targetFeatures = "";
std::string bitcodeDirectory = getDefaultBitcodeDirectory();
int wavesPerEu = 0;
Expand All @@ -65,39 +65,51 @@ struct ROCmOptions {

void bindOptions(OptionsBinder &binder) {
using namespace llvm;
static cl::OptionCategory category("ROCm HAL Target");
binder.opt<std::string>("iree-rocm-target-chip", targetChip,
cl::cat(category), cl::desc("ROCm target chip."));
static cl::OptionCategory category("HIP HAL Target");
binder.opt<std::string>(
"iree-rocm-target-features", targetFeatures, cl::cat(category),
cl::desc("ROCm target features; e.g., '+sramecc,+xnack'."));
binder.opt<std::string>("iree-rocm-bc-dir", bitcodeDirectory,
"iree-hip-target", target, cl::cat(category),
cl::desc(
// clang-format off
"HIP target as expected by LLVM AMDGPU backend; e.g., "
"'gfx90a'/'gfx942' for targeting MI250/MI300 GPUs. "
"Additionally this also supports architecture code names like "
"'cdna3'/'rdna3' or some product names like 'mi300x'/'rtx7900xtx' "
"for a better experience. See "
"https://iree.dev/guides/deployment-configurations/gpu-rocm/ "
"for more details."
// clang-format on
));
binder.opt<std::string>(
"iree-hip-target-features", targetFeatures, cl::cat(category),
cl::desc("HIP target features as expected by LLVM AMDGPU backend; "
"e.g., '+sramecc,+xnack'."));
binder.opt<std::string>("iree-hip-bc-dir", bitcodeDirectory,
cl::cat(category),
cl::desc("Directory of ROCm Bitcode."));
binder.opt<int>("iree-rocm-waves-per-eu", wavesPerEu, cl::cat(category),
cl::desc("Directory of HIP Bitcode."));
binder.opt<int>("iree-hip-waves-per-eu", wavesPerEu, cl::cat(category),
cl::desc("Optimization hint specifying minimum "
"number of waves per execution unit."));
binder.opt<std::string>(
"iree-rocm-enable-ukernels", enableROCMUkernels, cl::cat(category),
cl::desc("Enables microkernels in the rocm compiler backend. May be "
"iree-hip-enable-ukernels", enableROCMUkernels, cl::cat(category),
cl::desc("Enables microkernels in the HIP compiler backend. May be "
"`default`, `none`, `all`, or a comma-separated list of "
"specific unprefixed microkernels to enable, e.g. `mmt4d`."));
binder.opt<bool>("iree-rocm-legacy-sync", legacySync, cl::cat(category),
binder.opt<bool>("iree-hip-legacy-sync", legacySync, cl::cat(category),
cl::desc("Enables 'legacy-sync' mode, which is required "
"for inline execution."));
}

LogicalResult verify(mlir::Builder &builder) const {
if (GPU::normalizeHIPTarget(targetChip).empty()) {
return emitError(builder.getUnknownLoc(), "Unknown ROCm target '")
<< targetChip << "'";
if (GPU::normalizeHIPTarget(target).empty()) {
return emitError(builder.getUnknownLoc(), "Unknown HIP target '")
<< target << "'";
}
SmallVector<StringRef> features;
llvm::SplitString(targetFeatures, features, ",");
for (StringRef f : features) {
if (!(f.starts_with("+") || f.starts_with("-"))) {
return emitError(builder.getUnknownLoc(),
"ROCm target feature must be prefixed with '+' or "
"HIP target feature must be prefixed with '+' or "
"'-'; but seen '")
<< f << "'";
}
Expand All @@ -106,7 +118,7 @@ struct ROCmOptions {
// We only support these two features to be set explicitly. Features
// like wavefrontsize is controlled and tuned by the compiler.
return emitError(builder.getUnknownLoc(),
"ROCm target feature can only be 'sramecc' or "
"HIP target feature can only be 'sramecc' or "
"'xnack'; but seen '")
<< feature << "'";
}
Expand Down Expand Up @@ -259,7 +271,7 @@ class ROCMTargetBackend final : public TargetBackend {
if (failed(options.verify(b)))
return nullptr;

if (auto target = GPU::getHIPTargetDetails(options.targetChip,
if (auto target = GPU::getHIPTargetDetails(options.target,
options.targetFeatures, context))
addConfig("iree.gpu.target", target);

Expand Down Expand Up @@ -336,7 +348,7 @@ class ROCMTargetBackend final : public TargetBackend {
OpBuilder &executableBuilder) override {
ModuleOp innerModuleOp = variantOp.getInnerModule();
auto targetAttr = variantOp.getTargetAttr();
StringRef targetArch = options.targetChip;
StringRef targetArch = options.target;
StringRef targetFeatures = options.targetFeatures;
if (auto attr = getGPUTargetAttr(targetAttr)) {
targetArch = attr.getArch();
Expand Down Expand Up @@ -517,7 +529,7 @@ class ROCMTargetBackend final : public TargetBackend {
return variantOp.emitError()
<< "cannot find ROCM bitcode files. Check your installation "
"consistency and in the worst case, set "
"--iree-rocm-bc-dir= to a path on your system.";
"--iree-hip-bc-dir= to a path on your system.";
}
if (failed(linkHIPBitcodeIfNeeded(variantOp.getLoc(), llvmModule.get(),
targetArch, bitcodeDirectory))) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: iree-compile --split-input-file --iree-hal-target-backends=rocm --iree-rocm-enable-ukernels=all --iree-rocm-target-chip=gfx1100 --compile-to=executable-targets %s | FileCheck %s
// RUN: iree-compile --split-input-file --iree-hal-target-backends=rocm --iree-hip-enable-ukernels=all --iree-hip-target=gfx1100 --compile-to=executable-targets %s | FileCheck %s

// We want to check that uKernel is indeed generated from e2e workflow.

Expand Down
8 changes: 4 additions & 4 deletions compiler/plugins/target/ROCM/test/target_device_features.mlir
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=mi300x %s | FileCheck %s --check-prefix=GFX942
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx940 %s | FileCheck %s --check-prefix=GFX940
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=rx7900xtx %s | FileCheck %s --check-prefix=GFX1100
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx941 --iree-rocm-target-features=+sramecc,-xnack %s | FileCheck %s --check-prefix=GFX941
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=mi300x %s | FileCheck %s --check-prefix=GFX942
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=gfx940 %s | FileCheck %s --check-prefix=GFX940
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=rx7900xtx %s | FileCheck %s --check-prefix=GFX1100
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-hip-target=gfx941 --iree-hip-target-features=+sramecc,-xnack %s | FileCheck %s --check-prefix=GFX941

// GFX942: target = #iree_gpu.target<arch = "gfx942",
// GFX942-SAME: wgp = <compute = fp64|fp32|fp16|int64|int32|int16|int8, storage = b64|b32|b16|b8,
Expand Down
21 changes: 13 additions & 8 deletions compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,23 @@ struct VulkanSPIRVTargetOptions {
// Use vp_android_baseline_2022 profile as the default target--it's a good
// lowest common denominator to guarantee the generated SPIR-V is widely
// accepted for now. Eventually we want to use a list for multi-targeting.
std::string targetTriple = "vp_android_baseline_2022";
std::string target = "vp_android_baseline_2022";
bool indirectBindings = false;

void bindOptions(OptionsBinder &binder) {
static llvm::cl::OptionCategory category("VulkanSPIRV HAL Target");
binder.opt<std::string>(
// TODO: Rename this as target given it's not a triple anymore.
"iree-vulkan-target-triple", targetTriple,
"iree-vulkan-target", target,
llvm::cl::desc(
"Vulkan target triple controlling the SPIR-V environment."));
"Vulkan target controlling the SPIR-V environment. Given the wide "
"support of Vulkan, this option supports a few schemes: 1) LLVM "
"CodeGen backend style: e.g., 'gfx*' for AMD GPUs and 'sm_*' for "
"NVIDIA GPUs; 2) architecture code name style: e.g., "
"'rdna3'/'valhall4'/'ampere'/'adreno' for AMD/ARM/NVIDIA/Qualcomm "
"GPUs; 3) product name style: 'rx7900xtx'/'rtx4090' for AMD/NVIDIA "
"GPUs. See "
"https://iree.dev/guides/deployment-configurations/gpu-vulkan/ for "
"more details."));
binder.opt<bool>(
"iree-vulkan-experimental-indirect-bindings", indirectBindings,
llvm::cl::desc(
Expand Down Expand Up @@ -103,13 +110,11 @@ class VulkanSPIRVTargetBackend : public TargetBackend {
configItems.emplace_back(b.getStringAttr(name), value);
};

// We only care about the architecture right now.
StringRef arch = StringRef(options_.targetTriple).split("-").first;
if (auto target = GPU::getVulkanTargetDetails(arch, context)) {
if (auto target = GPU::getVulkanTargetDetails(options_.target, context)) {
addConfig("iree.gpu.target", target);
} else {
emitError(b.getUnknownLoc(), "Unknown Vulkan target '")
<< options_.targetTriple << "'";
<< options_.target << "'";
return nullptr;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ namespace mlir::iree_compiler {
#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc"

static llvm::cl::opt<int>
clROCMIndexingBits("iree-rocm-index-bits",
clROCMIndexingBits("iree-hip-index-bits",
llvm::cl::desc("Set the bit width of indices in ROCm."),
llvm::cl::init(64));

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" %s | FileCheck %s
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" --iree-rocm-index-bits=32 %s | FileCheck %s --check-prefix=INDEX32
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" --iree-hip-index-bits=32 %s | FileCheck %s --check-prefix=INDEX32

// Test that standard and GPU ops are converted to LLVM and NVVM.
#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ TMP_DIR="../iree-tmp"
declare -a COMPILER_FLAGS=(
"--iree-input-type=stablehlo"
"--iree-hal-target-backends=cuda"
"--iree-hal-cuda-llvm-target-arch=sm_80"
"--iree-cuda-target=sm_80"
)

TIMEOUT_SECONDS_FOR_COMPILING_EACH_SOURCE=10
Expand Down
Loading

0 comments on commit c44d29b

Please sign in to comment.