From 4de493af31e370ca2eb1bb590469ebbf76fc8d5b Mon Sep 17 00:00:00 2001 From: Han-Chung Wang Date: Wed, 17 Jul 2024 09:41:59 -0700 Subject: [PATCH] [CPU] Enable mmt4d ukernels when iree-llvmcpu-enable-ukernels is not set (#17928) This is a follow-up for https://github.com/iree-org/iree/commit/3b5d269c7fec61743cc41f4394b33a31625ef2ae. The previous revision enables mmt4d ukernels only when `--iree-llvmcpu-enable-ukernels` is passed to IREE tools. If the `ukernels` attribute is not present in hal.executable.target, they are not enabled. This revision removes the constraint, so users don't need to pass `--iree-llvmcpu-enable-ukernels=default` to enable the mmt4d ukernels. Signed-off-by: hanhanW --- .../Common/CPU/test/lower_to_ukernel_ops.mlir | 28 +++++++++++++++++++ .../LLVMCPU/LLVMCPULowerExecutableTarget.cpp | 1 - .../iree/compiler/Codegen/LLVMCPU/Passes.cpp | 20 ++++++------- .../iree/compiler/Codegen/LLVMCPU/Passes.h | 1 - .../src/iree/compiler/Codegen/Utils/Utils.cpp | 8 ++++-- 5 files changed, 43 insertions(+), 15 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir index e546c5432bb1..06376f531788 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir @@ -29,6 +29,34 @@ func.func @mmt4d_f32f32f32(%arg0 : tensor, %arg1 : tensor, %arg1 : tensor, + %arg2 : tensor) -> tensor attributes { + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> +} { + %0 = linalg.mmt4d ins(%arg0, %arg1 : tensor, tensor) + outs(%arg2 : tensor) -> tensor + return %0 : tensor +} +// CHECK-LABEL: func @mmt4d_no_ukernels_attr_f32f32f32( +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor +// 
CHECK-DAG: %[[FLAGS:.+]] = arith.constant {{[0-9]+}} : i32 +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[C1_i32:.+]] = arith.constant 1 : i32 +// CHECK-DAG: %[[C16_i32:.+]] = arith.constant 16 : i32 +// CHECK-DAG: %[[M:.+]] = tensor.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[N:.+]] = tensor.dim %[[ARG1]], %[[C0]] +// CHECK-DAG: %[[K:.+]] = tensor.dim %[[ARG1]], %[[C1]] +// CHECK: %[[MICRO_KERNEL:.+]]:2 = iree_codegen.ukernel.generic "iree_uk_mmt4d" +// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : +// CHECK-SAME: outs(%[[ARG2]] : +// CHECK-SAME: (%[[M]], %[[N]], %[[K]], %[[C16_i32]], %[[C16_i32]], %[[C1_i32]], %[[FLAGS]] : +// CHECK: return %[[MICRO_KERNEL]]#0 + +// ----- + func.func @mmt4d_f32f32f32_with_none_ukernel_enabled(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor) -> tensor attributes { hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "none", target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp index eb80b230cf0a..e0d94935e92f 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp @@ -104,7 +104,6 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() { pipelineOpts.enableVectorMasking = isX86(target) || isRISCV(target) || (isAArch64(target) && hasAnySVEFeature(target)); - pipelineOpts.enableUkernels = hasUkernel(target); pipelineOpts.enableAArch64SSVE = isAArch64(target) && hasAnySVEFeature(target) && hasSMEFeature(target); pipelineOpts.enableAArch64I8mm = isAArch64(target) && hasI8mmFeature(target); diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp index be6aeb955116..c99c3d156e77 100644 --- 
a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp @@ -501,11 +501,11 @@ void addMmt4dTilingExpertPassPipeline(OpPassManager &funcPassManager, funcPassManager.addPass(createLLVMCPUTileAndFusePass( static_cast(tilingConfig.getVectorCommonParallelLevel()))); - if (pipelineOpt.enableUkernels) { - funcPassManager.addPass(createCPUPrepareUkernelsPass()); - funcPassManager.addPass( - createCPULowerToUKernelsPass(clSkipIntermediateRoundings)); - } + // The below two passes are nop if the "mmt4d" is explicitly excluded in the + // ukernels attribute. + funcPassManager.addPass(createCPUPrepareUkernelsPass()); + funcPassManager.addPass( + createCPULowerToUKernelsPass(clSkipIntermediateRoundings)); funcPassManager.addPass(createLLVMCPUTilePass( static_cast(tilingConfig.getVectorReductionLevel()))); @@ -545,11 +545,11 @@ void addCPUDataTilingPipeline(OpPassManager &funcPassManager, LLVMCPUPipelineOptions &pipelineOpt) { addTileAndDistributePasses(funcPassManager); - if (pipelineOpt.enableUkernels) { - funcPassManager.addPass(createCPUPrepareUkernelsPass()); - funcPassManager.addPass( - createCPULowerToUKernelsPass(clSkipIntermediateRoundings)); - } + // The below two passes are nop if pack/unpack is not specified in ukernels + // attribute. By default, they are disabled. 
+ funcPassManager.addPass(createCPUPrepareUkernelsPass()); + funcPassManager.addPass( + createCPULowerToUKernelsPass(clSkipIntermediateRoundings)); funcPassManager.addPass( createLLVMCPUTilePass(tilingConfig.getVectorCommonParallelLevel())); diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h index 8be1d2beae32..213629276130 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h @@ -149,7 +149,6 @@ struct LLVMCPUPipelineOptions { bool enableVectorMasking = false; bool enableAArch64SSVE = false; bool enableAArch64I8mm = false; - bool enableUkernels = false; bool lowerToAVX2 = false; }; diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp index 2999d905bbc0..abeb35d83553 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp +++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp @@ -152,10 +152,12 @@ getDefaultEnabledUkernels(IREE::HAL::ExecutableTargetAttr targetAttr) { bool hasUkernel(IREE::HAL::ExecutableTargetAttr targetAttr, StringRef ukernelName) { auto enabledUkernels = getConfigStringAttr(targetAttr, "ukernels"); - if (!enabledUkernels) { - return false; + StringRef enabledUkernelsStr; + if (enabledUkernels) { + enabledUkernelsStr = enabledUkernels->getValue(); + } else { + enabledUkernelsStr = "default"; } - StringRef enabledUkernelsStr = enabledUkernels->getValue(); // Resolve `default`. if (enabledUkernelsStr == "default") { enabledUkernelsStr = getDefaultEnabledUkernels(targetAttr);