diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index d442a197387c..4feb7068daa8 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -44,6 +44,12 @@ static llvm::cl::opt<bool> clCheckLinalgVectorization(
         "Runs the pass to check if all the Linalg ops are vectorized"),
     llvm::cl::init(false));
 
+static llvm::cl::opt<bool> clUseFastMinMaxOps(
+    "iree-llvmcpu-use-fast-min-max-ops",
+    llvm::cl::desc(
+        "Use `arith.minf/maxf` instead of `arith.minimumf/maximumf` ops"),
+    llvm::cl::init(false));
+
 // TODO(#10820): Delete the flag. This should be a nop pass to default pipeline
 // while tensor.pad op is lowered to fill + insert_slice before Codegen.
 // However, it causes regressions in terms of compilation time. Skip the passes
@@ -710,6 +716,11 @@ static void addLowerToLLVMPasses(OpPassManager &passManager) {
   passManager.addNestedPass(
       createHoistStaticallyBoundAllocationsPass());
 
+  // Use `arith.minf/maxf` instead of `arith.minimumf/maximumf`.
+  if (clUseFastMinMaxOps) {
+    passManager.addNestedPass(createReplaceSlowMinMaxOpsPass());
+  }
+
   // Resolve get_buffer_descriptor ops. All structural buffer manipulations
   // must conclude before this point.
   passManager.addNestedPass(