Skip to content

Commit

Permalink
[LLVMGPU] Convert maximumf/minimumf to max/min for ROCM (iree-org#15069)
Browse files Browse the repository at this point in the history
AMDGPU does not support the former
  • Loading branch information
nirvedhmeshram authored Sep 29, 2023
1 parent 14ce232 commit 113f9d2
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
12 changes: 12 additions & 0 deletions compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "iree/compiler/Codegen/Common/GPU/GPUPatterns.h"
#include "iree/compiler/Codegen/Common/Transforms.h"
#include "iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h"
#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h"
#include "iree/compiler/Codegen/LLVMGPU/Passes.h"
Expand Down Expand Up @@ -97,6 +98,17 @@ struct ConvertToROCDLPass : public ConvertToROCDLBase<ConvertToROCDLPass> {
return signalPassFailure();
}
}
{
// Convert arith::maximumf/minimumf ops on AMD gpus since the lowering
// is faulty for them.
// TODO: Remove this once the lowering in LLVM is fixed
// (https://github.com/llvm/llvm-project/issues/67815).
RewritePatternSet patterns(&getContext());
populateReplaceSlowMinMaxOpsPatterns(patterns);
if (failed(applyPatternsAndFoldGreedily(m, std::move(patterns)))) {
return signalPassFailure();
}
}
{
RewritePatternSet llvmPatterns(&getContext());
populateLowerHALInterfaceOp(llvmPatterns);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" %s | FileCheck %s
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" %s | FileCheck %s

// Test that standard and GPU ops are converted to LLVM and ROCDL.
#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
Expand Down Expand Up @@ -39,3 +39,37 @@ hal.executable @abs_ex_dispatch_0 {
// CHECK-SAME: %{{[a-zA-Z0-9]*}}: !llvm.ptr {llvm.align = 16 : i32, llvm.noalias})
// CHECK: rocdl.workgroup.dim.x
// CHECK: llvm.fadd


// -----
// Test that maximum and minimum are converted to max and min on ROCm.
#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
#hal.descriptor_set.layout<0, bindings = [
#hal.descriptor_set.binding<0, storage_buffer>,
#hal.descriptor_set.binding<4, storage_buffer>
]>,
#hal.descriptor_set.layout<1, bindings = [
#hal.descriptor_set.binding<2, storage_buffer>
]>
]>
// Test fixture: a single-variant executable whose inner module holds the
// function that is run through the iree-convert-to-rocdl pass.
// NOTE(review): the variant is named @cuda and targets "cuda-nvptx-fb" even
// though this file exercises the ROCDL conversion; presumably copied from the
// CUDA test. Confirm whether a ROCm target is intended.
hal.executable @abs_ex_dispatch_0 {
hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
// NOTE(review): the export symbol is @abs_ex_dispatch_0 while the only
// function in the module is @reduction_maximum; verify this is intentional.
hal.executable.export @abs_ex_dispatch_0 layout(#pipeline_layout)
builtin.module {
// Loads a vector<2xf32> from the first buffer, reduces it with <maximumf>,
// splats the scalar back to vector<2xf32>, and stores it to the second buffer.
func.func @reduction_maximum() {
%c0 = arith.constant 0 : index
// Source buffer: set 0 / binding 0, marked ReadOnly.
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) :
memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>
// Destination buffer: set 0 / binding 1.
// NOTE(review): #pipeline_layout declares bindings 0 and 4 for set 0, not 1;
// confirm whether binding(4) was meant.
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : memref<32x64x64xf32,
strided<[4096, 64, 1], offset: ?>>
%2 = vector.load %0[%c0, %c0, %c0] : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>, vector<2xf32>
// The op under test: the expected output after this executable matches
// llvm.intr.vector.reduce.fmax for this <maximumf> reduction.
%3 = vector.reduction <maximumf>, %2 : vector<2xf32> into f32
%4 = vector.splat %3 : vector<2xf32>
vector.store %4, %1[%c0, %c0, %c0] : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>, vector<2xf32>
return
}
}
}
}
// CHECK-LABEL: llvm.func @reduction_maximum
// CHECK: llvm.intr.vector.reduce.fmax({{.*}}) : (vector<2xf32>) -> f32

0 comments on commit 113f9d2

Please sign in to comment.