Skip to content

Commit

Permalink
[GPU] Fix hoisting after upstream change disallowing view ops (iree-org#15192)
Browse files Browse the repository at this point in the history

MLIR changed to disallow hoisting involving memref view-like ops, which
is heavily relied on in LLVMGPU and SPIR-V CodeGen for better
performance. Now we need to first fold aliased memrefs and then perform
hoisting.

This drops the local revert for llvm/llvm-project@94c04772
It additionally cherry-picks the following commits:
* llvm/llvm-project@ebaf8d49
* llvm/llvm-project@3049ac44

Fixes iree-org#15083
  • Loading branch information
antiagainst authored Oct 16, 2023
1 parent d3a152b commit 193c132
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 25 deletions.
6 changes: 4 additions & 2 deletions compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ void addGPUMatmulTensorCorePassPipeline(OpPassManager &pm,
// Linalg -> vector
nestedModulePM.addNestedPass<func::FuncOp>(
createLLVMGPUTensorCoreVectorizationPass());
nestedModulePM.addNestedPass<func::FuncOp>(createCanonicalizerPass());
nestedModulePM.addNestedPass<func::FuncOp>(
memref::createFoldMemRefAliasOpsPass());
nestedModulePM.addNestedPass<func::FuncOp>(createCSEPass());
nestedModulePM.addNestedPass<func::FuncOp>(
createOptimizeVectorTransferPass());
Expand Down Expand Up @@ -302,7 +303,8 @@ void addGPUMatmulTensorCoreMmaSyncPassPipeline(OpPassManager &pm,
// Linalg -> vector
nestedModulePM.addNestedPass<func::FuncOp>(
createLLVMGPUTensorCoreVectorizationPass(GPUTensorCoreType::MMA_SYNC));
nestedModulePM.addNestedPass<func::FuncOp>(createCanonicalizerPass());
nestedModulePM.addNestedPass<func::FuncOp>(
memref::createFoldMemRefAliasOpsPass());
nestedModulePM.addNestedPass<func::FuncOp>(createCSEPass());
nestedModulePM.addNestedPass<func::FuncOp>(
createOptimizeVectorTransferPass());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,17 @@ module attributes {hal.device.targets = [#device_target_cuda]} {
// CHECK-LABEL: func.func @conv_nchw
// TODO: hoist the accumulator read and fold the transfer_write.
// CHECK: vector.transfer_write
// CHECK-COUNT-4: vector.transfer_read
// CHECK: scf.for
// CHECK: scf.for
// CHECK-COUNT-3: vector.transfer_read
// CHECK: vector.contract
// CHECK: vector.transfer_write
// CHECK-COUNT-2: vector.transfer_read
// CHECK: vector.contract
// CHECK: vector.transfer_write
// CHECK-COUNT-2: vector.transfer_read
// CHECK: vector.transfer_read
// CHECK: vector.contract
// CHECK: vector.transfer_write
// CHECK-COUNT-2: vector.transfer_read
// CHECK: vector.transfer_read
// CHECK: vector.contract
// CHECK: vector.transfer_read
// CHECK: vector.contract
// CHECK: vector.transfer_write
// CHECK: scf.yield
// CHECK: scf.yield
// CHECK-COUNT-4: vector.transfer_write
30 changes: 16 additions & 14 deletions compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,15 @@ void addSPIRVCooperativeMatrixVectorizePassPipeline(OpPassManager &pm,
createGenericVectorizationPass(options));
}

// With subview ops, vector hoisting won't kick in. So fold memref subview ops
// before performing vector unrolling and hoisting.
nestedModulePM.addNestedPass<func::FuncOp>(
memref::createFoldMemRefAliasOpsPass());

// Vectorize to cooperative ops.
nestedModulePM.addNestedPass<func::FuncOp>(
createSPIRVVectorizeToCooperativeOpsPass());
nestedModulePM.addPass(createCSEPass());
nestedModulePM.addNestedPass<func::FuncOp>(
createHoistRedundantVectorTransfersPass());
nestedModulePM.addNestedPass<func::FuncOp>(
Expand All @@ -392,10 +398,6 @@ void addSPIRVCooperativeMatrixVectorizePassPipeline(OpPassManager &pm,
nestedModulePM.addNestedPass<func::FuncOp>(createOptimizeVectorTransferPass(
/*flatten=*/false, /*dropUnitDims=*/false));

// Fold subview ops is reqiured for converting vector transfer ops into SPIR-V
// cooperative ops in the next step.
nestedModulePM.addPass(memref::createFoldMemRefAliasOpsPass());

nestedModulePM.addNestedPass<func::FuncOp>(createForOpCanonicalizationPass());
nestedModulePM.addPass(createCanonicalizerPass());
nestedModulePM.addPass(createCSEPass());
Expand Down Expand Up @@ -466,19 +468,19 @@ void addSPIRVMatmulPromoteVectorizePassPipeline(OpPassManager &topPM,
nestedPM.addNestedPass<func::FuncOp>(createGPUReduceSharedMemoryBankConflicts(
detail::bankConflictReductionPaddingBits));

addSPIRVVectorLoweringPasses(nestedPM);
// With subview ops, vector hoisting won't kick in. So fold memref subview ops
// before performing vector unrolling and hoisting.
nestedPM.addNestedPass<func::FuncOp>(memref::createFoldMemRefAliasOpsPass());

nestedPM.addNestedPass<func::FuncOp>(createSPIRVInitialVectorLoweringPass());
nestedPM.addPass(createCSEPass());
nestedPM.addNestedPass<func::FuncOp>(
createHoistRedundantVectorTransfersPass());
nestedPM.addNestedPass<func::FuncOp>(createSPIRVFinalVectorLoweringPass());

nestedPM.addNestedPass<func::FuncOp>(createForOpCanonicalizationPass());
nestedPM.addPass(createCanonicalizerPass());
nestedPM.addPass(createCSEPass());
// After vectorization and some basic cleanup, optimize vector transfer ops.
// Here we won't have large n-D vectors being put as loop carried values due
// to hoisting. Because this is before folding all memref subview ops away, we
// still have subview ops using the same indices, which allows for transfer
// read/write forwarding.
nestedPM.addNestedPass<func::FuncOp>(createOptimizeVectorTransferPass(
/*flatten=*/false, /*dropUnitDims=*/false));

nestedPM.addNestedPass<func::FuncOp>(memref::createFoldMemRefAliasOpsPass());
nestedPM.addNestedPass<func::FuncOp>(createOptimizeVectorTransferPass(
/*flatten=*/false, /*dropUnitDims=*/false));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,11 @@ Value mlir::iree_compiler::gpu::buildConvertToTensorCoreOp(
// be replaced by a single transform.
b.create<SynchronizeLoopOp>(forH);

b.create<transform::ApplyPatternsOp>(funcH, [](OpBuilder &b, Location loc) {
b.create<transform::ApplyFoldMemrefAliasOpsPatternsOp>(loc);
});
b.create<IREE::transform_dialect::ApplyCommonSubexpressionEliminationOp>(
funcH);
// TODO: not a functional style transform and avoid returning funcH.
funcH = b.create<transform::HoistRedundantVectorTransfersOp>(
transform::AnyOpType::get(b.getContext()), funcH);
Expand Down
2 changes: 1 addition & 1 deletion third_party/llvm-project

0 comments on commit 193c132

Please sign in to comment.