From 864d9df716c749396844c3532cbf19c5cd5b62ec Mon Sep 17 00:00:00 2001 From: Prashant Kumar Date: Mon, 4 Nov 2024 16:18:14 +0530 Subject: [PATCH] [LLVMCPU] Add additional level of tiling to the default --- .../src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp | 6 ++++++ .../Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp | 7 +++++-- compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp | 6 +++++- compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h | 2 +- .../compiler/DispatchCreation/FormDispatchRegions.cpp | 8 ++++---- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp index 6b5882f44e8bb..f568b99c50d61 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp @@ -2508,6 +2508,12 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, SmallVector distTileSizes = getDefaultDistributedLevelTileSizes(op, DistributionHeuristicConfig{}); TileSizesListType tileSizes = {distTileSizes}; + if(auto linalgOp = dyn_cast(*op)){ + SmallVector vecTileSizes = distTileSizes; + limitVectorTileSizes(linalgOp, vecTileSizes); + tileSizes.push_back(vecTileSizes); + } + return setOpConfigAndEntryPointFnTranslation( entryPointFn, op, tileSizes, DispatchLoweringPassPipeline::CPUDefault); } diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp index a62b9c310f654..fcc10077f088d 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp @@ -84,6 +84,7 @@ getRootLoweringConfig(FunctionOpInterface funcOp) { static TilingConfig getTilingConfigForPipeline(FunctionOpInterface funcOp) { auto maybeLoweringConfig = getRootLoweringConfig(funcOp); + llvm::errs()<<"Hey I am here"; assert(succeeded(maybeLoweringConfig) && "Pipeline requires a lowering config"); return TilingConfig(*maybeLoweringConfig); @@ -122,9 +123,11 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() { // No pipleline specified, nothing to do. case IREE::Codegen::DispatchLoweringPassPipeline::None: return; - case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault: - addCPUDefaultPassPipeline(pipeline); + case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault: { + TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp); + addCPUDefaultPassPipeline(pipeline, tilingConfig); break; + } case IREE::Codegen::DispatchLoweringPassPipeline:: CPUBufferOpsTileAndVectorize: { TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp); diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp index 9ef65e28e94f5..87288aef42c58 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp @@ -653,8 +653,12 @@ void addCPULinalgExtTileAndVectorizePipeline( } } -void addCPUDefaultPassPipeline(OpPassManager &funcPassManager) { +void addCPUDefaultPassPipeline(OpPassManager &funcPassManager, TilingConfig &tilingConfig) { addTileAndDistributePasses(funcPassManager); + if(tilingConfig.getNumTilingLevels() > 1){ + funcPassManager.addPass(createLLVMCPUTileAndFusePass( + tilingConfig.getVectorCommonParallelLevel())); + } addCPUBufferizePasses(funcPassManager); } diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h index 4696bc808118c..985a74db88a0b 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h @@ -99,7 +99,7 @@ void addCPULinalgExtTileAndVectorizePipeline( /// Populates the passes to lower to scalars operations for linalg based /// code-generation. This pipeline does not vectorize, but instead just /// converts to memrefs -void addCPUDefaultPassPipeline(OpPassManager &funcPassManager); +void addCPUDefaultPassPipeline(OpPassManager &funcPassManager, TilingConfig &tilingConfig); void addConvTileAndDecomposeExpertPassPipeline( OpPassManager &funcPassManager, TilingConfig &tilingConfig, diff --git a/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp b/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp index b38b1a5930011..2c32d1d1aa7c9 100644 --- a/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp @@ -550,10 +550,10 @@ isFusableWithConsumer(OpOperand &fusedOperand, // TODO: Enable grouped convolution and depth wise pooling fusion. // Rightnow, this is going through the default CPU pipeline and not through // CONVTilingExpert. - if (isa(producer)) { - return false; - } + // if (isa(producer)) { + // return false; + // } auto producerFusionOp = dyn_cast(producer);