diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp index 4edaffa6159e..cb46bd5c185a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp @@ -443,55 +443,6 @@ getMaterializeEncodingFn(IREE::HAL::ExecutableTargetAttr targetAttr) { }; } -// Like getMaterializeEncodingFn, but iterating over an array of targets and -// returning the max of all tile sizes from each target, checking that other -// materialization info (permutations) agree. -// -// This is useful to compute padding amounts, in the materialization of -// UpperBoundTileSizeOp, in top-level functions that are not part of one HAL -// executable variant. There, the padding amounts only control the size of -// allocated buffers, so it's OK to over-estimate (only wasting some memory) -// but not under-estimate (would cause buffer overruns) padding amounts. -static MaterializeEncodingFn getUpperBoundMaterializeEncodingFn( - ArrayRef targetAttrs) { - return - [targetAttrs]( - RankedTensorType tensorType) -> FailureOr { - FailureOr result; // Defaults to failure. - for (auto targetAttr : targetAttrs) { - FailureOr info = - materializeEncodingForTarget(tensorType, targetAttr); - if (failed(info)) { - // No info at this iteration. Ignore and continue. - continue; - } - if (failed(result)) { - // No preexisting result. Use this iteration's info and continue. - result = info; - continue; - } - // Merge this iteration's info into preexisting result info. - // Check that permutations match, then record the max of tile sizes. 
- if (info->innerDimsPos != result->innerDimsPos || - info->outerDimsPerm != result->outerDimsPerm) { - return failure(); - } - if (info->innerTileSizes.size() != result->innerTileSizes.size()) { - return failure(); - } - for (unsigned i = 0; i < info->innerTileSizes.size(); ++i) { - if (ShapedType::isDynamic(info->innerTileSizes[i])) { - result->innerTileSizes[i] = ShapedType::kDynamic; - } else { - result->innerTileSizes[i] = - std::max(result->innerTileSizes[i], info->innerTileSizes[i]); - } - } - } - return result; - }; -} - static FailureOr chooseDynamicEncodingInfoVMVXMicrokernels(RankedTensorType tensorType, OpBuilder &builder, Location loc) { @@ -665,66 +616,4 @@ std::unique_ptr createCPUMaterializeDeviceEncodingPass() { return std::make_unique(); } -// NOTE: this runs on host modules. -struct CPUMaterializeUpperBoundTileSizePass - : public CPUMaterializeUpperBoundTileSizeBase< - CPUMaterializeUpperBoundTileSizePass> { - CPUMaterializeUpperBoundTileSizePass() = default; - - void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); - } - - void runOnOperation() override { - auto moduleOp = getOperation(); - - // Run required analysis passes. - IREE::Stream::AffinityAnalysis affinityAnalysis(moduleOp); - if (failed(affinityAnalysis.run())) { - return signalPassFailure(); - } - IREE::HAL::DeviceAnalysis deviceAnalysis(moduleOp); - if (failed(deviceAnalysis.run())) { - return signalPassFailure(); - } - - for (auto funcOp : moduleOp.getOps()) { - // Gather the required executable targets for the function. Note that it's - // possible there are more required for ops nested within the function but - // this pass is a hack and can't handle that :shrug:. 
- auto executableTargets = getFuncExecutableTargetAttrs( - funcOp, affinityAnalysis, deviceAnalysis); - if (!executableTargets) { - funcOp.emitOpError() - << "could not determine executable targets for the function"; - return signalPassFailure(); - } else if (executableTargets->empty()) { - // Probably no tensors. - continue; - } - - // Get patterns specialized for the executable targets used by the - // function. - RewritePatternSet patterns(&getContext()); - MaterializeEncodingFn materializeEncodingFn = - getUpperBoundMaterializeEncodingFn(executableTargets->getArrayRef()); - if (!materializeEncodingFn) - return signalPassFailure(); - populateMaterializeUpperBoundTileSizePatterns(patterns, - materializeEncodingFn); - - // Run patterns on the function. - if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) { - funcOp.emitOpError( - "encoding padding sizes materialization pattern failed"); - return signalPassFailure(); - } - } - } -}; - -std::unique_ptr createCPUMaterializeUpperBoundTileSizePass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h index f5f9a31d5b80..e6e39a032d48 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h @@ -26,23 +26,6 @@ namespace mlir::iree_compiler { std::unique_ptr createCPUMaterializeHostEncodingPass(); std::unique_ptr createCPUMaterializeDeviceEncodingPass(); -/// Like createLLVMCPUMaterializeEncodingPass, but specifically for -/// encoding.upper_bound_tile_size, converting it to constants. -/// -/// Unlike createLLVMCPUMaterializeEncodingPass, this does not require the -/// op to have a specific HAL target attribute. Instead, this will iterate over -/// all HAL target attributes, use the maximum of all padding sizes from each -/// target. 
This is needed because in top-level functions outside of HAL -/// executables, there are upper_bound_tile_size ops (created by SetEncoding, -/// and computing buffer allocation sizes) and there isn't one specific HAL -/// target. -/// -/// In the VMVX case where padding sizes are not compile-time constants, this -/// converts upper_bound_tile_size to some specific constant size (currently 16) -/// that is the largest tile size that we can use in VMVX, and can be adjusted -// as needed. -std::unique_ptr createCPUMaterializeUpperBoundTileSizePass(); - /// Adds CPU bufferization passes to the pipeline. void addCPUBufferizePasses(OpPassManager &funcPassManager); diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td index 6329c532cc0d..dd120bbe580a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td @@ -25,12 +25,6 @@ def CPUMaterializeDeviceEncoding : let constructor = "mlir::iree_compiler::createCPUMaterializeDeviceEncodingPass()"; } -def CPUMaterializeUpperBoundTileSize : - Pass<"iree-codegen-cpu-materialize-upper-bound-tile-size", "mlir::ModuleOp"> { - let summary = "Materialize upper_bound_tile_size to constants."; - let constructor = "mlir::iree_compiler::createCPUMaterializeUpperBoundTileSizePass()"; -} - def CPULowerToUKernels : Pass<"iree-codegen-cpu-lower-to-ukernels", ""> { let summary = diff --git a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h index 42b4438f63a6..6312d80e06c7 100644 --- a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h +++ b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h @@ -96,9 +96,6 @@ void populateMaterializeEncodingIntoPackUnPackPatterns( MaterializeEncodingTypeConverter &typeConverter, MaterializeEncodingValueFn materializeEncodingValueFn); -void populateMaterializeUpperBoundTileSizePatterns( - 
RewritePatternSet &patterns, MaterializeEncodingFn materializeEncodingFn); - // Returns true if `encoding` represents a narrow-N matmul RESULT, e.g. the // result of a matvec. bool isNarrowNResult(IREE::Encoding::EncodingAttr encoding); diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp index 6fc4dbd076bd..08d1bb1b683b 100644 --- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp @@ -54,18 +54,6 @@ struct MaterializeEncodingIntoNopPass return signalPassFailure(); } - { - RewritePatternSet patterns(context); - populateMaterializeUpperBoundTileSizePatterns(patterns, - materializeEncodingFn); - if (failed( - applyPatternsAndFoldGreedily(operation, std::move(patterns)))) { - operation.emitOpError( - "encoding padding sizes materialization pattern failed"); - return signalPassFailure(); - } - } - // Add patterns to resolve dims ops and cleanups. 
{ RewritePatternSet patterns(context); diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp index 281c39849f4b..9bf27aa0e1c0 100644 --- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp @@ -289,38 +289,6 @@ static FailureOr lowerUnsetEncodingToUnpackOp( *innerTileSizesOfr, materializeEncodingInfo->outerDimsPerm); } -static FailureOr> lowerUpperBoundTileSizeOpToConstants( - RewriterBase &rewriter, - IREE::Encoding::UpperBoundTileSizeOp upperBoundTileSizeOp, - MaterializeEncodingFn materializeEncodingFn) { - Location loc = upperBoundTileSizeOp.getLoc(); - RankedTensorType tensorType = upperBoundTileSizeOp.getTensorType(); - FailureOr materializeEncodingInfo = - materializeEncodingFn(tensorType); - if (failed(materializeEncodingInfo)) { - return rewriter.notifyMatchFailure(upperBoundTileSizeOp, - "unhandled source encoding"); - } - ArrayRef innerTileSizes = materializeEncodingInfo->innerTileSizes; - ArrayRef innerDimsPos = materializeEncodingInfo->innerDimsPos; - SmallVector results(tensorType.getRank()); - for (unsigned i = 0; i < innerTileSizes.size(); ++i) { - int64_t tileSize = innerTileSizes[i]; - if (ShapedType::isDynamic(tileSize)) { - tileSize = 16; - } - results[innerDimsPos[i]] = - rewriter.create(loc, tileSize); - } - // For the dims that have no inner tiles, use 1 as tile size to avoid padding. - for (unsigned i = 0; i < results.size(); ++i) { - if (!results[i]) { - results[i] = rewriter.create(loc, 1); - } - } - return results; -} - static FailureOr lowerContractionOpWithEncoding(RewriterBase &rewriter, linalg::LinalgOp linalgOp, ValueRange operands, @@ -788,36 +756,6 @@ struct UnsetEncodingOpToUnPackOpConversion } }; -/// Convert `upper_bound_tile_size` op to `constant` op. 
If the -/// `materializeEncodingFn` returns a failure, the pattern will materialize it -/// to the same shape. -struct UpperBoundTileSizeToConstantOpConversion - : public OpRewritePattern { - UpperBoundTileSizeToConstantOpConversion( - MLIRContext *context, MaterializeEncodingFn materializeEncodingFn) - : OpRewritePattern(context), - materializeEncodingFn(materializeEncodingFn) {} - - LogicalResult - matchAndRewrite(IREE::Encoding::UpperBoundTileSizeOp upperBoundTileSizeOp, - PatternRewriter &rewriter) const override { - - auto constants = lowerUpperBoundTileSizeOpToConstants( - rewriter, upperBoundTileSizeOp, materializeEncodingFn); - if (failed(constants)) { - SmallVector results(upperBoundTileSizeOp.getNumResults(), - rewriter.create( - upperBoundTileSizeOp.getLoc(), 1)); - rewriter.replaceOp(upperBoundTileSizeOp, results); - return success(); - } - rewriter.replaceOp(upperBoundTileSizeOp, *constants); - return success(); - } - - MaterializeEncodingFn materializeEncodingFn; -}; - /// Generic pattern to convert operation that is in Destination Passing Style. 
template struct MaterializeDPSOperation : public OpMaterializeEncodingPattern { @@ -959,10 +897,4 @@ void populateMaterializeEncodingIntoPackUnPackPatterns( context, typeConverter, materializeEncodingValueFn); } -void populateMaterializeUpperBoundTileSizePatterns( - RewritePatternSet &patterns, MaterializeEncodingFn materializeEncodingFn) { - patterns.insert( - patterns.getContext(), materializeEncodingFn); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingOps.td b/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingOps.td index 733c67dbcd62..6e42b3475555 100644 --- a/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingOps.td +++ b/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingOps.td @@ -53,29 +53,6 @@ def IREEEncoding_SetEncodingOp : IREEEncoding_PureOp<"set_encoding",[ }]; } -//===----------------------------------------------------------------------===// -// upper_bound_tile_size op. -//===----------------------------------------------------------------------===// - -def IREEEncoding_UpperBoundTileSizeOp : IREEEncoding_PureOp<"upper_bound_tile_size", - [Pure]> { - let summary = "returns an upper bound on tile sizes"; - let description = [{ - This returns the largest tile sizes that might result from materialization - of the given encoding. This can be used outside of target-specific code, so - there may be multiple targets, and this will return the maximum tile size - from iterating over all of them. The evaluation happens in the - MaterializeUpperBoundTileSize pass. - }]; - - let arguments = (ins TypeAttrOf:$tensorType); - let results = (outs Variadic:$results); - - let assemblyFormat = [{ - attr-dict $tensorType `->` type($results) - }]; -} - //===----------------------------------------------------------------------===// // unset_encoding op. 
//===----------------------------------------------------------------------===// diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp index 8310b6a85852..863c9394baa8 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp @@ -8,7 +8,6 @@ #include -#include "iree/compiler/Codegen/Common/CPU/Passes.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/HAL/Target/Devices/LocalDevice.h" @@ -313,12 +312,6 @@ void buildHALTransformPassPipeline(OpPassManager &passManager, buildHALConfigurationPassPipeline(passManager, targetRegistry, targetOptions, hooks); - // HACK: this should not be here and will be going away. It exists for - // lowering iree_linalg_ext.upper_bound_tile_size ops that exist on the - // host. We should be using stream ops for performing such calculations that - // we can attach affinities to and understand what devices are being used. - passManager.addPass(createCPUMaterializeUpperBoundTileSizePass()); - // Preprocess executables using an external tool. The tool may mutate one or // more variants and even insert or remove variants. 
for (auto command : clPreprocessExecutablesWith) { diff --git a/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp b/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp index 681b3369d165..863a64438f91 100644 --- a/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp +++ b/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp @@ -336,8 +336,7 @@ void registerUtilExternalModels(DialectRegistry &registry) { registry.addExtension( +[](MLIRContext *context, IREE::Encoding::IREEEncodingDialect *dialect) { UnhoistableOpInterfaceHelper< - IREE::Encoding::SetEncodingOp, - IREE::Encoding::UpperBoundTileSizeOp>::registerOpInterface(context); + IREE::Encoding::SetEncodingOp>::registerOpInterface(context); }); // Register hoistable type interfaces for linalg ops. // We have a specific allow-list for Linalg ops because we want to consider diff --git a/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp index 30baabc293a5..5c264befab8b 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp +++ b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp @@ -72,7 +72,6 @@ class MaterializeHomogeneousEncodingsPass } OpPassManager passManager(moduleOp.getOperationName()); - passManager.addPass(createCPUMaterializeUpperBoundTileSizePass()); passManager.addPass(createCPUMaterializeHostEncodingPass()); if (failed(runPipeline(passManager, moduleOp))) { return signalPassFailure();